From 31c96a54006a5f41b98659489df3abda6a82b88d Mon Sep 17 00:00:00 2001 From: Alexey Zinoviev Date: Wed, 26 Nov 2025 16:59:05 +0100 Subject: [PATCH 1/3] Force consistent handling of BIGINT as Long across JDBC metadata and MariaDB driver, improve fallback mechanisms for unsupported JDBC metadata methods, and update relevant tests. --- .../kotlinx/dataframe/io/db/DbType.kt | 103 ++++++++++++++---- .../kotlinx/dataframe/io/db/MariaDb.kt | 26 +++++ .../dataframe/io/commonTestScenarios.kt | 31 +++--- .../kotlinx/dataframe/io/local/mariadbTest.kt | 2 +- 4 files changed, 127 insertions(+), 35 deletions(-) diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DbType.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DbType.kt index 905a6c867f..00e7aa2d43 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DbType.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DbType.kt @@ -380,6 +380,10 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) { tableColumnMetadata.jdbcType == Types.NUMERIC && tableColumnMetadata.javaClassName == "java.lang.Double" -> Double::class + // Force BIGINT to always be Long, regardless of javaClassName + // Some JDBC drivers (e.g., MariaDB) may report Integer for small BIGINT values + tableColumnMetadata.jdbcType == Types.BIGINT -> Long::class + else -> jdbcTypeToKTypeMapping[tableColumnMetadata.jdbcType] ?: String::class } @@ -402,14 +406,22 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) { /** * Retrieves column metadata from a JDBC ResultSet. * - * By default, this method reads column metadata from [ResultSetMetaData], - * which is fast and supported by most JDBC drivers. - * If the driver does not provide sufficient information (e.g., `isNullable` unknown), - * it falls back to using [DatabaseMetaData.getColumns] for affected columns. + * This method reads column metadata from [ResultSetMetaData] with graceful fallbacks + * for JDBC drivers that throw [java.sql.SQLFeatureNotSupportedException] for certain methods + * (e.g., Apache Hive). + * + * Fallback behavior for unsupported methods: + * - `getColumnName()` → `getColumnLabel()` → `"column_N"` + * - `getTableName()` → extract from column name if contains '.' → `null` + * - `isNullable()` → [DatabaseMetaData.getColumns] → `true` (assume nullable) + * - `getColumnTypeName()` → `"OTHER"` + * - `getColumnType()` → [java.sql.Types.OTHER] + * - `getColumnDisplaySize()` → `0` + * - `getColumnClassName()` → `"java.lang.Object"` * * Override this method in subclasses to provide database-specific behavior * (for example, to disable fallback for databases like Teradata or Oracle - * where `DatabaseMetaData.getColumns` is known to be slow). + * where [DatabaseMetaData.getColumns] is known to be slow). * * @param resultSet The [ResultSet] containing query results. * @return A list of [TableColumnMetadata] objects. @@ -418,16 +430,44 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) { val rsMetaData = resultSet.metaData val connection = resultSet.statement.connection val dbMetaData = connection.metaData - val catalog = connection.catalog.takeUnless { it.isNullOrBlank() } - val schema = connection.schema.takeUnless { it.isNullOrBlank() } + + // Some JDBC drivers (e.g., Hive) throw SQLFeatureNotSupportedException + val catalog = try { + connection.catalog.takeUnless { it.isNullOrBlank() } + } catch (_: Exception) { + null + } + + val schema = try { + connection.schema.takeUnless { it.isNullOrBlank() } + } catch (_: Exception) { + null + } val columnCount = rsMetaData.columnCount val columns = mutableListOf() val nameCounter = mutableMapOf() for (index in 1..columnCount) { - val columnName = rsMetaData.getColumnName(index) - val tableName = rsMetaData.getTableName(index) + // Try to getColumnName, fallback to getColumnLabel, then generate name + val columnName = try { + rsMetaData.getColumnName(index) + } catch (_: Exception) { + try { + rsMetaData.getColumnLabel(index) + } catch (_: Exception) { + "column_$index" + } + } + + // Some JDBC drivers (e.g., Apache Hive) throw SQLFeatureNotSupportedException + val tableName = try { + rsMetaData.getTableName(index).takeUnless { it.isBlank() } + } catch (_: Exception) { + // Fallback: try to extract table name from column name if it contains '.' + val dotIndex = columnName.lastIndexOf('.') + if (dotIndex > 0) columnName.take(dotIndex) else "" + } // Try to detect nullability from ResultSetMetaData val isNullable = try { @@ -436,25 +476,48 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) { ResultSetMetaData.columnNullable -> true - ResultSetMetaData.columnNullableUnknown -> { - // Unknown nullability: assume it nullable, may trigger fallback - true - } + // Unknown nullability: assume it nullable, may trigger fallback + ResultSetMetaData.columnNullableUnknown -> true else -> true } } catch (_: Exception) { // Some drivers may throw for unsupported features - // In that case, fallback to DatabaseMetaData - dbMetaData.getColumns(catalog, schema, tableName, columnName).use { cols -> - if (cols.next()) !cols.getString("IS_NULLABLE").equals("NO", ignoreCase = true) else true + // Try fallback to DatabaseMetaData, with additional safety + try { + dbMetaData.getColumns(catalog, schema, tableName, columnName).use { cols -> + if (cols.next()) !cols.getString("IS_NULLABLE").equals("NO", ignoreCase = true) else true + } + } catch (_: Exception) { + // Fallback failed, assume nullable as the safest default + true } } - val columnType = rsMetaData.getColumnTypeName(index) - val jdbcType = rsMetaData.getColumnType(index) - val displaySize = rsMetaData.getColumnDisplaySize(index) - val javaClassName = rsMetaData.getColumnClassName(index) + // adding fallbacks to avoid SQLException + val columnType = try { + rsMetaData.getColumnTypeName(index) + } catch (_: Exception) { + "OTHER" + } + + val jdbcType = try { + rsMetaData.getColumnType(index) + } catch (_: Exception) { + Types.OTHER + } + + val displaySize = try { + rsMetaData.getColumnDisplaySize(index) + } catch (_: Exception) { + 0 + } + + val javaClassName = try { + rsMetaData.getColumnClassName(index) + } catch (_: Exception) { + "java.lang.Object" + } val uniqueName = manageColumnNameDuplication(nameCounter, columnName) diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/MariaDb.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/MariaDb.kt index 7ce8e4d681..77d091adef 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/MariaDb.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/MariaDb.kt @@ -18,6 +18,20 @@ public object MariaDb : DbType("mariadb") { get() = "org.mariadb.jdbc.Driver" override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? { + // Force BIGINT to always be Long, regardless of javaClassName + // MariaDB JDBC driver may report Integer for small BIGINT values + if (tableColumnMetadata.jdbcType == java.sql.Types.BIGINT) { + val kType = Long::class.createType(nullable = tableColumnMetadata.isNullable) + return ColumnSchema.Value(kType) + } + + if (tableColumnMetadata.sqlTypeName == "INTEGER UNSIGNED" || + tableColumnMetadata.sqlTypeName == "INT UNSIGNED" + ) { + val kType = Long::class.createType(nullable = tableColumnMetadata.isNullable) + return ColumnSchema.Value(kType) + } + if (tableColumnMetadata.sqlTypeName == "SMALLINT" && tableColumnMetadata.javaClassName == "java.lang.Short") { val kType = Short::class.createType(nullable = tableColumnMetadata.isNullable) return ColumnSchema.Value(kType) @@ -35,6 +49,18 @@ public object MariaDb : DbType("mariadb") { ) override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? { + // Force BIGINT to always be Long, regardless of javaClassName + // MariaDB JDBC driver may report Integer for small BIGINT values + if (tableColumnMetadata.jdbcType == java.sql.Types.BIGINT) { + return Long::class.createType(nullable = tableColumnMetadata.isNullable) + } + + if (tableColumnMetadata.sqlTypeName == "INTEGER UNSIGNED" || + tableColumnMetadata.sqlTypeName == "INT UNSIGNED" + ) { + return Long::class.createType(nullable = tableColumnMetadata.isNullable) + } + if (tableColumnMetadata.sqlTypeName == "SMALLINT" && tableColumnMetadata.javaClassName == "java.lang.Short") { return Short::class.createType(nullable = tableColumnMetadata.isNullable) } diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/commonTestScenarios.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/commonTestScenarios.kt index 947fe2137a..626a6883da 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/commonTestScenarios.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/commonTestScenarios.kt @@ -13,11 +13,15 @@ import java.sql.Connection import java.sql.ResultSet import kotlin.reflect.typeOf +private const val TEST_TABLE_NAME = "testtable123" + internal fun inferNullability(connection: Connection) { + connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS $TEST_TABLE_NAME") } + // prepare tables and data @Language("SQL") val createTestTable1Query = """ - CREATE TABLE TestTable1 ( + CREATE TABLE $TEST_TABLE_NAME ( id INT PRIMARY KEY, name VARCHAR(50), surname VARCHAR(50), @@ -25,28 +29,27 @@ internal fun inferNullability(connection: Connection) { ) """ - connection.createStatement().execute(createTestTable1Query) + connection.createStatement().use { st -> st.execute(createTestTable1Query) } connection.createStatement() - .execute("INSERT INTO TestTable1 (id, name, surname, age) VALUES (1, 'John', 'Crawford', 40)") + .execute("INSERT INTO $TEST_TABLE_NAME (id, name, surname, age) VALUES (1, 'John', 'Crawford', 40)") connection.createStatement() - .execute("INSERT INTO TestTable1 (id, name, surname, age) VALUES (2, 'Alice', 'Smith', 25)") + .execute("INSERT INTO $TEST_TABLE_NAME (id, name, surname, age) VALUES (2, 'Alice', 'Smith', 25)") connection.createStatement() - .execute("INSERT INTO TestTable1 (id, name, surname, age) VALUES (3, 'Bob', 'Johnson', 47)") + .execute("INSERT INTO $TEST_TABLE_NAME (id, name, surname, age) VALUES (3, 'Bob', 'Johnson', 47)") connection.createStatement() - .execute("INSERT INTO TestTable1 (id, name, surname, age) VALUES (4, 'Sam', NULL, 15)") + .execute("INSERT INTO $TEST_TABLE_NAME (id, name, surname, age) VALUES (4, 'Sam', NULL, 15)") // start testing `readSqlTable` method - // with default inferNullability: Boolean = true - val tableName = "TestTable1" - val df = DataFrame.readSqlTable(connection, tableName) + // with default inferNullability: Boolean = true " + val df = DataFrame.readSqlTable(connection, TEST_TABLE_NAME) df.schema().columns["id"]!!.type shouldBe typeOf() df.schema().columns["name"]!!.type shouldBe typeOf() df.schema().columns["surname"]!!.type shouldBe typeOf() df.schema().columns["age"]!!.type shouldBe typeOf() - val dataSchema = DataFrameSchema.readSqlTable(connection, tableName) + val dataSchema = DataFrameSchema.readSqlTable(connection, TEST_TABLE_NAME) dataSchema.columns.size shouldBe 4 dataSchema.columns["id"]!!.type shouldBe typeOf() dataSchema.columns["name"]!!.type shouldBe typeOf() @@ -54,7 +57,7 @@ internal fun inferNullability(connection: Connection) { dataSchema.columns["age"]!!.type shouldBe typeOf() // with inferNullability: Boolean = false - val df1 = DataFrame.readSqlTable(connection, tableName, inferNullability = false) + val df1 = DataFrame.readSqlTable(connection, TEST_TABLE_NAME, inferNullability = false) df1.schema().columns["id"]!!.type shouldBe typeOf() // this column changed a type because it doesn't contain nulls @@ -70,7 +73,7 @@ internal fun inferNullability(connection: Connection) { @Language("SQL") val sqlQuery = """ - SELECT name, surname, age FROM TestTable1 + SELECT name, surname, age FROM $TEST_TABLE_NAME """.trimIndent() val df2 = DataFrame.readSqlQuery(connection, sqlQuery) @@ -97,7 +100,7 @@ internal fun inferNullability(connection: Connection) { connection.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE, ResultSet.CONCUR_UPDATABLE).use { st -> @Language("SQL") - val selectStatement = "SELECT * FROM TestTable1" + val selectStatement = "SELECT * FROM $TEST_TABLE_NAME" st.executeQuery(selectStatement).use { rs -> // ith default inferNullability: Boolean = true @@ -130,7 +133,7 @@ internal fun inferNullability(connection: Connection) { } // end testing `readResultSet` method - connection.createStatement().execute("DROP TABLE TestTable1") + connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS $TEST_TABLE_NAME") } } /** diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/mariadbTest.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/mariadbTest.kt index be98d56ecf..4b5d3b13f5 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/mariadbTest.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/mariadbTest.kt @@ -338,7 +338,7 @@ class MariadbTest { val result = df1.filter { it[Table1MariaDb::id] == 1 } result[0][26] shouldBe "textValue1" val byteArray = "tinyblobValue".toByteArray() - (result[0][22] as Blob).getBytes(1, byteArray.size) contentEquals byteArray + result[0][22] shouldBe byteArray val schema = DataFrameSchema.readSqlTable(connection, "table1") schema.columns["id"]!!.type shouldBe typeOf() From 23ca81a1fd2331fb72a0bce546ea765d92a9fafc Mon Sep 17 00:00:00 2001 From: zaleslaw Date: Thu, 27 Nov 2025 15:31:04 +0100 Subject: [PATCH 2/3] Commented out BIGINT type mapping for investigation and adjusted minor code details --- .../org/jetbrains/kotlinx/dataframe/io/db/DbType.kt | 6 +++--- .../org/jetbrains/kotlinx/dataframe/io/db/MariaDb.kt | 10 ++++++---- .../kotlinx/dataframe/io/commonTestScenarios.kt | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DbType.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DbType.kt index 00e7aa2d43..e4fb482d8f 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DbType.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DbType.kt @@ -382,7 +382,7 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) { // Force BIGINT to always be Long, regardless of javaClassName // Some JDBC drivers (e.g., MariaDB) may report Integer for small BIGINT values - tableColumnMetadata.jdbcType == Types.BIGINT -> Long::class + // TODO: tableColumnMetadata.jdbcType == Types.BIGINT -> Long::class else -> jdbcTypeToKTypeMapping[tableColumnMetadata.jdbcType] ?: String::class } @@ -456,7 +456,7 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) { try { rsMetaData.getColumnLabel(index) } catch (_: Exception) { - "column_$index" + "column$index" } } @@ -466,7 +466,7 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) { } catch (_: Exception) { // Fallback: try to extract table name from column name if it contains '.' val dotIndex = columnName.lastIndexOf('.') - if (dotIndex > 0) columnName.take(dotIndex) else "" + if (dotIndex > 0) columnName.take(dotIndex) else null } // Try to detect nullability from ResultSetMetaData diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/MariaDb.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/MariaDb.kt index 77d091adef..407f04877c 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/MariaDb.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/MariaDb.kt @@ -20,10 +20,11 @@ public object MariaDb : DbType("mariadb") { override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? { // Force BIGINT to always be Long, regardless of javaClassName // MariaDB JDBC driver may report Integer for small BIGINT values - if (tableColumnMetadata.jdbcType == java.sql.Types.BIGINT) { + // TODO: investigate the corner case + /*if (tableColumnMetadata.jdbcType == java.sql.Types.BIGINT) { val kType = Long::class.createType(nullable = tableColumnMetadata.isNullable) return ColumnSchema.Value(kType) - } + }*/ if (tableColumnMetadata.sqlTypeName == "INTEGER UNSIGNED" || tableColumnMetadata.sqlTypeName == "INT UNSIGNED" @@ -51,9 +52,10 @@ public object MariaDb : DbType("mariadb") { override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? { // Force BIGINT to always be Long, regardless of javaClassName // MariaDB JDBC driver may report Integer for small BIGINT values - if (tableColumnMetadata.jdbcType == java.sql.Types.BIGINT) { + // TODO: investigate the corner case + /*if (tableColumnMetadata.jdbcType == java.sql.Types.BIGINT) { return Long::class.createType(nullable = tableColumnMetadata.isNullable) - } + }*/ if (tableColumnMetadata.sqlTypeName == "INTEGER UNSIGNED" || tableColumnMetadata.sqlTypeName == "INT UNSIGNED" diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/commonTestScenarios.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/commonTestScenarios.kt index 626a6883da..2669f7e944 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/commonTestScenarios.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/commonTestScenarios.kt @@ -42,7 +42,7 @@ internal fun inferNullability(connection: Connection) { // start testing `readSqlTable` method - // with default inferNullability: Boolean = true " + // with default inferNullability: Boolean = true val df = DataFrame.readSqlTable(connection, TEST_TABLE_NAME) df.schema().columns["id"]!!.type shouldBe typeOf() df.schema().columns["name"]!!.type shouldBe typeOf() From 1affdc6b0239600e685fd760d47cdd00692288a5 Mon Sep 17 00:00:00 2001 From: zaleslaw Date: Thu, 27 Nov 2025 15:36:54 +0100 Subject: [PATCH 3/3] Linter --- .../jetbrains/kotlinx/dataframe/io/db/MariaDb.kt | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/MariaDb.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/MariaDb.kt index 407f04877c..d91b60a0b2 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/MariaDb.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/MariaDb.kt @@ -21,10 +21,11 @@ public object MariaDb : DbType("mariadb") { // Force BIGINT to always be Long, regardless of javaClassName // MariaDB JDBC driver may report Integer for small BIGINT values // TODO: investigate the corner case - /*if (tableColumnMetadata.jdbcType == java.sql.Types.BIGINT) { - val kType = Long::class.createType(nullable = tableColumnMetadata.isNullable) - return ColumnSchema.Value(kType) - }*/ + + // if (tableColumnMetadata.jdbcType == java.sql.Types.BIGINT) { + // val kType = Long::class.createType(nullable = tableColumnMetadata.isNullable) + // return ColumnSchema.Value(kType) + // } if (tableColumnMetadata.sqlTypeName == "INTEGER UNSIGNED" || tableColumnMetadata.sqlTypeName == "INT UNSIGNED" @@ -53,9 +54,9 @@ public object MariaDb : DbType("mariadb") { // Force BIGINT to always be Long, regardless of javaClassName // MariaDB JDBC driver may report Integer for small BIGINT values // TODO: investigate the corner case - /*if (tableColumnMetadata.jdbcType == java.sql.Types.BIGINT) { - return Long::class.createType(nullable = tableColumnMetadata.isNullable) - }*/ + // if (tableColumnMetadata.jdbcType == java.sql.Types.BIGINT) { + // return Long::class.createType(nullable = tableColumnMetadata.isNullable) + // } if (tableColumnMetadata.sqlTypeName == "INTEGER UNSIGNED" || tableColumnMetadata.sqlTypeName == "INT UNSIGNED"