Skip to content

Commit 05bc5d4

Browse files
pan3793 authored and dongjoon-hyun committed
[SPARK-54350][SQL][STS] SparkGetColumnsOperation ORDINAL_POSITION should be 1-based
### What changes were proposed in this pull request? The SparkGetColumnsOperation is mainly used for the JDBC driver, while JDBC uses a 1-based ordinal/column-index instead of a 0-based one. This is also documented in the Hive API. https://github.com/apache/spark/blob/551b922a53acfdfeb2c065d5dedf35cb8cd30e1d/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java#L94-L95 Note that GetColumnsOperation, which was originally copied from Hive, has a correct implementation; the issue only exists in SparkGetColumnsOperation. For safety, a config `spark.sql.legacy.hive.thriftServer.useZeroBasedColumnOrdinalPosition` is added to allow the user to switch back to the previous behavior. ### Why are the changes needed? The SparkGetColumnsOperation is mainly used by JDBC [java.sql.DatabaseMetaData#getColumns](https://docs.oracle.com/en/java/javase/17/docs/api/java.sql/java/sql/DatabaseMetaData.html#getColumns(java.lang.String,java.lang.String,java.lang.String,java.lang.String)); this change makes it satisfy the JDBC API specification. ### Does this PR introduce _any_ user-facing change? Yes, see the above section. ### How was this patch tested? UTs are modified. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #53062 from pan3793/SPARK-54350. Authored-by: Cheng Pan <chengpan@apache.org> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
1 parent dce992b commit 05bc5d4

File tree

4 files changed

+50
-7
lines changed

4 files changed

+50
-7
lines changed

docs/sql-migration-guide.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ license: |
2525
## Upgrading from Spark SQL 4.0 to 4.1
2626

2727
- Since Spark 4.1, the Parquet reader no longer assumes all struct values to be null, if all the requested fields are missing in the parquet file. The new default behavior is to read an additional struct field that is present in the file to determine nullness. To restore the previous behavior, set `spark.sql.legacy.parquet.returnNullStructIfAllFieldsMissing` to `true`.
28+
- Since Spark 4.1, the Spark Thrift Server returns the corrected 1-based ORDINAL_POSITION in the result of the GetColumns operation, instead of the previous incorrect 0-based value. To restore the previous behavior, set `spark.sql.legacy.hive.thriftServer.useZeroBasedColumnOrdinalPosition` to `true`.
2829

2930
## Upgrading from Spark SQL 3.5 to 4.0
3031

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import org.apache.spark.internal.Logging
3131
import org.apache.spark.internal.LogKeys._
3232
import org.apache.spark.sql.SparkSession
3333
import org.apache.spark.sql.catalyst.TableIdentifier
34+
import org.apache.spark.sql.hive.HiveUtils
3435
import org.apache.spark.sql.types._
3536

3637
/**
@@ -200,6 +201,11 @@ private[hive] class SparkGetColumnsOperation(
200201
schema.zipWithIndex.foreach { case (column, pos) =>
201202
if (columnPattern != null && !columnPattern.matcher(column.name).matches()) {
202203
} else {
204+
val ordinal = if (session.conf.get(HiveUtils.LEGACY_STS_ZERO_BASED_COLUMN_ORDINAL)) {
205+
pos
206+
} else {
207+
pos + 1
208+
}
203209
val rowData = Array[AnyRef](
204210
null, // TABLE_CAT
205211
dbName, // TABLE_SCHEM
@@ -217,7 +223,7 @@ private[hive] class SparkGetColumnsOperation(
217223
null, // SQL_DATA_TYPE
218224
null, // SQL_DATETIME_SUB
219225
null, // CHAR_OCTET_LENGTH
220-
pos.asInstanceOf[AnyRef], // ORDINAL_POSITION
226+
ordinal.asInstanceOf[AnyRef], // ORDINAL_POSITION, 1-based
221227
"YES", // IS_NULLABLE
222228
null, // SCOPE_CATALOG
223229
null, // SCOPE_SCHEMA

sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import org.apache.hive.service.cli.HiveSQLException
2424

2525
import org.apache.spark.SPARK_VERSION
2626
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
27+
import org.apache.spark.sql.hive.HiveUtils
2728
import org.apache.spark.sql.internal.SQLConf
2829
import org.apache.spark.sql.types._
2930
import org.apache.spark.util.VersionUtils
@@ -341,7 +342,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
341342

342343
assert(rowSet.getInt("NULLABLE") === 1)
343344
assert(rowSet.getString("REMARKS") === pos.toString)
344-
assert(rowSet.getInt("ORDINAL_POSITION") === pos)
345+
assert(rowSet.getInt("ORDINAL_POSITION") === pos + 1)
345346
assert(rowSet.getString("IS_NULLABLE") === "YES")
346347
assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO")
347348
pos += 1
@@ -372,7 +373,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
372373
assert(rowSet.getInt("NUM_PREC_RADIX") === 0)
373374
assert(rowSet.getInt("NULLABLE") === 0)
374375
assert(rowSet.getString("REMARKS") === "")
375-
assert(rowSet.getInt("ORDINAL_POSITION") === 0)
376+
assert(rowSet.getInt("ORDINAL_POSITION") === 1)
376377
assert(rowSet.getString("IS_NULLABLE") === "YES")
377378
assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO")
378379
}
@@ -400,7 +401,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
400401
assert(rowSet.getInt("NUM_PREC_RADIX") === 0)
401402
assert(rowSet.getInt("NULLABLE") === 0)
402403
assert(rowSet.getString("REMARKS") === "")
403-
assert(rowSet.getInt("ORDINAL_POSITION") === 0)
404+
assert(rowSet.getInt("ORDINAL_POSITION") === 1)
404405
assert(rowSet.getString("IS_NULLABLE") === "YES")
405406
assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO")
406407
}
@@ -426,7 +427,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
426427
assert(rowSet.getInt("NUM_PREC_RADIX") === 0)
427428
assert(rowSet.getInt("NULLABLE") === 0)
428429
assert(rowSet.getString("REMARKS") === "")
429-
assert(rowSet.getInt("ORDINAL_POSITION") === 0)
430+
assert(rowSet.getInt("ORDINAL_POSITION") === 1)
430431
assert(rowSet.getString("IS_NULLABLE") === "YES")
431432
assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO")
432433
}
@@ -453,7 +454,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
453454
assert(rowSet.getInt("NUM_PREC_RADIX") === 0)
454455
assert(rowSet.getInt("NULLABLE") === 1)
455456
assert(rowSet.getString("REMARKS") === "")
456-
assert(rowSet.getInt("ORDINAL_POSITION") === 0)
457+
assert(rowSet.getInt("ORDINAL_POSITION") === 1)
457458
assert(rowSet.getString("IS_NULLABLE") === "YES")
458459
assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO")
459460
}
@@ -680,9 +681,36 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
680681
assert(rowSet.getInt("DECIMAL_DIGITS") === 6)
681682
assert(rowSet.getInt("NUM_PREC_RADIX") === 0)
682683
assert(rowSet.getInt("NULLABLE") === 0)
683-
assert(rowSet.getInt("ORDINAL_POSITION") === idx)
684+
assert(rowSet.getInt("ORDINAL_POSITION") === idx + 1)
684685
idx += 1
685686
}
686687
}
687688
}
689+
690+
test("SPARK-54350: SparkGetColumnsOperation respects useZeroBasedColumnOrdinalPosition config") {
691+
Seq(true, false).foreach { zeroBasedOrdinal =>
692+
val viewName = "view_column_ordinal_position"
693+
val ddl = s"CREATE OR REPLACE GLOBAL TEMPORARY VIEW $viewName AS " +
694+
"SELECT 1 AS id, 'foo' AS name"
695+
696+
withJdbcStatement(viewName) { statement =>
697+
statement.execute(
698+
s"SET ${HiveUtils.LEGACY_STS_ZERO_BASED_COLUMN_ORDINAL.key}=$zeroBasedOrdinal")
699+
statement.execute(ddl)
700+
val data = statement.getConnection.getMetaData
701+
val rowSet = data.getColumns("", "global_temp", viewName, null)
702+
assert(rowSet.next())
703+
assert(rowSet.getString("TABLE_SCHEM") === "global_temp")
704+
assert(rowSet.getString("TABLE_NAME") === viewName)
705+
assert(rowSet.getString("COLUMN_NAME") === "id")
706+
assert(rowSet.getInt("ORDINAL_POSITION") === (if (zeroBasedOrdinal) 0 else 1))
707+
assert(rowSet.next())
708+
assert(rowSet.getString("TABLE_SCHEM") === "global_temp")
709+
assert(rowSet.getString("TABLE_NAME") === viewName)
710+
assert(rowSet.getString("COLUMN_NAME") === "name")
711+
assert(rowSet.getInt("ORDINAL_POSITION") === (if (zeroBasedOrdinal) 1 else 2))
712+
assert(!rowSet.next())
713+
}
714+
}
715+
}
688716
}

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,14 @@ private[spark] object HiveUtils extends Logging {
222222
.booleanConf
223223
.createWithDefault(true)
224224

225+
val LEGACY_STS_ZERO_BASED_COLUMN_ORDINAL =
226+
buildConf("spark.sql.legacy.hive.thriftServer.useZeroBasedColumnOrdinalPosition")
227+
.doc("When set to true, Hive Thrift server returns 0-based ORDINAL_POSITION in the " +
228+
"result of GetColumns operation, instead of the corrected 1-based.")
229+
.version("4.1.0")
230+
.booleanConf
231+
.createWithDefault(false)
232+
225233
val USE_DELEGATE_FOR_SYMLINK_TEXT_INPUT_FORMAT =
226234
buildConf("spark.sql.hive.useDelegateForSymlinkTextInputFormat")
227235
.internal()

0 commit comments

Comments (0)