Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/sql-migration-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ license: |
## Upgrading from Spark SQL 4.0 to 4.1

- Since Spark 4.1, the Parquet reader no longer assumes all struct values to be null, if all the requested fields are missing in the parquet file. The new default behavior is to read an additional struct field that is present in the file to determine nullness. To restore the previous behavior, set `spark.sql.legacy.parquet.returnNullStructIfAllFieldsMissing` to `true`.
- Since Spark 4.1, the Spark Thrift Server returns the corrected 1-based ORDINAL_POSITION in the result of the GetColumns operation, instead of the incorrect 0-based value. To restore the previous behavior, set `spark.sql.legacy.hive.thriftServer.useZeroBasedColumnOrdinalPosition` to `true`.

## Upgrading from Spark SQL 3.5 to 4.0

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import org.apache.spark.internal.Logging
import org.apache.spark.internal.LogKeys._
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.types._

/**
Expand Down Expand Up @@ -200,6 +201,11 @@ private[hive] class SparkGetColumnsOperation(
schema.zipWithIndex.foreach { case (column, pos) =>
if (columnPattern != null && !columnPattern.matcher(column.name).matches()) {
} else {
val ordinal = if (session.conf.get(HiveUtils.LEGACY_STS_ZERO_BASED_COLUMN_ORDINAL)) {
pos
} else {
pos + 1
}
val rowData = Array[AnyRef](
null, // TABLE_CAT
dbName, // TABLE_SCHEM
Expand All @@ -217,7 +223,7 @@ private[hive] class SparkGetColumnsOperation(
null, // SQL_DATA_TYPE
null, // SQL_DATETIME_SUB
null, // CHAR_OCTET_LENGTH
pos.asInstanceOf[AnyRef], // ORDINAL_POSITION
ordinal.asInstanceOf[AnyRef], // ORDINAL_POSITION, 1-based
"YES", // IS_NULLABLE
null, // SCOPE_CATALOG
null, // SCOPE_SCHEMA
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import org.apache.hive.service.cli.HiveSQLException

import org.apache.spark.SPARK_VERSION
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.util.VersionUtils
Expand Down Expand Up @@ -341,7 +342,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {

assert(rowSet.getInt("NULLABLE") === 1)
assert(rowSet.getString("REMARKS") === pos.toString)
assert(rowSet.getInt("ORDINAL_POSITION") === pos)
assert(rowSet.getInt("ORDINAL_POSITION") === pos + 1)
assert(rowSet.getString("IS_NULLABLE") === "YES")
assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO")
pos += 1
Expand Down Expand Up @@ -372,7 +373,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
assert(rowSet.getInt("NUM_PREC_RADIX") === 0)
assert(rowSet.getInt("NULLABLE") === 0)
assert(rowSet.getString("REMARKS") === "")
assert(rowSet.getInt("ORDINAL_POSITION") === 0)
assert(rowSet.getInt("ORDINAL_POSITION") === 1)
assert(rowSet.getString("IS_NULLABLE") === "YES")
assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO")
}
Expand Down Expand Up @@ -400,7 +401,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
assert(rowSet.getInt("NUM_PREC_RADIX") === 0)
assert(rowSet.getInt("NULLABLE") === 0)
assert(rowSet.getString("REMARKS") === "")
assert(rowSet.getInt("ORDINAL_POSITION") === 0)
assert(rowSet.getInt("ORDINAL_POSITION") === 1)
assert(rowSet.getString("IS_NULLABLE") === "YES")
assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO")
}
Expand All @@ -426,7 +427,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
assert(rowSet.getInt("NUM_PREC_RADIX") === 0)
assert(rowSet.getInt("NULLABLE") === 0)
assert(rowSet.getString("REMARKS") === "")
assert(rowSet.getInt("ORDINAL_POSITION") === 0)
assert(rowSet.getInt("ORDINAL_POSITION") === 1)
assert(rowSet.getString("IS_NULLABLE") === "YES")
assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO")
}
Expand All @@ -453,7 +454,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
assert(rowSet.getInt("NUM_PREC_RADIX") === 0)
assert(rowSet.getInt("NULLABLE") === 1)
assert(rowSet.getString("REMARKS") === "")
assert(rowSet.getInt("ORDINAL_POSITION") === 0)
assert(rowSet.getInt("ORDINAL_POSITION") === 1)
assert(rowSet.getString("IS_NULLABLE") === "YES")
assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO")
}
Expand Down Expand Up @@ -680,9 +681,36 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
assert(rowSet.getInt("DECIMAL_DIGITS") === 6)
assert(rowSet.getInt("NUM_PREC_RADIX") === 0)
assert(rowSet.getInt("NULLABLE") === 0)
assert(rowSet.getInt("ORDINAL_POSITION") === idx)
assert(rowSet.getInt("ORDINAL_POSITION") === idx + 1)
idx += 1
}
}
}

test("SPARK-54350: SparkGetColumnsOperation respects useZeroBasedColumnOrdinalPosition config") {
  // Exercise both the legacy (0-based) and corrected (1-based) ordinal behaviors.
  Seq(true, false).foreach { legacyZeroBased =>
    val viewName = "view_column_ordinal_position"
    val createViewSql = s"CREATE OR REPLACE GLOBAL TEMPORARY VIEW $viewName AS " +
      "SELECT 1 AS id, 'foo' AS name"

    withJdbcStatement(viewName) { statement =>
      // The config must be set before GetColumns runs so the operation picks it up.
      statement.execute(
        s"SET ${HiveUtils.LEGACY_STS_ZERO_BASED_COLUMN_ORDINAL.key}=$legacyZeroBased")
      statement.execute(createViewSql)
      val metaData = statement.getConnection.getMetaData
      val columns = metaData.getColumns("", "global_temp", viewName, null)
      // Under the legacy flag ordinals start at 0; otherwise they start at 1.
      val firstOrdinal = if (legacyZeroBased) 0 else 1
      Seq("id", "name").zipWithIndex.foreach { case (columnName, idx) =>
        assert(columns.next())
        assert(columns.getString("TABLE_SCHEM") === "global_temp")
        assert(columns.getString("TABLE_NAME") === viewName)
        assert(columns.getString("COLUMN_NAME") === columnName)
        assert(columns.getInt("ORDINAL_POSITION") === firstOrdinal + idx)
      }
      // The view has exactly two columns; there must be no extra rows.
      assert(!columns.next())
    }
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,14 @@ private[spark] object HiveUtils extends Logging {
.booleanConf
.createWithDefault(true)

// SPARK-54350: legacy escape hatch. JDBC's DatabaseMetaData.getColumns defines
// ORDINAL_POSITION as 1-based; Spark previously emitted 0-based values. Setting this
// to true restores the old (incorrect) 0-based behavior for compatibility.
val LEGACY_STS_ZERO_BASED_COLUMN_ORDINAL =
  buildConf("spark.sql.legacy.hive.thriftServer.useZeroBasedColumnOrdinalPosition")
    .doc("When set to true, Hive Thrift server returns 0-based ORDINAL_POSITION in the " +
      "result of GetColumns operation, instead of the corrected 1-based.")
    .version("4.1.0")
    .booleanConf
    .createWithDefault(false)

val USE_DELEGATE_FOR_SYMLINK_TEXT_INPUT_FORMAT =
buildConf("spark.sql.hive.useDelegateForSymlinkTextInputFormat")
.internal()
Expand Down