From d5666c5f3ee24d59dc1f06ad2b9df0d89a6724f3 Mon Sep 17 00:00:00 2001
From: Mihailo Timotic
Date: Fri, 26 Sep 2025 17:02:52 +0000
Subject: [PATCH] fix

---
 .../catalyst/analysis/ColumnResolutionHelper.scala          | 11 ++++++++---
 .../scala/org/apache/spark/sql/internal/SQLConf.scala       | 10 ++++++++++
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala          | 11 +++++++++++
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala
index 3224ccafafec3..0502f7f67078f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala
@@ -167,12 +167,17 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase {
          }
        }

-      case u @ UnresolvedExtractValue(child, fieldName) =>
+      case u @ UnresolvedExtractValue(child, field) =>
        val newChild = innerResolve(child, isTopLevel = false)
+        val resolvedField = if (conf.getConf(SQLConf.PREFER_COLUMN_OVER_LCA_IN_ARRAY_INDEX)) {
+          innerResolve(field, isTopLevel = false)
+        } else {
+          field
+        }
        if (newChild.resolved) {
-          ExtractValue(newChild, fieldName, resolver)
+          ExtractValue(child = newChild, extraction = resolvedField, resolver = resolver)
        } else {
-          u.copy(child = newChild)
+          u.copy(child = newChild, extraction = resolvedField)
        }

      case _ => e.mapChildren(innerResolve(_, isTopLevel = false))
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 17b8dd493cf80..c7663a053b053 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -241,6 +241,16 @@ object SQLConf {
    }
  }

+  val PREFER_COLUMN_OVER_LCA_IN_ARRAY_INDEX =
+    buildConf("spark.sql.analyzer.preferColumnOverLcaInArrayIndex")
+      .internal()
+      .doc(
+        "When true, prefer the column from the underlying relation over the lateral column alias " +
+        "reference with the same name (see SPARK-53734)."
+      )
+      .booleanConf
+      .createWithDefault(true)
+
  val DONT_DEDUPLICATE_EXPRESSION_IF_EXPR_ID_IN_OUTPUT =
    buildConf("spark.sql.analyzer.dontDeduplicateExpressionIfExprIdInOutput")
      .internal()
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 89a6a12a7e4e9..90375d0e08732 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -5079,6 +5079,17 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
    checkAnswer(df, Row(1))
  }

+  test("SPARK-53734: Prefer table column over LCA when resolving array index") {
+    val query = "SELECT 1 AS col1, col2[col1] FROM VALUES(0, ARRAY(1, 2));"
+    withSQLConf(SQLConf.PREFER_COLUMN_OVER_LCA_IN_ARRAY_INDEX.key -> "true") {
+      checkAnswer(sql(query), Row(1, 1))
+    }
+
+    withSQLConf(SQLConf.PREFER_COLUMN_OVER_LCA_IN_ARRAY_INDEX.key -> "false") {
+      checkAnswer(sql(query), Row(1, 2))
+    }
+  }
 }

 case class Foo(bar: Option[String])