From e3e79e5b0891b41c8b3399cebefba3e14d4b4db7 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 25 Oct 2024 18:10:21 +0200 Subject: [PATCH 01/51] [SPARK-50123][TESTS] Move BitmapExpressionUtilsSuite & ExpressionImplUtilsSuite from java to scala test sources folder ### What changes were proposed in this pull request? Move the BitmapExpressionUtilsSuite and ExpressionImplUtilsSuite from the Java to the Scala test sources folder where they belong. ### Why are the changes needed? code refactoring ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests ### Was this patch authored or co-authored using generative AI tooling? no Closes #48657 from yaooqinn/minor. Authored-by: Kent Yao Signed-off-by: Max Gekk (cherry picked from commit 4de286aed61cd9199a99257a395a3e375d0aab3c) Signed-off-by: Max Gekk --- .../sql/catalyst/expressions/BitmapExpressionUtilsSuite.scala | 0 .../spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename sql/catalyst/src/test/{java => scala}/org/apache/spark/sql/catalyst/expressions/BitmapExpressionUtilsSuite.scala (100%) rename sql/catalyst/src/test/{java => scala}/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala (100%) diff --git a/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/BitmapExpressionUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitmapExpressionUtilsSuite.scala similarity index 100% rename from sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/BitmapExpressionUtilsSuite.scala rename to sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitmapExpressionUtilsSuite.scala diff --git a/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala similarity index 100% rename from sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala rename to sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala From 2f5e0fae122283c1ebd2c7b84b9614758af4b674 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 28 Oct 2024 17:00:33 -0700 Subject: [PATCH 02/51] [SPARK-50150][BUILD][3.5] Upgrade Jetty to 9.4.56.v20240826 ### What changes were proposed in this pull request? This PR aims to upgrade Jetty to 9.4.56.v20240826. ### Why are the changes needed? To bring the latest bug fixes. ### Does this PR introduce _any_ user-facing change? No behavior change. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48684 from dongjoon-hyun/SPARK-50150. 
Authored-by: Dongjoon Hyun
Signed-off-by: Dongjoon Hyun
---
 dev/deps/spark-deps-hadoop-3-hive-2.3 | 4 ++--
 pom.xml                               | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3
index a9d63c1ad0f99..62e001b8fd53a 100644
--- a/dev/deps/spark-deps-hadoop-3-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -130,8 +130,8 @@ jersey-container-servlet/2.40//jersey-container-servlet-2.40.jar
 jersey-hk2/2.40//jersey-hk2-2.40.jar
 jersey-server/2.40//jersey-server-2.40.jar
 jettison/1.1//jettison-1.1.jar
-jetty-util-ajax/9.4.54.v20240208//jetty-util-ajax-9.4.54.v20240208.jar
-jetty-util/9.4.54.v20240208//jetty-util-9.4.54.v20240208.jar
+jetty-util-ajax/9.4.56.v20240826//jetty-util-ajax-9.4.56.v20240826.jar
+jetty-util/9.4.56.v20240826//jetty-util-9.4.56.v20240826.jar
 jline/2.14.6//jline-2.14.6.jar
 joda-time/2.12.5//joda-time-2.12.5.jar
 jodd-core/3.5.2//jodd-core-3.5.2.jar

diff --git a/pom.xml b/pom.xml
index 3d9b003bd19c8..8a1bfd7ee6b0e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -143,7 +143,7 @@
     1.13.1
     1.9.4
     shaded-protobuf
-    9.4.54.v20240208
+    9.4.56.v20240826
     4.0.3
     0.10.0
     10.14.2.0

     1.13.1
-    1.9.4
+    1.9.5
     shaded-protobuf
     9.4.56.v20240826
     4.0.3

From 242d33399658b5eb10c012bc33ab676ff0b32ded Mon Sep 17 00:00:00 2001
From: cuiyanxiang
Date: Fri, 15 Nov 2024 14:41:49 +0800
Subject: [PATCH 16/51] [SPARK-50312][SQL] SparkThriftServer createServer
 parameter passing error when kerberos is true

### What changes were proposed in this pull request?

When kerberos is enabled and SparkThriftServer is started, the keytab and principal parameters are passed to `createServer` in the wrong order, which causes hadoop authentication errors:

`saslServer = ShimLoader.getHadoopThriftAuthBridge().createServer(principal, keytab);`

whereas the signature expects the keytab first:

`public Server createServer(String keytabFile, String principalConf) throws TTransportException { return new Server(keytabFile, principalConf); }`

### Why are the changes needed?

SparkThriftServer fails to start when kerberos is true.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

verified

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #48855 from CuiYanxiang/SPARK-50312.

Authored-by: cuiyanxiang
Signed-off-by: Kent Yao
(cherry picked from commit 3237885000e9126f261013ed3550bad394727466)
Signed-off-by: Kent Yao
---
 .../main/java/org/apache/hive/service/auth/HiveAuthFactory.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java
index e3316cef241c3..ba8210a267701 100644
--- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java
+++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java
@@ -117,7 +117,7 @@ public HiveAuthFactory(HiveConf conf) throws TTransportException, IOException {
       String keytab = conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB);
       if (needUgiLogin(UserGroupInformation.getCurrentUser(),
         SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keytab)) {
-        saslServer = ShimLoader.getHadoopThriftAuthBridge().createServer(principal, keytab);
+        saslServer = ShimLoader.getHadoopThriftAuthBridge().createServer(keytab, principal);
       } else {
         // Using the default constructor to avoid unnecessary UGI login.
        saslServer = new HadoopThriftAuthBridge.Server();

From 08b195c6faa0eea63d96bd781ac2e9ba34998e4f Mon Sep 17 00:00:00 2001
From: Kent Yao
Date: Wed, 20 Nov 2024 11:41:52 +0800
Subject: [PATCH 17/51] [MINOR][DOCS] Fix a HTML/Markdown syntax error in
 sql-migration-guide.md

### What changes were proposed in this pull request?

This PR fixes the below HTML/Markdown syntax error in sql-migration-guide.md

![image](https://github.com/user-attachments/assets/bb62a240-1ee5-4763-92c2-97fdd5436284)

### Why are the changes needed?

docfix

### Does this PR introduce _any_ user-facing change?

no

### How was this patch tested?

![image](https://github.com/user-attachments/assets/95b83aa0-beb1-418c-be08-02310010f4d8)

### Was this patch authored or co-authored using generative AI tooling?

no

Closes #48899 from yaooqinn/minor.

Authored-by: Kent Yao
Signed-off-by: Kent Yao
(cherry picked from commit b582daca568f7098c7b8fe4e3068e5986acdc18b)
Signed-off-by: Kent Yao
---
 docs/sql-migration-guide.md | 239 ++++++++++++++++--------------------
 1 file changed, 104 insertions(+), 135 deletions(-)

diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 964f7de637e8b..0f3adbdafeaf9 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -595,142 +595,111 @@ license: |

  - Since Spark 2.3, the Join/Filter's deterministic predicates that are after the first non-deterministic predicates are also pushed down/through the child operators, if possible. In prior Spark versions, these filters are not eligible for predicate pushdown.

 - Partition column inference previously found incorrect common type for different inferred types, for example, previously it ended up with double type as the common type for double type and date type. Now it finds the correct common type for such conflicts. The conflict resolution follows the table below (the patch only reworks the table's HTML markup; the cell contents are unchanged):

| InputA \ InputB | NullType | IntegerType | LongType | DecimalType(38,0)* | DoubleType | DateType | TimestampType | StringType |
|---|---|---|---|---|---|---|---|---|
| NullType | NullType | IntegerType | LongType | DecimalType(38,0) | DoubleType | DateType | TimestampType | StringType |
| IntegerType | IntegerType | IntegerType | LongType | DecimalType(38,0) | DoubleType | StringType | StringType | StringType |
| LongType | LongType | LongType | LongType | DecimalType(38,0) | StringType | StringType | StringType | StringType |
| DecimalType(38,0)* | DecimalType(38,0) | DecimalType(38,0) | DecimalType(38,0) | DecimalType(38,0) | StringType | StringType | StringType | StringType |
| DoubleType | DoubleType | DoubleType | StringType | StringType | DoubleType | StringType | StringType | StringType |
| DateType | DateType | StringType | StringType | StringType | StringType | DateType | TimestampType | StringType |
| TimestampType | TimestampType | StringType | StringType | StringType | StringType | TimestampType | TimestampType | StringType |
| StringType | StringType | StringType | StringType | StringType | StringType | StringType | StringType | StringType |

Note that, for DecimalType(38,0)*, the table above intentionally does not cover all other combinations of scales and precisions because currently we only infer decimal type like `BigInteger`/`BigInt`. For example, 1.1 is inferred as double type.
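As a concrete reading of the matrix (a hypothetical layout; the paths and payload column are illustrative, not from the guide, and a running `spark` session is assumed):

```scala
// Two partition directories whose values infer to conflicting types:
//   /data/tbl/p=1.1/...          -> p inferred as DoubleType
//   /data/tbl/p=2018-01-01/...   -> p inferred as DateType
// Per the table, DoubleType vs. DateType resolves to StringType,
// so the partition column is read back as a string:
val df = spark.read.parquet("/data/tbl")
df.printSchema()
// root
//  |-- value: long (nullable = true)    (illustrative payload column)
//  |-- p: string (nullable = true)
```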
From df9b9def5528a1b653ab6764347c3a8115c59eed Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Wed, 20 Nov 2024 21:03:14 +0800
Subject: [PATCH 18/51] [SPARK-50258][SQL] Fix output column order changed
 issue after AQE optimization

### What changes were proposed in this pull request?

The root cause of this issue is that the planner turns `Limit` + `Sort` into `TakeOrderedAndProjectExec`, which adds an additional `Project` that does not exist in the logical plan. We shouldn't use this additional `Project` to optimize out other `Project`s, otherwise when AQE turns the physical plan back into a logical plan, we lose the `Project` and may mess up the output column order.

This PR makes `RemoveRedundantProjects` not remove redundant projects if AQE is enabled and the projectList is the same as the child output in `TakeOrderedAndProjectExec`.

### Why are the changes needed?

Fix a potential data issue and avoid a Spark Driver crash:
```
...
```

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Unit test.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #48789 from wangyum/SPARK-50258.

Authored-by: Yuming Wang
Signed-off-by: Wenchen Fan
(cherry picked from commit 6ee53da5f356232e2026a67c8408de38c625038e)
Signed-off-by: Wenchen Fan
---
 .../execution/RemoveRedundantProjects.scala   |  8 ++++++-
 .../adaptive/AdaptiveQueryExecSuite.scala     | 23 ++++++++++++++++++-
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala
index 8f4ce0f49a89a..69230fd7b3343 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala
@@ -58,7 +58,13 @@ object RemoveRedundantProjects extends Rule[SparkPlan] {
         p.mapChildren(removeProject(_, false))
       }
     case op: TakeOrderedAndProjectExec =>
-      op.mapChildren(removeProject(_, false))
+      // The planner turns Limit + Sort into TakeOrderedAndProjectExec which adds an additional
+      // Project that does not exist in the logical plan. We shouldn't use this additional Project
+      // to optimize out other Projects, otherwise when AQE turns physical plan back to
+      // logical plan, we lose the Project and may mess up the output column order. So column
+      // ordering is required if AQE is enabled and projectList is the same as child output.
+      val requireColOrdering = conf.adaptiveExecutionEnabled && op.projectList == op.child.output
+      op.mapChildren(removeProject(_, requireColOrdering))
     case a: BaseAggregateExec =>
       // BaseAggregateExec require specific column ordering when mode is Final or PartialMerge.
       // See comments in BaseAggregateExec inputAttributes method.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index f6b96ee7e1ebd..2f8e401e743bb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.shuffle.sort.SortShuffleManager import org.apache.spark.sql.{Dataset, QueryTest, Row, SparkSession, Strategy} import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} -import org.apache.spark.sql.execution.{CollectLimitExec, ColumnarToRowExec, LocalTableScanExec, PartialReducerPartitionSpec, QueryExecution, ReusedSubqueryExec, ShuffledRowRDD, SortExec, SparkPlan, SparkPlanInfo, UnionExec} +import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.aggregate.BaseAggregateExec import org.apache.spark.sql.execution.columnar.{InMemoryTableScanExec, InMemoryTableScanLike} import org.apache.spark.sql.execution.command.DataWritingCommandExec @@ -40,6 +40,7 @@ import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ENSURE_RE import org.apache.spark.sql.execution.joins.{BaseJoinExec, BroadcastHashJoinExec, BroadcastNestedLoopJoinExec, ShuffledHashJoinExec, ShuffledJoin, SortMergeJoinExec} import org.apache.spark.sql.execution.metric.SQLShuffleReadMetricsReporter import org.apache.spark.sql.execution.ui.{SparkListenerSQLAdaptiveExecutionUpdate, SparkListenerSQLAdaptiveSQLMetricUpdates, SparkListenerSQLExecutionStart} +import org.apache.spark.sql.execution.window.WindowExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.PartitionOverwriteMode @@ -2965,6 +2966,26 @@ class AdaptiveQueryExecSuite } } } + + test("SPARK-50258: Fix output column order changed issue after AQE optimization") { + withTable("t") { + sql("SELECT course, year, earnings FROM courseSales").write.saveAsTable("t") + val df = sql( + """ + |SELECT year, course, earnings, SUM(earnings) OVER (ORDER BY year, course) AS balance + |FROM t ORDER BY year, course + |LIMIT 100 + |""".stripMargin) + df.collect() + + val plan = df.queryExecution.executedPlan.asInstanceOf[AdaptiveSparkPlanExec] + assert(plan.inputPlan.isInstanceOf[TakeOrderedAndProjectExec]) + assert(plan.finalPhysicalPlan.isInstanceOf[WindowExec]) + plan.inputPlan.output.zip(plan.finalPhysicalPlan.output).foreach { case (o1, o2) => + assert(o1.semanticEquals(o2), "Different output column order after AQE optimization") + } + } + } } /** From 5ff129ac8261c674b90545f3e1651e166dbc6249 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 27 Nov 2024 14:22:01 +0900 Subject: [PATCH 19/51] [SPARK-50430][CORE] Use the standard Properties.clone instead of manual clone ### What changes were proposed in this pull request? This PR proposes to use the standard Properties.clone instead of manual clone ### Why are the changes needed? 
In a very rare condition, when the properties were changed during the clone of Properties, it might throw an exception as below: ``` : java.util.ConcurrentModificationException at java.util.Hashtable$Enumerator.next(Hashtable.java:1408) at java.util.Hashtable.putAll(Hashtable.java:523) at org.apache.spark.util.Utils$.cloneProperties(Utils.scala:3474) at org.apache.spark.SparkContext.getCredentialResolvedProperties(SparkContext.scala:523) at org.apache.spark.SparkContext.runJobInternal(SparkContext.scala:3157) at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1104) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:165) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:125) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) at org.apache.spark.rdd.RDD.withScope(RDD.scala:454) at org.apache.spark.rdd.RDD.collect(RDD.scala:1102) at org.apache.spark.mllib.evaluation.AreaUnderCurve$.of(AreaUnderCurve.scala:44) at org.apache.spark.mllib.evaluation.BinaryClassificationMetrics.areaUnderROC(BinaryClassificationMetrics.scala:127) at org.apache.spark.ml.evaluation.BinaryClassificationEvaluator.evaluate(BinaryClassificationEvaluator.scala:101) at sun.reflect.GeneratedMethodAccessor323.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:397) at py4j.Gateway.invoke(Gateway.java:306) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:199) at py4j.ClientServerConnection.run(ClientServerConnection.java:119) at java.lang.Thread.run(Thread.java:750) ``` We should use the standard clone method. ### Does this PR introduce _any_ user-facing change? It fixes a very corner case bug as described above. ### How was this patch tested? It's difficult to test because the issue is from concurrent execution. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48978 from HyukjinKwon/SPARK-50430. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 7614819884ca192fab45ee2ace8a8e081ec8becc) Signed-off-by: Hyukjin Kwon --- core/src/main/scala/org/apache/spark/util/Utils.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 3b0efffedec6f..8762f0a6cdbc3 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2982,9 +2982,7 @@ private[spark] object Utils if (props == null) { return props } - val resultProps = new Properties() - props.forEach((k, v) => resultProps.put(k, v)) - resultProps + props.clone().asInstanceOf[Properties] } /** From 5e51e2c4541077eb15898dadecf0fb0d1a5e961a Mon Sep 17 00:00:00 2001 From: xunxunmimi5577 <52647492+xunxunmimi5577@users.noreply.github.com> Date: Mon, 2 Dec 2024 16:38:14 +0800 Subject: [PATCH 20/51] [SPARK-49294][UI] Add width attribute for shuffle-write-time checkbox ### What changes were proposed in this pull request? The pr aims to add the style for `shuffle-write-time-checkbox-div` and set the width to be `155` pixels. 
### Why are the changes needed?

Fix a bug in the UI. The tip of `shuffle-write-time` appears in a strange position before this change, as shown below:

![MEITU_20240819_105642523](https://github.com/user-attachments/assets/1e4e9639-a949-4fc3-86f4-7cb65d6d9c73)

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Manually checked.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #47798 from xunxunmimi5577/add-width-style-for-shuffle_write_time-checkbox.

Authored-by: xunxunmimi5577 <52647492+xunxunmimi5577@users.noreply.github.com>
Signed-off-by: panbingkun
(cherry picked from commit 05728e4ff64e6684d7c6501f8a079e3b9aded9ed)
Signed-off-by: panbingkun
---
 core/src/main/resources/org/apache/spark/ui/static/webui.css | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.css b/core/src/main/resources/org/apache/spark/ui/static/webui.css
index f952f86503e30..58c5add2d2400 100755
--- a/core/src/main/resources/org/apache/spark/ui/static/webui.css
+++ b/core/src/main/resources/org/apache/spark/ui/static/webui.css
@@ -355,6 +355,10 @@ a.expandbutton {
   width: 170px;
 }

+.shuffle-write-time-checkbox-div {
+  width: 155px;
+}
+
 .result-serialization-time-checkbox-div {
   width: 185px;
 }

From 1d6f7adbe622f8433b9e22e87fd191316ad86053 Mon Sep 17 00:00:00 2001
From: Cheng Pan
Date: Tue, 3 Dec 2024 08:31:07 -0800
Subject: [PATCH 21/51] [SPARK-50433][DOCS][TESTS][3.5] Fix configuring log4j2
 guide docs for Spark on YARN and UT

Backport https://github.com/apache/spark/pull/48981 to 3.5

### What changes were proposed in this pull request?

As title.

### Why are the changes needed?

SPARK-37814 (3.3.0) migrated the logging system from log4j1 to log4j2; we should update the docs as well.

### Does this PR introduce _any_ user-facing change?

Yes, docs are updated.

### How was this patch tested?

Review.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #49044 from pan3793/SPARK-50433-3.5.
Authored-by: Cheng Pan Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/test/IntegrationTestUtils.scala | 2 +- .../spark/examples/streaming/KinesisWordCountASL.scala | 2 +- docs/running-on-yarn.md | 8 ++++---- .../spark/examples/streaming/StreamingExamples.scala | 2 +- .../org/apache/spark/deploy/yarn/YarnClusterSuite.scala | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala index 61d08912aec23..3ae9b9fc73b48 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala @@ -74,7 +74,7 @@ object IntegrationTestUtils { // Redirect server log into console "--conf", - s"spark.driver.extraJavaOptions=-Dlog4j.configuration=$log4j2") + s"spark.driver.extraJavaOptions=-Dlog4j.configurationFile=$log4j2") } else Seq.empty } diff --git a/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala b/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala index 7d12af3256f1f..d388b480e065d 100644 --- a/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala +++ b/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala @@ -275,7 +275,7 @@ private[streaming] object StreamingExamples extends Logging { // We first log something to initialize Spark's default logging, then we override the // logging level. logInfo("Setting log level to [WARN] for streaming example." + - " To override add a custom log4j.properties to the classpath.") + " To override add a custom log4j2.properties to the classpath.") Configurator.setRootLevel(Level.WARN) } } diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index ce7121b806cb0..d0e725f6a98f4 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -121,15 +121,15 @@ all environment variables used for launching each container. This process is use classpath problems in particular. (Note that enabling this requires admin privileges on cluster settings and a restart of all node managers. Thus, this is not applicable to hosted clusters). -To use a custom log4j configuration for the application master or executors, here are the options: +To use a custom log4j2 configuration for the application master or executors, here are the options: -- upload a custom `log4j.properties` using `spark-submit`, by adding it to the `--files` list of files +- upload a custom `log4j2.properties` using `spark-submit`, by adding it to the `--files` list of files to be uploaded with the application. -- add `-Dlog4j.configuration=` to `spark.driver.extraJavaOptions` +- add `-Dlog4j.configurationFile=` to `spark.driver.extraJavaOptions` (for the driver) or `spark.executor.extraJavaOptions` (for executors). Note that if using a file, the `file:` protocol should be explicitly provided, and the file needs to exist locally on all the nodes. -- update the `$SPARK_CONF_DIR/log4j.properties` file and it will be automatically uploaded along +- update the `$SPARK_CONF_DIR/log4j2.properties` file and it will be automatically uploaded along with the other configurations. 
Note that other 2 options has higher priority than this option if multiple options are specified. diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala index 20c5eb1700155..9289b005e3ba4 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala @@ -31,7 +31,7 @@ object StreamingExamples extends Logging { // We first log something to initialize Spark's default logging, then we override the // logging level. logInfo("Setting log level to [WARN] for streaming example." + - " To override add a custom log4j.properties to the classpath.") + " To override add a custom log4j2.properties to the classpath.") Configurator.setRootLevel(Level.WARN) } } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index 2637b2eab80e2..5cd69314d28fa 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -293,7 +293,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite { } test("running Spark in yarn-cluster mode displays driver log links") { - val log4jConf = new File(tempDir, "log4j.properties") + val log4jConf = new File(tempDir, "log4j2.properties") val logOutFile = new File(tempDir, "logs") Files.write( s"""rootLogger.level = debug From 5dc927bab129c40b1e919889d22ada1c95157dc9 Mon Sep 17 00:00:00 2001 From: huangxiaoping <1754789345@qq.com> Date: Wed, 4 Dec 2024 18:24:35 +0800 Subject: [PATCH 22/51] [SPARK-50487][DOCS] Update broken jira link ### What changes were proposed in this pull request? Update broken jira link ### Why are the changes needed? The old link is not accessible ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No testing required ### Was this patch authored or co-authored using generative AI tooling? No Closes #49052 from huangxiaopingRD/SPARK-50487. Lead-authored-by: huangxiaoping <1754789345@qq.com> Co-authored-by: Dongjoon Hyun Signed-off-by: Kent Yao (cherry picked from commit 3d063a01d7c2a6d9613e11dec882739daa7eeb71) Signed-off-by: Kent Yao --- core/src/main/scala/org/apache/spark/rdd/RDD.scala | 5 +++-- python/pyspark/rdd.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index f695b10202758..b63e5999127d4 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1812,8 +1812,9 @@ abstract class RDD[T: ClassTag]( * Please read the linked SPIP and design docs to understand the limitations and future plans. 
* @return an [[RDDBarrier]] instance that provides actions within a barrier stage * @see [[org.apache.spark.BarrierTaskContext]] - * @see SPIP: Barrier Execution Mode - * @see Design Doc + * @see + * SPIP: Barrier Execution Mode + * @see Design Doc */ @Experimental @Since("2.4.0") diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index aa63c6509dce8..142678a4a6f6d 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -5130,8 +5130,8 @@ def barrier(self: "RDD[T]") -> "RDDBarrier[T]": ----- For additional information see - - `SPIP: Barrier Execution Mode `_ - - `Design Doc `_ + - `SPIP: Barrier Execution Mode `_ + - `Design Doc `_ This API is experimental """ From acedb15ea3695c91d8ffbf207d593e9c0204ea09 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Thu, 5 Dec 2024 07:47:26 -0800 Subject: [PATCH 23/51] [SPARK-50498][PYTHON] Avoid unnecessary py4j call in `listFunctions` ### What changes were proposed in this pull request? Avoid unnecessary py4j call in `listFunctions` ### Why are the changes needed? ``` iter = self._jcatalog.listFunctions(dbName).toLocalIterator() if pattern is None: iter = self._jcatalog.listFunctions(dbName).toLocalIterator() else: iter = self._jcatalog.listFunctions(dbName, pattern).toLocalIterator() ``` the first `self._jcatalog.listFunctions` is unnecessary ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? ci ### Was this patch authored or co-authored using generative AI tooling? no Closes #49073 from zhengruifeng/avoid_list_funcs. Authored-by: Ruifeng Zheng Signed-off-by: Dongjoon Hyun (cherry picked from commit 36285956ed2b9b8034d6918a9e951f1a2748f3ce) Signed-off-by: Dongjoon Hyun --- python/pyspark/sql/catalog.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py index 2c6ed28461f36..3c22473a06235 100644 --- a/python/pyspark/sql/catalog.py +++ b/python/pyspark/sql/catalog.py @@ -481,7 +481,6 @@ def listFunctions( """ if dbName is None: dbName = self.currentDatabase() - iter = self._jcatalog.listFunctions(dbName).toLocalIterator() if pattern is None: iter = self._jcatalog.listFunctions(dbName).toLocalIterator() else: From 86e29e94d9fd5637d0258b0b234f1a82eb4fd860 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 5 Dec 2024 14:43:39 -0800 Subject: [PATCH 24/51] [SPARK-50505][DOCS] Fix `spark.storage.replication.proactive` default value documentation ### What changes were proposed in this pull request? This PR aims to fix `spark.storage.replication.proactive` default value documentation. ### Why are the changes needed? `spark.storage.replication.proactive` has been enabled by default since Apache Spark 3.2.0. https://github.com/apache/spark/blob/6add9c89855f9311d5e185774ddddcbf4323beee/docs/core-migration-guide.md?plain=1#L85 https://github.com/apache/spark/blob/6add9c89855f9311d5e185774ddddcbf4323beee/core/src/main/scala/org/apache/spark/internal/config/package.scala#L494-L502 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual review. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49081 from dongjoon-hyun/SPARK-50505. 
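For reference, a hedged sketch of opting back out of the post-3.2.0 default, with the config key taken from the doc above (the value shown is illustrative, not a recommendation):

```scala
import org.apache.spark.SparkConf

// Proactive RDD block replication has defaulted to true since Spark 3.2.0;
// the docs above were merely lagging behind. Disable it explicitly only if
// the pre-3.2 behavior is desired.
val conf = new SparkConf()
  .set("spark.storage.replication.proactive", "false")
```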
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 21451fb312fee32188b6d24f406cb4f3a8349414) Signed-off-by: Dongjoon Hyun --- docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration.md b/docs/configuration.md index abd934572bd00..302348aa0f0eb 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1959,7 +1959,7 @@ Apart from these, the following properties are also available, and may be useful spark.storage.replication.proactive - false + true Enables proactive block replication for RDD blocks. Cached RDD block replicas lost due to executor failures are replenished if there are any existing available replicas. This tries From d01f34f670d1e6ffb6fb6580ffa8ea34e20d07cb Mon Sep 17 00:00:00 2001 From: Livia Zhu Date: Fri, 6 Dec 2024 11:28:55 +0900 Subject: [PATCH 25/51] [SPARK-50492][SS] Fix java.util.NoSuchElementException when event time column is dropped after dropDuplicatesWithinWatermark ### What changes were proposed in this pull request? Update `DeduplicateWithinWatermark` references to include all attributes that could be the watermarking column. ### Why are the changes needed? Fix `java.util.NoSuchElementException` due to ColumnPruning. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added unit test ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49065 from liviazhu-db/liviazhu-db/dedup-watermark-fix. Authored-by: Livia Zhu Signed-off-by: Jungtaek Lim (cherry picked from commit 851f5f2ff905636388ff31f349c6fc5064875172) Signed-off-by: Jungtaek Lim --- .../plans/logical/basicLogicalOperators.scala | 3 +++ ...treamingDeduplicationWithinWatermarkSuite.scala | 14 ++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index f76e698a64005..b2ae138a9b0a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -1944,6 +1944,9 @@ case class Deduplicate( } case class DeduplicateWithinWatermark(keys: Seq[Attribute], child: LogicalPlan) extends UnaryNode { + // Ensure that references include event time columns so they are not pruned away. 
+  override def references: AttributeSet = AttributeSet(keys) ++
+    AttributeSet(child.output.filter(_.metadata.contains(EventTimeWatermark.delayKey)))
   override def maxRows: Option[Long] = child.maxRows
   override def output: Seq[Attribute] = child.output
   final override val nodePatterns: Seq[TreePattern] = Seq(DISTINCT_LIKE)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationWithinWatermarkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationWithinWatermarkSuite.scala
index 9a02ab3df7dd4..af86e6ec88996 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationWithinWatermarkSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationWithinWatermarkSuite.scala
@@ -220,4 +220,18 @@ class StreamingDeduplicationWithinWatermarkSuite extends StateStoreMetricsTest {
       )
     }
   }
+
+  test("SPARK-50492: drop event time column after dropDuplicatesWithinWatermark") {
+    val inputData = MemoryStream[(Int, Int)]
+    val result = inputData.toDS()
+      .withColumn("first", timestamp_seconds($"_1"))
+      .withWatermark("first", "10 seconds")
+      .dropDuplicatesWithinWatermark("_2")
+      .select("_2")
+
+    testStream(result, Append)(
+      AddData(inputData, (1, 2)),
+      CheckAnswer(2)
+    )
+  }
 }

From 153cd9e1a79643c88d5b26e6fae0086a472380f8 Mon Sep 17 00:00:00 2001
From: yangjie01
Date: Fri, 6 Dec 2024 16:36:11 +0800
Subject: [PATCH 26/51] [SPARK-50492][SS][FOLLOWUP][3.5] Change `def
 references` to `lazy val references` in `DeduplicateWithinWatermark` to fix
 the compilation issue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What changes were proposed in this pull request?

This PR changes `def references` to `lazy val references` in `DeduplicateWithinWatermark` to fix the following compilation error:

- https://github.com/apache/spark/actions/runs/12191807324/job/34011354774

```
[error] /home/runner/work/spark/spark/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala:1948:16: overriding lazy value references in class QueryPlan of type org.apache.spark.sql.catalyst.expressions.AttributeSet;
[error]  method references needs to be a stable, immutable value
[error]   override def references: AttributeSet = AttributeSet(keys) ++
[error]                ^
[error] one error found
```

### Why are the changes needed?

Fix compile error.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Pass GitHub Actions

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #49087 from LuciferYang/SPARK-50492-FOLLOWUP-3.5.

Authored-by: yangjie01
Signed-off-by: yangjie01
---
 .../sql/catalyst/plans/logical/basicLogicalOperators.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index b2ae138a9b0a9..4824bc1f3c263 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -1945,7 +1945,8 @@ case class Deduplicate(

 case class DeduplicateWithinWatermark(keys: Seq[Attribute], child: LogicalPlan) extends UnaryNode {
   // Ensure that references include event time columns so they are not pruned away.
-  override def references: AttributeSet = AttributeSet(keys) ++
+  @transient
+  override lazy val references: AttributeSet = AttributeSet(keys) ++
     AttributeSet(child.output.filter(_.metadata.contains(EventTimeWatermark.delayKey)))
   override def maxRows: Option[Long] = child.maxRows
   override def output: Seq[Attribute] = child.output

From bf29ab9eae79e73c6844881b0cd3a5e284960618 Mon Sep 17 00:00:00 2001
From: Terry Wang
Date: Fri, 6 Dec 2024 09:29:57 -0800
Subject: [PATCH 27/51] [SPARK-50421][CORE][3.5] Fix executor related memory
 config incorrect when multiple resource profiles worked

### What changes were proposed in this pull request?

Reset the executor's memory-related config when the resource profile is not the default resource profile.

### Why are the changes needed?

When multiple resource profiles exist in the same Spark application, the executor's memory-related config is currently not overridden by the resource profile's memory sizes, which causes maxOffHeap in `UnifiedMemoryManager` to be incorrect. See https://issues.apache.org/jira/browse/SPARK-50421 for more details.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Tests in our internal Spark version and jobs.

### Was this patch authored or co-authored using generative AI tooling?

No

This is a backport of https://github.com/apache/spark/pull/48963 to branch 3.5.

Closes #49090 from zjuwangg/m35_fixConfig.

Authored-by: Terry Wang
Signed-off-by: Dongjoon Hyun
---
 .../CoarseGrainedExecutorBackend.scala        | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
index 537522326fc78..fe90895cacb53 100644
--- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
@@ -474,6 +474,27 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging {
     }

     driverConf.set(EXECUTOR_ID, arguments.executorId)
+    // Set executor memory related config here according to resource profile
+    if (cfg.resourceProfile.id != ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) {
+      cfg.resourceProfile
+        .executorResources
+        .foreach {
+          case (ResourceProfile.OFFHEAP_MEM, request) =>
+            driverConf.set(MEMORY_OFFHEAP_SIZE.key, request.amount.toString + "m")
+            logInfo(s"Set executor off-heap memory to $request")
+          case (ResourceProfile.MEMORY, request) =>
+            driverConf.set(EXECUTOR_MEMORY.key, request.amount.toString + "m")
+            logInfo(s"Set executor memory to $request")
+          case (ResourceProfile.OVERHEAD_MEM, request) =>
+            // Maybe don't need to set this since it's nearly used by tasks.
+            driverConf.set(EXECUTOR_MEMORY_OVERHEAD.key, request.amount.toString + "m")
+            logInfo(s"Set executor memory_overhead to $request")
+          case (ResourceProfile.CORES, request) =>
+            driverConf.set(EXECUTOR_CORES.key, request.amount.toString)
+            logInfo(s"Set executor cores to $request")
+          case _ =>
+        }
+    }
     val env = SparkEnv.createExecutorEnv(driverConf, arguments.executorId, arguments.bindAddress,
       arguments.hostname, arguments.cores, cfg.ioEncryptionKey, isLocal = false)

     // Set the application attemptId in the BlockStoreClient if available.
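To make the scenario concrete, here is a minimal sketch of a non-default resource profile whose memory settings the fix above now propagates into the executor-side config (the amounts are arbitrary examples; assumes a running `sc` with dynamic allocation enabled, as multiple profiles normally require, and `spark.memory.offHeap.enabled=true` for the off-heap line):

```scala
import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfileBuilder}

// A second, non-default profile overriding executor memory settings.
val reqs = new ExecutorResourceRequests()
  .memory("4g")          // ResourceProfile.MEMORY       -> spark.executor.memory
  .memoryOverhead("1g")  // ResourceProfile.OVERHEAD_MEM -> spark.executor.memoryOverhead
  .offHeapMemory("2g")   // ResourceProfile.OFFHEAP_MEM  -> spark.memory.offHeap.size
  .cores(4)              // ResourceProfile.CORES        -> spark.executor.cores
val profile = new ResourceProfileBuilder().require(reqs).build()

// Executors launched for this stage should now see the profile's sizes instead
// of the application defaults, so UnifiedMemoryManager computes maxOffHeap
// from the profile's off-heap size rather than the default config.
sc.parallelize(1 to 100, numSlices = 4).withResources(profile).count()
```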
From d8f3afac944291869b55dc8ad52aa638ba24f98b Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun
Date: Sat, 7 Dec 2024 10:22:15 +0800
Subject: [PATCH 28/51] [SPARK-50514][DOCS] Add `IDENTIFIER clause` page to
 `menu-sql.yaml`

### What changes were proposed in this pull request?

This PR aims to add the `IDENTIFIER clause` page to `menu-sql.yaml` for Apache Spark 3.5.4.

### Why are the changes needed?

This was missed at SPARK-43205 (Apache Spark 3.5.0).
- #42506

### Does this PR introduce _any_ user-facing change?

**BEFORE**
![Screenshot 2024-12-06 at 11 35 52](https://github.com/user-attachments/assets/c3c8dc56-b8d4-4f8d-bb9e-31bccb1f5d42)

**AFTER**
![Screenshot 2024-12-06 at 11 36 14](https://github.com/user-attachments/assets/bd1606d2-eb3f-4640-92ef-b0079847c3a3)

### How was this patch tested?

Manual review.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #49097 from dongjoon-hyun/SPARK-50514.

Authored-by: Dongjoon Hyun
Signed-off-by: yangjie01
(cherry picked from commit 28766d4120f4f5bb13f474d53e83e05f38a31475)
Signed-off-by: yangjie01
---
 docs/_data/menu-sql.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml
index ff93f09a83ce1..99d7aded3f25a 100644
--- a/docs/_data/menu-sql.yaml
+++ b/docs/_data/menu-sql.yaml
@@ -87,6 +87,8 @@
     url: sql-ref-functions.html
   - text: Identifiers
     url: sql-ref-identifier.html
+  - text: IDENTIFIER clause
+    url: sql-ref-identifier-clause.html
   - text: Literals
     url: sql-ref-literals.html
   - text: Null Semantics

From 305d2a0aa48cf5b470f75b02677e7761dca2a0e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andrej=20Gobelji=C4=87?=
Date: Sat, 7 Dec 2024 10:24:53 -0800
Subject: [PATCH 29/51] [SPARK-49695][SQL][3.5] Postgres fix xor push-down
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What changes were proposed in this pull request?

Backport of https://github.com/apache/spark/pull/48144

This PR fixes the pushdown of the ^ operator (XOR operator) for Postgres, which uses ^ as the exponent operator rather than bitwise xor. The fix consists of overriding the SQLExpressionBuilder to replace the '^' character with '#'.

### Why are the changes needed?

The result is incorrect.

### Does this PR introduce _any_ user-facing change?

Yes. The user will now have a proper translation of the ^ operator.

### How was this patch tested?

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #49071 from andrej-db/PGXORBackport.
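To spell out the operator mismatch that the test added below guards against (a hedged illustration; the SQL results describe standard Postgres semantics, not output from this patch):

```scala
// In Spark/Scala, '^' is bitwise xor:
assert((2 ^ 3) == 1)
// Postgres instead treats '^' as numeric power (SELECT 2 ^ 3 yields 8)
// and uses '#' for bitwise xor (SELECT 2 # 3 yields 1).
// Hence PostgresSQLBuilder rewrites the operator before push-down, so that
//   "dept ^ 6 = 0"   is sent as   "dept # 6 = 0".
```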
Lead-authored-by: Andrej Gobeljić Co-authored-by: andrej-gobeljic_data Signed-off-by: Dongjoon Hyun --- .../jdbc/v2/PostgresIntegrationSuite.scala | 10 +++++++++ .../spark/sql/jdbc/PostgresDialect.scala | 21 ++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala index 7fef3ccd6b3f6..b0edac3fcdd1f 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -22,6 +22,7 @@ import java.sql.Connection import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +import org.apache.spark.sql.execution.FilterExec import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog import org.apache.spark.sql.jdbc.DatabaseOnDocker import org.apache.spark.sql.types._ @@ -123,4 +124,13 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCT ) } } + + test("SPARK-49695: Postgres fix xor push-down") { + val df = spark.sql(s"select dept, name from $catalogName.employee where dept ^ 6 = 0") + val rows = df.collect() + assert(!df.queryExecution.sparkPlan.exists(_.isInstanceOf[FilterExec])) + assert(rows.length == 1) + assert(rows(0).getInt(0) === 6) + assert(rows(0).getString(1) === "jen") + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index f8f72d88589e3..dd4545bc41b42 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -22,11 +22,13 @@ import java.time.{LocalDateTime, ZoneOffset} import java.util import java.util.Locale +import scala.util.control.NonFatal + import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.analysis.{IndexAlreadyExistsException, NonEmptyNamespaceException, NoSuchIndexException} import org.apache.spark.sql.connector.catalog.Identifier -import org.apache.spark.sql.connector.expressions.NamedReference +import org.apache.spark.sql.connector.expressions.{Expression, NamedReference} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils} import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo @@ -258,6 +260,23 @@ private object PostgresDialect extends JdbcDialect with SQLConfHelper { } } + class PostgresSQLBuilder extends JDBCSQLBuilder { + override def visitBinaryArithmetic(name: String, l: String, r: String): String = { + l + " " + name.replace('^', '#') + " " + r + } + } + + override def compileExpression(expr: Expression): Option[String] = { + val postgresSQLBuilder = new PostgresSQLBuilder() + try { + Some(postgresSQLBuilder.build(expr)) + } catch { + case NonFatal(e) => + logWarning("Error occurs while compiling V2 expression", e) + None + } + } + override def supportsLimit: Boolean = true override def supportsOffset: Boolean = true From a57f3c273b4d9cc637f189d36ba0c4db256d60a9 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 7 Dec 
2024 20:00:37 -0800 Subject: [PATCH 30/51] [SPARK-50483][CORE][SQL][3.5] BlockMissingException should be thrown even if ignoreCorruptFiles is enabled ### What changes were proposed in this pull request? `BlockMissingException` extends from `IOException`. When `BlockMissingException` occurs and ignoreCorruptFiles is enabled, the current task may not get any data and will be marked as successful([code](https://github.com/apache/spark/blob/0d045db8d15d0aeb0f54a1557fd360363e77ed42/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala#L271-L273)). This will cause data quality issues. Generally speaking, `BlockMissingException` is a system issue, not a file corruption issue. Therefore, `BlockMissingException` should be thrown even if ignoreCorruptFiles is enabled. Related error message: ``` 24/11/29 01:56:00 WARN FileScanRDD: Skipped the rest of the content in the corrupted file: path: viewfs://hadoop-cluster/path/to/data/part-00320-7915e327-3214-4585-a44e-f9c58e362b43.c000.snappy.parquet, range: 191727616-281354675, partition values: [empty row] org.apache.hadoop.hdfs.BlockMissingException: Could not obtain block: BP-169998034-10.210.23.11-1507067630530:blk_83565156183_82548880660 file/path/to/data/part-00320-7915e327-3214-4585-a44e-f9c58e362b43.c000.snappy.parquet No live nodes contain current block Block locations: DatanodeInfoWithStorage[10.209.145.174:50010,DS-c7c0a172-5ffa-4f90-bfb5-717fb1e9ecf2,DISK] DatanodeInfoWithStorage[10.3.22.142:50010,DS-a1ba9ac9-dc92-4131-a2c2-9f7d03b97caf,DISK] DatanodeInfoWithStorage[10.209.146.156:50010,DS-71d8ae97-15d3-454e-a715-d9490e184989,DISK] Dead nodes: DatanodeInfoWithStorage[10.209.146.156:50010,DS-71d8ae97-15d3-454e-a715-d9490e184989,DISK] DatanodeInfoWithStorage[10.209.145.174:50010,DS-c7c0a172-5ffa-4f90-bfb5-717fb1e9ecf2,DISK] DatanodeInfoWithStorage[10.3.22.142:50010,DS-a1ba9ac9-dc92-4131-a2c2-9f7d03b97caf,DISK] ``` ![image](https://github.com/user-attachments/assets/e040ce9d-1a0e-44eb-bd03-4cd7a9fff80f) ### Why are the changes needed? Avoid data issue if ignoreCorruptFiles is enabled when `BlockMissingException` occurred. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Manual test. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49105 from wangyum/SPARK-50483-branch-3.5. 
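The crux is the exception hierarchy. A minimal sketch of the catch-ordering pattern the diffs below apply (`ignoreCorruptFiles` and `readNext` are stand-ins for the surrounding reader code, not Spark APIs):

```scala
import java.io.IOException
import org.apache.hadoop.hdfs.BlockMissingException

// Stand-ins for the reader machinery (assumptions for illustration only):
val ignoreCorruptFiles = true
def readNext(): Unit = () // would advance the underlying record reader

try {
  readNext()
} catch {
  // BlockMissingException extends IOException, so it must be matched first;
  // otherwise the ignoreCorruptFiles guard below would swallow a cluster-side
  // read failure and the task would be marked successful with missing rows.
  case e: BlockMissingException => throw e
  case e: IOException if ignoreCorruptFiles =>
    println(s"Skipped the rest of the content in the corrupted file: $e")
}
```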
Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala | 3 +++ core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala | 3 +++ core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala | 3 +++ .../apache/spark/sql/execution/datasources/FileScanRDD.scala | 2 ++ .../sql/execution/datasources/v2/FilePartitionReader.scala | 4 ++++ 5 files changed, 15 insertions(+) diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala index 2554106d78e9d..67e4583fe4822 100644 --- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala +++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -29,6 +29,7 @@ import org.apache.avro.mapred.{AvroOutputFormat, FsInput} import org.apache.avro.mapreduce.AvroJob import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.FileStatus +import org.apache.hadoop.hdfs.BlockMissingException import org.apache.hadoop.mapreduce.Job import org.apache.spark.SparkException @@ -140,6 +141,8 @@ private[sql] object AvroUtils extends Logging { try { Some(DataFileReader.openReader(in, new GenericDatumReader[GenericRecord]())) } catch { + case e: BlockMissingException => + throw new SparkException(s"Could not read file: $path", e) case e: IOException => if (ignoreCorruptFiles) { logWarning(s"Skipped the footer in the corrupted file: $path", e) diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index cad107256c58c..edd07a2649dbb 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -25,6 +25,7 @@ import scala.collection.immutable.Map import scala.reflect.ClassTag import org.apache.hadoop.conf.{Configurable, Configuration} +import org.apache.hadoop.hdfs.BlockMissingException import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.hadoop.mapred._ import org.apache.hadoop.mapred.lib.CombineFileSplit @@ -293,6 +294,7 @@ class HadoopRDD[K, V]( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e: BlockMissingException => throw e case e: IOException if ignoreCorruptFiles => logWarning(s"Skipped the rest content in the corrupted file: ${split.inputSplit}", e) finished = true @@ -318,6 +320,7 @@ class HadoopRDD[K, V]( finished = true // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e: BlockMissingException => throw e case e: IOException if ignoreCorruptFiles => logWarning(s"Skipped the rest content in the corrupted file: ${split.inputSplit}", e) finished = true diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index 119fdae531f22..fbd2235aabaf6 100644 --- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -25,6 +25,7 @@ import scala.collection.JavaConverters.asScalaBufferConverter import scala.reflect.ClassTag import org.apache.hadoop.conf.{Configurable, Configuration} +import org.apache.hadoop.hdfs.BlockMissingException import org.apache.hadoop.io.Writable import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.hadoop.mapred.JobConf 
@@ -227,6 +228,7 @@ class NewHadoopRDD[K, V]( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e: BlockMissingException => throw e case e: IOException if ignoreCorruptFiles => logWarning( s"Skipped the rest content in the corrupted file: ${split.serializableHadoopSplit}", @@ -255,6 +257,7 @@ class NewHadoopRDD[K, V]( finished = true // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e: BlockMissingException => throw e case e: IOException if ignoreCorruptFiles => logWarning( s"Skipped the rest content in the corrupted file: ${split.serializableHadoopSplit}", diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala index 0cca51cf4e393..ce56fc1b28296 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala @@ -23,6 +23,7 @@ import java.net.URI import scala.util.control.NonFatal import org.apache.hadoop.fs.Path +import org.apache.hadoop.hdfs.BlockMissingException import org.apache.spark.{Partition => RDDPartition, SparkUpgradeException, TaskContext} import org.apache.spark.deploy.SparkHadoopUtil @@ -259,6 +260,7 @@ class FileScanRDD( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e: BlockMissingException => throw e case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => logWarning( s"Skipped the rest of the content in the corrupted file: $currentFile", e) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala index 7159bc6de3a47..8f51226dcfe99 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala @@ -20,6 +20,8 @@ import java.io.{FileNotFoundException, IOException} import scala.util.control.NonFatal +import org.apache.hadoop.hdfs.BlockMissingException + import org.apache.spark.SparkUpgradeException import org.apache.spark.internal.Logging import org.apache.spark.rdd.InputFileBlockHolder @@ -49,6 +51,7 @@ class FilePartitionReader[T]( // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw QueryExecutionErrors.fileNotFoundError(e) + case e: BlockMissingException => throw e case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => logWarning( s"Skipped the rest of the content in the corrupted file.", e) @@ -68,6 +71,7 @@ class FilePartitionReader[T]( throw QueryExecutionErrors.unsupportedSchemaColumnConvertError( currentReader.file.urlEncodedPath, e.getColumn, e.getLogicalType, e.getPhysicalType, e) + case e: BlockMissingException => throw e case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => logWarning( s"Skipped the rest of the content in the corrupted file: $currentReader", e) From 929a19fe1604e07adf9ed2798ec0c1b53e0bd60d Mon Sep 17 00:00:00 2001 From: Jie Yang Date: Mon, 9 Dec 2024 01:32:17 +0000 Subject: [PATCH 31/51] Preparing Spark 
release v3.5.4-rc1 --- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- common/utils/pom.xml | 2 +- connector/avro/pom.xml | 2 +- connector/connect/client/jvm/pom.xml | 2 +- connector/connect/common/pom.xml | 2 +- connector/connect/server/pom.xml | 2 +- connector/docker-integration-tests/pom.xml | 2 +- connector/kafka-0-10-assembly/pom.xml | 2 +- connector/kafka-0-10-sql/pom.xml | 2 +- connector/kafka-0-10-token-provider/pom.xml | 2 +- connector/kafka-0-10/pom.xml | 2 +- connector/kinesis-asl-assembly/pom.xml | 2 +- connector/kinesis-asl/pom.xml | 2 +- connector/protobuf/pom.xml | 2 +- connector/spark-ganglia-lgpl/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 2 +- examples/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/api/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 44 files changed, 44 insertions(+), 44 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index 3367c1629c578..47b38621d6400 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 014ff5bbaf209..3757f69e9bd17 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index ed2352fd1276e..83243d183b7b9 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index b791a06aad43a..e74fb05beb0ae 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 685ada5194905..13c4b5cca1e32 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index b2e488c7bb222..709bbed0c553c 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 3a260a8dff53f..59e9973c42d05 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index fd0aa7ba2a3a2..e222499eec228 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 
3.5.4 ../../pom.xml diff --git a/common/utils/pom.xml b/common/utils/pom.xml index 7c87be73d7d96..7b2a1ad57b0ff 100644 --- a/common/utils/pom.xml +++ b/common/utils/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml index 8bc2802ea5d0d..1a6fe528b9168 100644 --- a/connector/avro/pom.xml +++ b/connector/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml index 87f6a589261cc..695146d7a1113 100644 --- a/connector/connect/client/jvm/pom.xml +++ b/connector/connect/client/jvm/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../../../pom.xml diff --git a/connector/connect/common/pom.xml b/connector/connect/common/pom.xml index 994179fd99ac8..6c50469717f95 100644 --- a/connector/connect/common/pom.xml +++ b/connector/connect/common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml index 801c28319ee84..aeadbacb7c692 100644 --- a/connector/connect/server/pom.xml +++ b/connector/connect/server/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/connector/docker-integration-tests/pom.xml b/connector/docker-integration-tests/pom.xml index 19377b36a612f..435c0fbd797aa 100644 --- a/connector/docker-integration-tests/pom.xml +++ b/connector/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml index ae11f0eac307d..8b6d7d47b0392 100644 --- a/connector/kafka-0-10-assembly/pom.xml +++ b/connector/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-sql/pom.xml b/connector/kafka-0-10-sql/pom.xml index 533a45e18f662..dce1990f1c9d0 100644 --- a/connector/kafka-0-10-sql/pom.xml +++ b/connector/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-token-provider/pom.xml b/connector/kafka-0-10-token-provider/pom.xml index 07ca1c2b2f3c7..5973b9595db8a 100644 --- a/connector/kafka-0-10-token-provider/pom.xml +++ b/connector/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10/pom.xml b/connector/kafka-0-10/pom.xml index 176d92da63801..54ba2b22093d0 100644 --- a/connector/kafka-0-10/pom.xml +++ b/connector/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml index a6ef06142f5cb..5e0c0fcafc12b 100644 --- a/connector/kinesis-asl-assembly/pom.xml +++ b/connector/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl/pom.xml b/connector/kinesis-asl/pom.xml index 4282e1f035716..65b0fa33db29f 100644 --- a/connector/kinesis-asl/pom.xml +++ b/connector/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT 
+ 3.5.4 ../../pom.xml diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml index 2af6002b5c7db..95be9ab74f105 100644 --- a/connector/protobuf/pom.xml +++ b/connector/protobuf/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/spark-ganglia-lgpl/pom.xml b/connector/spark-ganglia-lgpl/pom.xml index a46c9bbfec2cf..62f1c4ab2b124 100644 --- a/connector/spark-ganglia-lgpl/pom.xml +++ b/connector/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index d1b0e82c7c0d5..e59066e19850d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 3dea0c82204bd..a207cc2d911b5 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,7 +19,7 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.5.4-SNAPSHOT +SPARK_VERSION: 3.5.4 SPARK_VERSION_SHORT: 3.5.4 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.18" diff --git a/examples/pom.xml b/examples/pom.xml index 26d91eff504f2..5efc255218570 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index c4f250b40f33d..74ad5a732f054 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index a47d25015dfa9..cb89c27d5f0ba 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index 5c1844be5782d..e36d57fe6a573 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index bb821190273e1..26e9a1502abab 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 202b80d38e24f..079ce72d5d959 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/pom.xml b/pom.xml index 6497056fa2e48..8dc47f391f967 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index 002d06e28ea15..db20a2ffae586 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__: str = "3.5.4.dev0" +__version__: str = "3.5.4" diff --git a/repl/pom.xml b/repl/pom.xml index 5ef505bbc48e5..3f8c931a60664 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index cd90f32d0814f..5991f1848ccf8 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index b72a3daea3c38..8ec4e86ab8f12 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 31377cbda5d8e..9ace7e29de0a0 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index d7f3786e1050f..f0df0ff0ea2f4 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/api/pom.xml b/sql/api/pom.xml index 038b6c16a4e88..c180a208bf093 100644 --- a/sql/api/pom.xml +++ b/sql/api/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 0564a6be7432a..58a2333b5b5a0 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 62d33dbfc2d41..9577de81c2057 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 5d2708dfdd714..6c86bc35a89d7 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 9a313907eb130..be3c952e4131b 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 6cbccb39772c9..21d2981fe1088 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index f23f4a4b50559..7e5724b5d9dd3 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml From 8e6507a3d83a8f8ef466ed5f411884b9655916df Mon Sep 17 00:00:00 2001 From: Jie Yang Date: Mon, 9 Dec 2024 01:32:23 +0000 Subject: [PATCH 32/51] Preparing development version 3.5.5-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- 
common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- common/utils/pom.xml | 2 +- connector/avro/pom.xml | 2 +- connector/connect/client/jvm/pom.xml | 2 +- connector/connect/common/pom.xml | 2 +- connector/connect/server/pom.xml | 2 +- connector/docker-integration-tests/pom.xml | 2 +- connector/kafka-0-10-assembly/pom.xml | 2 +- connector/kafka-0-10-sql/pom.xml | 2 +- connector/kafka-0-10-token-provider/pom.xml | 2 +- connector/kafka-0-10/pom.xml | 2 +- connector/kinesis-asl-assembly/pom.xml | 2 +- connector/kinesis-asl/pom.xml | 2 +- connector/protobuf/pom.xml | 2 +- connector/spark-ganglia-lgpl/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/api/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 45 files changed, 47 insertions(+), 47 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 8657755b8d0ea..5eca59375425e 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.5.4 +Version: 3.5.5 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 47b38621d6400..ee2e7b48871ee 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 3757f69e9bd17..a5ac18252d9c9 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 83243d183b7b9..c3f33905ae20c 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index e74fb05beb0ae..de0af6da6c9ec 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 13c4b5cca1e32..54edc410aa9cc 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 709bbed0c553c..88ae8e2715a04 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 59e9973c42d05..adeab180901c6 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git 
a/common/unsafe/pom.xml b/common/unsafe/pom.xml index e222499eec228..0518930d7eb5b 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/utils/pom.xml b/common/utils/pom.xml index 7b2a1ad57b0ff..fc15de78ed505 100644 --- a/common/utils/pom.xml +++ b/common/utils/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml index 1a6fe528b9168..8d78204ddce30 100644 --- a/connector/avro/pom.xml +++ b/connector/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml index 695146d7a1113..c220ce4d032e5 100644 --- a/connector/connect/client/jvm/pom.xml +++ b/connector/connect/client/jvm/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../../pom.xml diff --git a/connector/connect/common/pom.xml b/connector/connect/common/pom.xml index 6c50469717f95..8fd5820c35c71 100644 --- a/connector/connect/common/pom.xml +++ b/connector/connect/common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml index aeadbacb7c692..747c56d148ebb 100644 --- a/connector/connect/server/pom.xml +++ b/connector/connect/server/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/connector/docker-integration-tests/pom.xml b/connector/docker-integration-tests/pom.xml index 435c0fbd797aa..878bd4590b50f 100644 --- a/connector/docker-integration-tests/pom.xml +++ b/connector/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml index 8b6d7d47b0392..119761501dad9 100644 --- a/connector/kafka-0-10-assembly/pom.xml +++ b/connector/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-sql/pom.xml b/connector/kafka-0-10-sql/pom.xml index dce1990f1c9d0..51ab703585beb 100644 --- a/connector/kafka-0-10-sql/pom.xml +++ b/connector/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-token-provider/pom.xml b/connector/kafka-0-10-token-provider/pom.xml index 5973b9595db8a..7e1c6ebac9a7f 100644 --- a/connector/kafka-0-10-token-provider/pom.xml +++ b/connector/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10/pom.xml b/connector/kafka-0-10/pom.xml index 54ba2b22093d0..5014e1a1494ce 100644 --- a/connector/kafka-0-10/pom.xml +++ b/connector/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml index 5e0c0fcafc12b..65d86f5617b1c 100644 --- a/connector/kinesis-asl-assembly/pom.xml +++ b/connector/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git 
a/connector/kinesis-asl/pom.xml b/connector/kinesis-asl/pom.xml index 65b0fa33db29f..47495110bf7d8 100644 --- a/connector/kinesis-asl/pom.xml +++ b/connector/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml index 95be9ab74f105..3f417fba4b021 100644 --- a/connector/protobuf/pom.xml +++ b/connector/protobuf/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/spark-ganglia-lgpl/pom.xml b/connector/spark-ganglia-lgpl/pom.xml index 62f1c4ab2b124..47db5eb9253e8 100644 --- a/connector/spark-ganglia-lgpl/pom.xml +++ b/connector/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index e59066e19850d..ed5c82d28ca68 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index a207cc2d911b5..969b61fa00363 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.5.4 -SPARK_VERSION_SHORT: 3.5.4 +SPARK_VERSION: 3.5.5-SNAPSHOT +SPARK_VERSION_SHORT: 3.5.5 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.18" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.5.4"] + 'facetFilters': ["version:3.5.5"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index 5efc255218570..506aa7836485a 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 74ad5a732f054..f330d4d1a5377 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index cb89c27d5f0ba..29f47eec8a5c6 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index e36d57fe6a573..e309c1f734296 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 26e9a1502abab..f8a3cf1cc16df 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 079ce72d5d959..1fa3e215977d9 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 8dc47f391f967..0ccb6ac76a9bc 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index db20a2ffae586..df09fc3284fbd 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 
@@ # See the License for the specific language governing permissions and # limitations under the License. -__version__: str = "3.5.4" +__version__: str = "3.5.5.dev0" diff --git a/repl/pom.xml b/repl/pom.xml index 3f8c931a60664..e3f52f07cc4d7 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 5991f1848ccf8..e4d890a930a2a 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 8ec4e86ab8f12..85a125ddfe4b7 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 9ace7e29de0a0..118f0034ddf5a 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index f0df0ff0ea2f4..4c85e90c4e485 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/api/pom.xml b/sql/api/pom.xml index c180a208bf093..1613c3218649b 100644 --- a/sql/api/pom.xml +++ b/sql/api/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 58a2333b5b5a0..e7736c95007ad 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 9577de81c2057..889fba8892568 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 6c86bc35a89d7..110f9e168de6b 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index be3c952e4131b..8fc5d81ab8982 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 21d2981fe1088..e3002680f2173 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 7e5724b5d9dd3..3530297bf8f83 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml From deabe49d94f28d19438865effe2f92ec536d2bc6 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 9 Dec 2024 19:45:16 +0800 Subject: [PATCH 33/51] [SPARK-49134][INFRA][3.5] Support retry for deploying artifacts to Nexus staging 
repository ### What changes were proposed in this pull request? This PR improves `dev/create-release/release-build.sh` by enabling up to three retries when deploying artifacts to the Nexus staging repository. When I was setting up 3.5.2-rc5 on my AWS EC2 instance, I encountered an issue with closing the `orgapachespark-1461` staging repository due to a timeout while uploading a sha1 file. ```xml Uploading spark-streaming-kafka-0-10_2.13/3.5.2/spark-streaming-kafka-0-10_2.13-3.5.2-test-sources.jar.sha1 % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 262 100 221 100 41 15 2 0:00:20 0:00:13 0:00:07 58 408 Request Timeout


Server timeout waiting for the HTTP request from the client.

``` I could have uploaded it manually, but I didn't, because I was afraid of introducing unpredictable errors. So I regenerated and uploaded `orgapachespark-1462`. ### Why are the changes needed? To avoid temporary network errors when performing the publish step for release managers. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests ### Was this patch authored or co-authored using generative AI tooling? no Closes #49108 from LuciferYang/SPARK-49134-3.5. Authored-by: Kent Yao Signed-off-by: yangjie01 --- dev/create-release/release-build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 99841916cf293..3546e721edbd2 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -505,7 +505,7 @@ if [[ "$1" == "publish-release" ]]; then file_short=$(echo $file | sed -e "s/\.\///") dest_url="$nexus_upload/org/apache/spark/$file_short" echo " Uploading $file_short" - curl -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url + curl --retry 3 --retry-all-errors -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url done echo "Closing nexus staging repository" From a3cf28ea73f0bd1147af6557954b329cad5226ea Mon Sep 17 00:00:00 2001 From: Richard Chen Date: Wed, 11 Dec 2024 01:56:22 +0800 Subject: [PATCH 34/51] [SPARK-50463][SQL][3.5] Fix `ConstantColumnVector` with Columnar to Row conversion ### What changes were proposed in this pull request? https://github.com/apache/spark/commit/800faf0abfa368ad0a5ef1e0fa44b74dbaab724e frees column vector resources between batches in columnar to row conversion. However, like `WritableColumnVector`, `ConstantColumnVector` should not free resources between batches because the same data is used across batches. ### Why are the changes needed? Without this change, ConstantColumnVectors with string values, for example, will fail if used with column->row conversion. For instance, reading a parquet table partitioned by a string column with multiple batches. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? added UT that failed before and now passes ### Was this patch authored or co-authored using generative AI tooling? no Closes #49131 from LuciferYang/SPARK-50463-3.5. Authored-by: Richard Chen Signed-off-by: yangjie01 --- .../spark/sql/vectorized/ColumnVector.java | 12 +++++------ .../spark/sql/vectorized/ColumnarBatch.java | 8 ++++---- .../vectorized/ConstantColumnVector.java | 5 +++++ .../vectorized/WritableColumnVector.java | 2 +- .../apache/spark/sql/execution/Columnar.scala | 2 +- .../parquet/ParquetQuerySuite.scala | 20 +++++++++++++++++++ 6 files changed, 37 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java index 7dc2d38144296..ea199e2685a54 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java @@ -68,14 +68,14 @@ public abstract class ColumnVector implements AutoCloseable { public abstract void close(); /** - * Cleans up memory for this column vector if it's not writable. The column vector is not usable - * after this. + * Cleans up memory for this column vector if its resources are freeable between batches. + * The column vector is not usable after this.
* - * If this is a writable column vector, it is a no-op. + * If this is a writable column vector or constant column vector, it is a no-op. */ - public void closeIfNotWritable() { - // By default, we just call close() for all column vectors. If a column vector is writable, it - // should override this method and do nothing. + public void closeIfFreeable() { + // By default, we just call close() for all column vectors. If a column vector is writable or + // constant, it should override this method and do nothing. close(); } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java index 52e4115af336a..7ef570a212292 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java @@ -46,12 +46,12 @@ public void close() { } /** - * Called to close all the columns if they are not writable. This is used to clean up memory - * allocated during columnar processing. + * Called to close all the columns if their resources are freeable between batches. + * This is used to clean up memory allocated during columnar processing. */ - public void closeIfNotWritable() { + public void closeIfFreeable() { for (ColumnVector c: columns) { - c.closeIfNotWritable(); + c.closeIfFreeable(); } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java index 5095e6b0c9c6b..9713998549c72 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java @@ -72,6 +72,11 @@ public ConstantColumnVector(int numRows, DataType type) { } } + public void closeIfFreeable() { + // no-op: `ConstantColumnVector`s reuse the data backing their values across multiple batches and + // are freed at the end of execution in `close`.
+ } + @Override public void close() { stringData = null; diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java index 0fde85fd454c1..d23de1ff0cfe9 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java @@ -88,7 +88,7 @@ public void close() { } @Override - public void closeIfNotWritable() { + public void closeIfFreeable() { // no-op } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala index ea559efc45f13..bfb198adad501 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala @@ -194,7 +194,7 @@ case class ColumnarToRowExec(child: SparkPlan) extends ColumnarToRowTransition w | $shouldStop | } | $idx = $numRows; - | $batch.closeIfNotWritable(); + | $batch.closeIfFreeable(); | $batch = null; | $nextBatchFuncName(); |} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index 29cb224c8787c..f6472ba3d9dbc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -482,6 +482,26 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS } } + test("SPARK-50463: Partition values can be read over multiple batches") { + withTempDir { dir => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_BATCH_SIZE.key -> "1") { + val path = dir.getAbsolutePath + spark.range(0, 5) + .selectExpr("concat(cast(id % 2 as string), 'a') as partCol", "id") + .write + .format("parquet") + .mode("overwrite") + .partitionBy("partCol").save(path) + val df = spark.read.format("parquet").load(path).selectExpr("partCol") + val expected = spark.range(0, 5) + .selectExpr("concat(cast(id % 2 as string), 'a') as partCol") + .collect() + + checkAnswer(df, expected) + } + } + } + test("SPARK-10301 requested schema clipping - same schema") { withTempPath { dir => val path = dir.getCanonicalPath From bb953f97e6c5a26861296aca0a4f14db078ef2ef Mon Sep 17 00:00:00 2001 From: changgyoopark-db Date: Wed, 11 Dec 2024 12:56:43 +0800 Subject: [PATCH 35/51] [SPARK-50510][CONNECT][3.5] Fix sporadic ReattachableExecuteSuite failure ### What changes were proposed in this pull request? ReattachableExecuteSuite detected a rare data race issue where ExecuteThreadRunner may send the client the wrong error code before the SparkConnect service sends the correct error code. - The test fails if ExecuteThreadRunner is finished before the SparkConnect service sends the correct error code and after the session is invalidated; to be specific, the event manager throws an illegal state exception (SPARK-49688) that is translated into an unknown error. - The whole problem was addressed under https://github.com/apache/spark/pull/48208 for Spark 4.0. ### Why are the changes needed? 1. Clients may get the wrong error message: expect session-closed or the like, but get unknown. 2. To fix the ReattachableExecuteSuite failure. 
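To make the race concrete, here is a minimal, self-contained Scala sketch of the masking behavior described above. All names (`SessionClosedException`, `postClosedEvent`, `close`) are hypothetical illustrations, not the actual Spark Connect classes: if the post-closed event handler throws after the session has been invalidated, the cleanup exception replaces the original session-closed error, which the client then observes as an unknown error; guarding the handler with `NonFatal` preserves the original error.

```scala
import scala.util.control.NonFatal

object ErrorMaskingSketch {
  final class SessionClosedException extends RuntimeException("session is closed")

  // Analogue of eventsManager.postClosed() throwing once the session is invalidated.
  def postClosedEvent(sessionValid: Boolean): Unit =
    if (!sessionValid) throw new IllegalStateException("executor already closed")

  // Guarded cleanup: log and swallow non-fatal errors so the original error survives.
  def close(sessionValid: Boolean): Unit =
    try postClosedEvent(sessionValid)
    catch { case NonFatal(e) => println(s"Error posting closed event to UI: ${e.getMessage}") }

  def main(args: Array[String]): Unit =
    try throw new SessionClosedException
    // Without the guard inside close(), the IllegalStateException thrown during
    // cleanup would replace SessionClosedException and surface as an unknown error.
    finally close(sessionValid = false)
}
```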
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? ReattachableExecuteSuite. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49127 from changgyoopark-db/SPARK-50510. Authored-by: changgyoopark-db Signed-off-by: yangjie01 --- .../spark/sql/connect/service/ExecuteHolder.scala | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala index 0e4f344da901c..93a1757cd687a 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala @@ -21,6 +21,7 @@ import java.util.UUID import scala.collection.JavaConverters._ import scala.collection.mutable +import scala.util.control.NonFatal import org.apache.spark.{SparkEnv, SparkSQLException} import org.apache.spark.connect.proto @@ -237,7 +238,14 @@ private[connect] class ExecuteHolder( // it does. responseObserver.removeAll() // post closed to UI - eventsManager.postClosed() + try { + eventsManager.postClosed() + } catch { + // Catching the exception to prevent the wrong error code from being returned to the + // user: SPARK-49688. The issue was fixed by completely refactoring the code in Spark 4.0. + case e: Throwable if NonFatal.apply(e) => + logError(s"Error posting closed event to UI: ${e.getMessage()}") + } } // interrupt any attached grpcResponseSenders grpcResponseSenders.foreach(_.interrupt()) From e97580a904dab438b2b5a51e49db63602dc45592 Mon Sep 17 00:00:00 2001 From: andrej-gobeljic_data Date: Wed, 11 Dec 2024 16:52:11 -0800 Subject: [PATCH 36/51] [SPARK-50087][SQL][3.5] Robust handling of boolean expressions in CASE WHEN for MsSqlServer and future connectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR proposes to propagate the `isPredicate` info in `V2ExpressionBuilder` and wrap the children of a CASE WHEN expression (only `Predicate`s) with `IIF(<>, 1, 0)` for MsSqlServer. This is done to force returning an int instead of a boolean, as SqlServer cannot handle boolean expressions as a return type in CASE WHEN. E.g. ```CASE WHEN ... ELSE a = b END``` Old behavior: ```CASE WHEN ... ELSE a = b END = 1``` New behavior: Since in SqlServer a `= 1` is appended to the CASE WHEN expression, the THEN and ELSE blocks must return an int. Therefore the final expression becomes: ```CASE WHEN ... ELSE IIF(a = b, 1, 0) END = 1``` ### Why are the changes needed? A user cannot work with MsSqlServer data using CASE WHEN or IF clauses if they wish to return a boolean value. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added tests to MsSqlServerIntegrationSuite ### Was this patch authored or co-authored using generative AI tooling? No Closes #49115 from andrej-db/CASEWHENBackport.
Lead-authored-by: andrej-gobeljic_data Co-authored-by: Wenchen Fan Co-authored-by: Andrej Gobeljić Signed-off-by: Wenchen Fan --- .../jdbc/v2/MsSqlServerIntegrationSuite.scala | 79 +++++++++++++++++++ .../util/V2ExpressionSQLBuilder.java | 2 +- .../catalyst/util/V2ExpressionBuilder.scala | 6 +- .../execution/datasources/jdbc/JDBCRDD.scala | 31 +++++++- .../apache/spark/sql/jdbc/JdbcDialects.scala | 13 +++ .../spark/sql/jdbc/MsSqlServerDialect.scala | 24 +++++- 6 files changed, 147 insertions(+), 8 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala index de8fcf1a4a787..78fdbe7158bb7 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala @@ -22,7 +22,11 @@ import java.sql.Connection import org.scalatest.time.SpanSugar._ import org.apache.spark.{SparkConf, SparkSQLFeatureNotSupportedException} +import org.apache.spark.rdd.RDD import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.{RowDataSourceScanExec, SparkPlan} +import org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog import org.apache.spark.sql.jdbc.MsSQLServerDatabaseOnDocker import org.apache.spark.sql.types._ @@ -39,6 +43,17 @@ import org.apache.spark.tags.DockerTest @DockerTest class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { + def getExternalEngineQuery(executedPlan: SparkPlan): String = { + getExternalEngineRdd(executedPlan).asInstanceOf[JDBCRDD].getExternalEngineQuery + } + + def getExternalEngineRdd(executedPlan: SparkPlan): RDD[InternalRow] = { + val queryNode = executedPlan.collect { case r: RowDataSourceScanExec => + r + }.head + queryNode.rdd + } + override def excluded: Seq[String] = Seq( "simple scan with OFFSET", "simple scan with LIMIT and OFFSET", @@ -137,4 +152,68 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JD "WHERE (dept > 1 AND ((name LIKE 'am%') = (name LIKE '%y')))") assert(df3.collect().length == 3) } + + test("SPARK-50087: SqlServer handle booleans in CASE WHEN test") { + val df = sql( + s"""|SELECT * FROM $catalogName.employee + |WHERE CASE WHEN name = 'Legolas' THEN name = 'Elf' ELSE NOT (name = 'Wizard') END + |""".stripMargin + ) + + // scalastyle:off + assert(getExternalEngineQuery(df.queryExecution.executedPlan) == + """SELECT "dept","name","salary","bonus" FROM "employee" WHERE (CASE WHEN ("name" = 'Legolas') THEN IIF(("name" = 'Elf'), 1, 0) ELSE IIF(("name" <> 'Wizard'), 1, 0) END = 1) """ + ) + // scalastyle:on + df.collect() + } + + test("SPARK-50087: SqlServer handle booleans in CASE WHEN with always true test") { + val df = sql( + s"""|SELECT * FROM $catalogName.employee + |WHERE CASE WHEN (name = 'Legolas') THEN (name = 'Elf') ELSE (1=1) END + |""".stripMargin + ) + + // scalastyle:off + assert(getExternalEngineQuery(df.queryExecution.executedPlan) == + """SELECT "dept","name","salary","bonus" FROM "employee" WHERE (CASE WHEN ("name" = 'Legolas') THEN IIF(("name" = 'Elf'), 1, 0) ELSE 1 END = 1) """ + ) + // scalastyle:on + df.collect() + } + + test("SPARK-50087: SqlServer handle 
booleans in nested CASE WHEN test") { + val df = sql( + s"""|SELECT * FROM $catalogName.employee + |WHERE CASE WHEN (name = 'Legolas') THEN + | CASE WHEN (name = 'Elf') THEN (name = 'Elrond') ELSE (name = 'Gandalf') END + | ELSE (name = 'Sauron') END + |""".stripMargin + ) + + // scalastyle:off + assert(getExternalEngineQuery(df.queryExecution.executedPlan) == + """SELECT "dept","name","salary","bonus" FROM "employee" WHERE (CASE WHEN ("name" = 'Legolas') THEN IIF((CASE WHEN ("name" = 'Elf') THEN IIF(("name" = 'Elrond'), 1, 0) ELSE IIF(("name" = 'Gandalf'), 1, 0) END = 1), 1, 0) ELSE IIF(("name" = 'Sauron'), 1, 0) END = 1) """ + ) + // scalastyle:on + df.collect() + } + + test("SPARK-50087: SqlServer handle non-booleans in nested CASE WHEN test") { + val df = sql( + s"""|SELECT * FROM $catalogName.employee + |WHERE CASE WHEN (name = 'Legolas') THEN + | CASE WHEN (name = 'Elf') THEN 'Elf' ELSE 'Wizard' END + | ELSE 'Sauron' END = name + |""".stripMargin + ) + + // scalastyle:off + assert(getExternalEngineQuery(df.queryExecution.executedPlan) == + """SELECT "dept","name","salary","bonus" FROM "employee" WHERE ("name" IS NOT NULL) AND ((CASE WHEN "name" = 'Legolas' THEN CASE WHEN "name" = 'Elf' THEN 'Elf' ELSE 'Wizard' END ELSE 'Sauron' END) = "name") """ + ) + // scalastyle:on + df.collect() + } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index e170951bfa284..193ffee003a3d 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -290,7 +290,7 @@ protected String visitContains(String l, String r) { return l + " LIKE '%" + escapeSpecialCharsForLikePattern(value) + "%' ESCAPE '\\'"; } - private String inputToSQL(Expression input) { + protected String inputToSQL(Expression input) { if (input.children().length > 1) { return "(" + build(input) + ")"; } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala index c7bca751e56e4..bbd5bace76b6c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala @@ -189,8 +189,8 @@ class V2ExpressionBuilder(e: Expression, isPredicate: Boolean = false) { case _: BitwiseNot => generateExpressionWithName("~", expr, isPredicate) case caseWhen @ CaseWhen(branches, elseValue) => val conditions = branches.map(_._1).flatMap(generateExpression(_, true)) - val values = branches.map(_._2).flatMap(generateExpression(_)) - val elseExprOpt = elseValue.flatMap(generateExpression(_)) + val values = branches.map(_._2).flatMap(generateExpression(_, isPredicate)) + val elseExprOpt = elseValue.flatMap(generateExpression(_, isPredicate)) if (conditions.length == branches.length && values.length == branches.length && elseExprOpt.size == elseValue.size) { val branchExpressions = conditions.zip(values).flatMap { case (c, v) => @@ -356,7 +356,7 @@ class V2ExpressionBuilder(e: Expression, isPredicate: Boolean = false) { children: Seq[Expression], dataType: DataType, isPredicate: Boolean): Option[V2Expression] = { - val childrenExpressions = children.flatMap(generateExpression(_)) + val childrenExpressions = 
children.flatMap(generateExpression(_, isPredicate)) if (childrenExpressions.length == children.length) { if (isPredicate && dataType.isInstanceOf[BooleanType]) { Some(new V2Predicate(v2ExpressionName, childrenExpressions.toArray[V2Expression])) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index e241951abe392..2bb2a3a1f67a0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -157,7 +157,7 @@ object JDBCRDD extends Logging { * Both the driver code and the workers must be able to access the database; the driver * needs to fetch the schema while the workers need to fetch the data. */ -private[jdbc] class JDBCRDD( +class JDBCRDD( sc: SparkContext, getConnection: Int => Connection, schema: StructType, @@ -173,11 +173,40 @@ private[jdbc] class JDBCRDD( offset: Int) extends RDD[InternalRow](sc, Nil) { + private lazy val dialect = JdbcDialects.get(url) + + def generateJdbcQuery(partition: Option[JDBCPartition]): String = { + // H2's JDBC driver does not support the setSchema() method. We pass a + // fully-qualified table name in the SELECT statement. I don't know how to + // talk about a table in a completely portable way. + var builder = dialect + .getJdbcSQLQueryBuilder(options) + .withPredicates(predicates, partition.getOrElse(JDBCPartition(whereClause = null, idx = 1))) + .withColumns(columns) + .withSortOrders(sortOrders) + .withLimit(limit) + .withOffset(offset) + + groupByColumns.foreach { groupByKeys => + builder = builder.withGroupByColumns(groupByKeys) + } + + sample.foreach { tableSampleInfo => + builder = builder.withTableSample(tableSampleInfo) + } + + builder.build() + } + /** * Retrieve the list of partitions corresponding to this RDD. */ override def getPartitions: Array[Partition] = partitions + def getExternalEngineQuery: String = { + generateJdbcQuery(partition = None) + } + /** * Runs the SQL query against the JDBC driver. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index ae8d89f0f0469..90fc48ff5276f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -38,6 +38,7 @@ import org.apache.spark.sql.connector.catalog.functions.UnboundFunction import org.apache.spark.sql.connector.catalog.index.TableIndex import org.apache.spark.sql.connector.expressions.{Expression, Literal, NamedReference} import org.apache.spark.sql.connector.expressions.aggregate.AggregateFunc +import org.apache.spark.sql.connector.expressions.filter.Predicate import org.apache.spark.sql.connector.util.V2ExpressionSQLBuilder import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.jdbc.{DriverRegistry, JDBCOptions, JdbcOptionsInWrite, JdbcUtils} @@ -279,6 +280,18 @@ abstract class JdbcDialect extends Serializable with Logging { } private[jdbc] class JDBCSQLBuilder extends V2ExpressionSQLBuilder { + // Some dialects do not support boolean type and this convenient util function is + // provided to generate SQL string without boolean values. 
+ protected def inputToSQLNoBool(input: Expression): String = input match { + case p: Predicate if p.name() == "ALWAYS_TRUE" => "1" + case p: Predicate if p.name() == "ALWAYS_FALSE" => "0" + case p: Predicate => predicateToIntSQL(inputToSQL(p)) + case _ => super.inputToSQL(input) + } + + protected def predicateToIntSQL(input: String): String = + "CASE WHEN " + input + " THEN 1 ELSE 0 END" + override def visitLiteral(literal: Literal[_]): String = { Option(literal.value()).map(v => compileValue(CatalystTypeConverters.convertToScala(v, literal.dataType())).toString) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala index 3022bca87a9f9..f7c9cdad2a43c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala @@ -66,6 +66,8 @@ private object MsSqlServerDialect extends JdbcDialect { supportedFunctions.contains(funcName) class MsSqlServerSQLBuilder extends JDBCSQLBuilder { + override protected def predicateToIntSQL(input: String): String = + "IIF(" + input + ", 1, 0)" override def visitSortOrder( sortKey: String, sortDirection: SortDirection, nullOrdering: NullOrdering): String = { (sortDirection, nullOrdering) match { @@ -93,9 +95,25 @@ private object MsSqlServerDialect extends JdbcDialect { // We shouldn't propagate these queries to MsSqlServer expr match { case e: Predicate => e.name() match { - case "=" | "<>" | "<=>" | "<" | "<=" | ">" | ">=" - if e.children().exists(_.isInstanceOf[Predicate]) => - super.visitUnexpectedExpr(expr) + case "=" | "<>" | "<=>" | "<" | "<=" | ">" | ">=" => + val Array(l, r) = e.children().map(inputToSQLNoBool) + visitBinaryComparison(e.name(), l, r) + case "CASE_WHEN" => + // Since MsSqlServer cannot handle boolean expressions inside + // a CASE WHEN, it is necessary to convert those to another + // CASE WHEN expression that will return 1 or 0 depending on + // the result. + // Example: + // In: ... CASE WHEN a = b THEN c = d ... END + // Out: ... CASE WHEN a = b THEN CASE WHEN c = d THEN 1 ELSE 0 END ... END = 1 + val stringArray = e.children().grouped(2).flatMap { + case Array(whenExpression, thenExpression) => + Array(inputToSQL(whenExpression), inputToSQLNoBool(thenExpression)) + case Array(elseExpression) => + Array(inputToSQLNoBool(elseExpression)) + }.toArray + + visitCaseWhen(stringArray) + " = 1" case _ => super.build(expr) } case _ => super.build(expr) From 92e650c9ccab7a5f4aa25af2d6c0d6052dfe576b Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Thu, 12 Dec 2024 20:47:18 +0800 Subject: [PATCH 37/51] [SPARK-50545][CORE][SQL][3.5] `AccessControlException` should be thrown even if `ignoreCorruptFiles` is enabled Cherry-pick https://github.com/apache/spark/issues/49143 to branch-3.5 ### What changes were proposed in this pull request? `AccessControlException` extends `IOException` but we should not treat it as a data corruption issue. This is similar to SPARK-50483 which handles `BlockMissingException` in the same way. 
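Before the reproduction log below, here is a minimal Scala sketch of the classification this change applies, assuming Hadoop's client libraries are on the classpath. The helper `readIgnoringCorruption` and its parameters are hypothetical, not Spark's actual reader loop; the ordering of the cases mirrors the diffs that follow:

```scala
import java.io.IOException

import org.apache.hadoop.hdfs.BlockMissingException
import org.apache.hadoop.security.AccessControlException

object CorruptFileHandlingSketch {
  // Permission and block-availability failures are not data corruption, so they must
  // propagate even when ignoreCorruptFiles is enabled; any other IOException is
  // logged and swallowed so the task can skip the rest of the corrupt input.
  def readIgnoringCorruption[T](ignoreCorruptFiles: Boolean)(readNext: => T): Option[T] =
    try Some(readNext)
    catch {
      // Both exception types extend IOException, so they must be matched before
      // the generic IOException case or they would be silently swallowed.
      case e @ (_: AccessControlException | _: BlockMissingException) => throw e
      case e: IOException if ignoreCorruptFiles =>
        println(s"Skipped the rest content in the corrupted file: ${e.getMessage}")
        None
    }
}
```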
``` 2024-12-11 06:29:05 WARN HadoopRDD: Skipped the rest content in the corrupted file: hdfs://hadoop-master1.orb.local:8020/warehouse/region/part-00000-2dc8a6f6-8cea-4652-8ba1-762c1b65e2b4-c000:192+192 org.apache.hadoop.security.AccessControlException: Permission denied: user=hive, access=READ, inode="/warehouse/region/part-00000-2dc8a6f6-8cea-4652-8ba1-762c1b65e2b4-c000":kyuubi.hadoop:hadoop:-rw------- at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:506) ``` ### Why are the changes needed? Avoid data issues when `ignoreCorruptFiles` is enabled and an `AccessControlException` occurs. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Manual test. Task fails with `org.apache.hadoop.security.AccessControlException` even with `spark.sql.files.ignoreCorruptFiles=true` and `spark.files.ignoreCorruptFiles=true` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49162 from pan3793/SPARK-50545-3.5. Authored-by: Cheng Pan Signed-off-by: yangjie01 --- core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala | 5 +++-- core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala | 5 +++-- .../apache/spark/sql/execution/datasources/FileScanRDD.scala | 3 ++- .../sql/execution/datasources/v2/FilePartitionReader.scala | 5 +++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index edd07a2649dbb..8aa7d54fd61b9 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -31,6 +31,7 @@ import org.apache.hadoop.mapred._ import org.apache.hadoop.mapred.lib.CombineFileSplit import org.apache.hadoop.mapreduce.TaskType import org.apache.hadoop.mapreduce.lib.input.FileInputFormat +import org.apache.hadoop.security.AccessControlException import org.apache.hadoop.util.ReflectionUtils import org.apache.spark._ @@ -294,7 +295,7 @@ class HadoopRDD[K, V]( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e - case e: BlockMissingException => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning(s"Skipped the rest content in the corrupted file: ${split.inputSplit}", e) finished = true @@ -320,7 +321,7 @@ class HadoopRDD[K, V]( finished = true // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e - case e: BlockMissingException => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning(s"Skipped the rest content in the corrupted file: ${split.inputSplit}", e) finished = true diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index fbd2235aabaf6..7fc93806998bf 100644 --- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -32,6 +32,7 @@ import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, FileInputFormat, FileSplit, InvalidInputException} import org.apache.hadoop.mapreduce.task.{JobContextImpl, TaskAttemptContextImpl}
+import org.apache.hadoop.security.AccessControlException import org.apache.spark._ import org.apache.spark.annotation.DeveloperApi @@ -228,7 +229,7 @@ class NewHadoopRDD[K, V]( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e - case e: BlockMissingException => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning( s"Skipped the rest content in the corrupted file: ${split.serializableHadoopSplit}", @@ -257,7 +258,7 @@ class NewHadoopRDD[K, V]( finished = true // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e - case e: BlockMissingException => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning( s"Skipped the rest content in the corrupted file: ${split.serializableHadoopSplit}", diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala index ce56fc1b28296..8f6f981ec6a73 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala @@ -24,6 +24,7 @@ import scala.util.control.NonFatal import org.apache.hadoop.fs.Path import org.apache.hadoop.hdfs.BlockMissingException +import org.apache.hadoop.security.AccessControlException import org.apache.spark.{Partition => RDDPartition, SparkUpgradeException, TaskContext} import org.apache.spark.deploy.SparkHadoopUtil @@ -260,7 +261,7 @@ class FileScanRDD( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e - case e: BlockMissingException => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => logWarning( s"Skipped the rest of the content in the corrupted file: $currentFile", e) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala index 8f51226dcfe99..6a63d8268c3b0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala @@ -21,6 +21,7 @@ import java.io.{FileNotFoundException, IOException} import scala.util.control.NonFatal import org.apache.hadoop.hdfs.BlockMissingException +import org.apache.hadoop.security.AccessControlException import org.apache.spark.SparkUpgradeException import org.apache.spark.internal.Logging @@ -51,7 +52,7 @@ class FilePartitionReader[T]( // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw QueryExecutionErrors.fileNotFoundError(e) - case e: BlockMissingException => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => logWarning( s"Skipped the rest of the content in the corrupted file.", e) @@ -71,7 +72,7 @@ class FilePartitionReader[T]( throw 
QueryExecutionErrors.unsupportedSchemaColumnConvertError( currentReader.file.urlEncodedPath, e.getColumn, e.getLogicalType, e.getPhysicalType, e) - case e: BlockMissingException => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => logWarning( s"Skipped the rest of the content in the corrupted file: $currentReader", e) From 91af6f9c16f773bdf84dce678eb6ab7b6acb90fd Mon Sep 17 00:00:00 2001 From: Jie Yang Date: Mon, 16 Dec 2024 01:34:44 +0000 Subject: [PATCH 38/51] Preparing Spark release v3.5.4-rc2 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- common/utils/pom.xml | 2 +- connector/avro/pom.xml | 2 +- connector/connect/client/jvm/pom.xml | 2 +- connector/connect/common/pom.xml | 2 +- connector/connect/server/pom.xml | 2 +- connector/docker-integration-tests/pom.xml | 2 +- connector/kafka-0-10-assembly/pom.xml | 2 +- connector/kafka-0-10-sql/pom.xml | 2 +- connector/kafka-0-10-token-provider/pom.xml | 2 +- connector/kafka-0-10/pom.xml | 2 +- connector/kinesis-asl-assembly/pom.xml | 2 +- connector/kinesis-asl/pom.xml | 2 +- connector/protobuf/pom.xml | 2 +- connector/spark-ganglia-lgpl/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/api/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 45 files changed, 47 insertions(+), 47 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 5eca59375425e..8657755b8d0ea 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.5.5 +Version: 3.5.4 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index ee2e7b48871ee..47b38621d6400 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index a5ac18252d9c9..3757f69e9bd17 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index c3f33905ae20c..83243d183b7b9 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index de0af6da6c9ec..e74fb05beb0ae 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 54edc410aa9cc..13c4b5cca1e32 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 88ae8e2715a04..709bbed0c553c 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index adeab180901c6..59e9973c42d05 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 0518930d7eb5b..e222499eec228 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/utils/pom.xml b/common/utils/pom.xml index fc15de78ed505..7b2a1ad57b0ff 100644 --- a/common/utils/pom.xml +++ b/common/utils/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml index 8d78204ddce30..1a6fe528b9168 100644 --- a/connector/avro/pom.xml +++ b/connector/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml index c220ce4d032e5..695146d7a1113 100644 --- a/connector/connect/client/jvm/pom.xml +++ b/connector/connect/client/jvm/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../../pom.xml diff --git a/connector/connect/common/pom.xml b/connector/connect/common/pom.xml index 8fd5820c35c71..6c50469717f95 100644 --- a/connector/connect/common/pom.xml +++ b/connector/connect/common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml index 747c56d148ebb..aeadbacb7c692 100644 --- a/connector/connect/server/pom.xml +++ b/connector/connect/server/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/connector/docker-integration-tests/pom.xml 
b/connector/docker-integration-tests/pom.xml index 878bd4590b50f..435c0fbd797aa 100644 --- a/connector/docker-integration-tests/pom.xml +++ b/connector/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml index 119761501dad9..8b6d7d47b0392 100644 --- a/connector/kafka-0-10-assembly/pom.xml +++ b/connector/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-sql/pom.xml b/connector/kafka-0-10-sql/pom.xml index 51ab703585beb..dce1990f1c9d0 100644 --- a/connector/kafka-0-10-sql/pom.xml +++ b/connector/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-token-provider/pom.xml b/connector/kafka-0-10-token-provider/pom.xml index 7e1c6ebac9a7f..5973b9595db8a 100644 --- a/connector/kafka-0-10-token-provider/pom.xml +++ b/connector/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10/pom.xml b/connector/kafka-0-10/pom.xml index 5014e1a1494ce..54ba2b22093d0 100644 --- a/connector/kafka-0-10/pom.xml +++ b/connector/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml index 65d86f5617b1c..5e0c0fcafc12b 100644 --- a/connector/kinesis-asl-assembly/pom.xml +++ b/connector/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl/pom.xml b/connector/kinesis-asl/pom.xml index 47495110bf7d8..65b0fa33db29f 100644 --- a/connector/kinesis-asl/pom.xml +++ b/connector/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml index 3f417fba4b021..95be9ab74f105 100644 --- a/connector/protobuf/pom.xml +++ b/connector/protobuf/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/spark-ganglia-lgpl/pom.xml b/connector/spark-ganglia-lgpl/pom.xml index 47db5eb9253e8..62f1c4ab2b124 100644 --- a/connector/spark-ganglia-lgpl/pom.xml +++ b/connector/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index ed5c82d28ca68..e59066e19850d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 969b61fa00363..a207cc2d911b5 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. 
-SPARK_VERSION: 3.5.5-SNAPSHOT -SPARK_VERSION_SHORT: 3.5.5 +SPARK_VERSION: 3.5.4 +SPARK_VERSION_SHORT: 3.5.4 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.18" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.5.5"] + 'facetFilters': ["version:3.5.4"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index 506aa7836485a..5efc255218570 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index f330d4d1a5377..74ad5a732f054 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 29f47eec8a5c6..cb89c27d5f0ba 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index e309c1f734296..e36d57fe6a573 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index f8a3cf1cc16df..26e9a1502abab 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 1fa3e215977d9..079ce72d5d959 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/pom.xml b/pom.xml index 0ccb6ac76a9bc..8dc47f391f967 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index df09fc3284fbd..db20a2ffae586 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__: str = "3.5.5.dev0" +__version__: str = "3.5.4" diff --git a/repl/pom.xml b/repl/pom.xml index e3f52f07cc4d7..3f8c931a60664 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index e4d890a930a2a..5991f1848ccf8 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 85a125ddfe4b7..8ec4e86ab8f12 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 118f0034ddf5a..9ace7e29de0a0 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 4c85e90c4e485..f0df0ff0ea2f4 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/api/pom.xml b/sql/api/pom.xml index 1613c3218649b..c180a208bf093 100644 --- a/sql/api/pom.xml +++ b/sql/api/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index e7736c95007ad..58a2333b5b5a0 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 889fba8892568..9577de81c2057 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 110f9e168de6b..6c86bc35a89d7 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 8fc5d81ab8982..be3c952e4131b 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index e3002680f2173..21d2981fe1088 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 3530297bf8f83..7e5724b5d9dd3 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml From a764524c8ae49f0125ff868f96dd6fc751af0ede Mon Sep 17 00:00:00 2001 From: Jie Yang Date: Mon, 16 Dec 2024 01:34:51 +0000 Subject: [PATCH 39/51] Preparing development version 3.5.5-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- 
common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- common/utils/pom.xml | 2 +- connector/avro/pom.xml | 2 +- connector/connect/client/jvm/pom.xml | 2 +- connector/connect/common/pom.xml | 2 +- connector/connect/server/pom.xml | 2 +- connector/docker-integration-tests/pom.xml | 2 +- connector/kafka-0-10-assembly/pom.xml | 2 +- connector/kafka-0-10-sql/pom.xml | 2 +- connector/kafka-0-10-token-provider/pom.xml | 2 +- connector/kafka-0-10/pom.xml | 2 +- connector/kinesis-asl-assembly/pom.xml | 2 +- connector/kinesis-asl/pom.xml | 2 +- connector/protobuf/pom.xml | 2 +- connector/spark-ganglia-lgpl/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/api/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 45 files changed, 47 insertions(+), 47 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 8657755b8d0ea..5eca59375425e 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.5.4 +Version: 3.5.5 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 47b38621d6400..ee2e7b48871ee 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 3757f69e9bd17..a5ac18252d9c9 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 83243d183b7b9..c3f33905ae20c 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index e74fb05beb0ae..de0af6da6c9ec 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 13c4b5cca1e32..54edc410aa9cc 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 709bbed0c553c..88ae8e2715a04 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 59e9973c42d05..adeab180901c6 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git 
a/common/unsafe/pom.xml b/common/unsafe/pom.xml index e222499eec228..0518930d7eb5b 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/utils/pom.xml b/common/utils/pom.xml index 7b2a1ad57b0ff..fc15de78ed505 100644 --- a/common/utils/pom.xml +++ b/common/utils/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml index 1a6fe528b9168..8d78204ddce30 100644 --- a/connector/avro/pom.xml +++ b/connector/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml index 695146d7a1113..c220ce4d032e5 100644 --- a/connector/connect/client/jvm/pom.xml +++ b/connector/connect/client/jvm/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../../pom.xml diff --git a/connector/connect/common/pom.xml b/connector/connect/common/pom.xml index 6c50469717f95..8fd5820c35c71 100644 --- a/connector/connect/common/pom.xml +++ b/connector/connect/common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml index aeadbacb7c692..747c56d148ebb 100644 --- a/connector/connect/server/pom.xml +++ b/connector/connect/server/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/connector/docker-integration-tests/pom.xml b/connector/docker-integration-tests/pom.xml index 435c0fbd797aa..878bd4590b50f 100644 --- a/connector/docker-integration-tests/pom.xml +++ b/connector/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml index 8b6d7d47b0392..119761501dad9 100644 --- a/connector/kafka-0-10-assembly/pom.xml +++ b/connector/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-sql/pom.xml b/connector/kafka-0-10-sql/pom.xml index dce1990f1c9d0..51ab703585beb 100644 --- a/connector/kafka-0-10-sql/pom.xml +++ b/connector/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-token-provider/pom.xml b/connector/kafka-0-10-token-provider/pom.xml index 5973b9595db8a..7e1c6ebac9a7f 100644 --- a/connector/kafka-0-10-token-provider/pom.xml +++ b/connector/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10/pom.xml b/connector/kafka-0-10/pom.xml index 54ba2b22093d0..5014e1a1494ce 100644 --- a/connector/kafka-0-10/pom.xml +++ b/connector/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml index 5e0c0fcafc12b..65d86f5617b1c 100644 --- a/connector/kinesis-asl-assembly/pom.xml +++ b/connector/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git 
a/connector/kinesis-asl/pom.xml b/connector/kinesis-asl/pom.xml index 65b0fa33db29f..47495110bf7d8 100644 --- a/connector/kinesis-asl/pom.xml +++ b/connector/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml index 95be9ab74f105..3f417fba4b021 100644 --- a/connector/protobuf/pom.xml +++ b/connector/protobuf/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/spark-ganglia-lgpl/pom.xml b/connector/spark-ganglia-lgpl/pom.xml index 62f1c4ab2b124..47db5eb9253e8 100644 --- a/connector/spark-ganglia-lgpl/pom.xml +++ b/connector/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index e59066e19850d..ed5c82d28ca68 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index a207cc2d911b5..969b61fa00363 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.5.4 -SPARK_VERSION_SHORT: 3.5.4 +SPARK_VERSION: 3.5.5-SNAPSHOT +SPARK_VERSION_SHORT: 3.5.5 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.18" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.5.4"] + 'facetFilters': ["version:3.5.5"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index 5efc255218570..506aa7836485a 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 74ad5a732f054..f330d4d1a5377 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index cb89c27d5f0ba..29f47eec8a5c6 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index e36d57fe6a573..e309c1f734296 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 26e9a1502abab..f8a3cf1cc16df 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 079ce72d5d959..1fa3e215977d9 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 8dc47f391f967..0ccb6ac76a9bc 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index db20a2ffae586..df09fc3284fbd 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 
@@ # See the License for the specific language governing permissions and # limitations under the License. -__version__: str = "3.5.4" +__version__: str = "3.5.5.dev0" diff --git a/repl/pom.xml b/repl/pom.xml index 3f8c931a60664..e3f52f07cc4d7 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 5991f1848ccf8..e4d890a930a2a 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 8ec4e86ab8f12..85a125ddfe4b7 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 9ace7e29de0a0..118f0034ddf5a 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index f0df0ff0ea2f4..4c85e90c4e485 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/api/pom.xml b/sql/api/pom.xml index c180a208bf093..1613c3218649b 100644 --- a/sql/api/pom.xml +++ b/sql/api/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 58a2333b5b5a0..e7736c95007ad 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 9577de81c2057..889fba8892568 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 6c86bc35a89d7..110f9e168de6b 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index be3c952e4131b..8fc5d81ab8982 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 21d2981fe1088..e3002680f2173 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 7e5724b5d9dd3..3530297bf8f83 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml From 8168ea8548a4935a0506edcadc601850096a10ab Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 3 Dec 2024 15:37:43 +0900 Subject: [PATCH 40/51] [SPARK-50430][CORE][FOLLOW-UP] Keep the logic of manual putting key and values in 
Properties

### What changes were proposed in this pull request?

This PR proposes to preserve, more conservatively, the original code that creates a new `Properties` instance instead of cloning. The previous code only copied the keys and values, but `clone` actually copies more fields in `Properties`. `cloneProperties` is used in Spark Core and all other components, so I propose to keep the logic as is.

### Why are the changes needed?

This is more a fix of a potential bug.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

It is difficult to add a test.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #49036 from HyukjinKwon/SPARK-50430-followup.

Authored-by: Hyukjin Kwon
Signed-off-by: Hyukjin Kwon
(cherry picked from commit 4abaab3ffeba5a3d39216e7224928bb82b254e22)
Signed-off-by: Hyukjin Kwon
---
 core/src/main/scala/org/apache/spark/util/Utils.scala | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 8762f0a6cdbc3..a33bb33ea9c0f 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2982,7 +2982,9 @@ private[spark] object Utils
     if (props == null) {
       return props
     }
-    props.clone().asInstanceOf[Properties]
+    val resultProps = new Properties()
+    resultProps.putAll(props.clone().asInstanceOf[Properties])
+    resultProps
   }

   /**

From 0fbe292774a856ae49c436e9eb83441e9c38f7de Mon Sep 17 00:00:00 2001
From: Hyukjin Kwon
Date: Mon, 16 Dec 2024 14:06:38 +0900
Subject: [PATCH 41/51] Revert "[SPARK-50430][CORE][FOLLOW-UP] Keep the logic of manual putting key and values in Properties"

This reverts commit 8168ea8548a4935a0506edcadc601850096a10ab.
---
 core/src/main/scala/org/apache/spark/util/Utils.scala | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index a33bb33ea9c0f..8762f0a6cdbc3 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2982,9 +2982,7 @@ private[spark] object Utils
     if (props == null) {
       return props
     }
-    val resultProps = new Properties()
-    resultProps.putAll(props.clone().asInstanceOf[Properties])
-    resultProps
+    props.clone().asInstanceOf[Properties]
   }

   /**

From f7c48fe24fd8fb5f7f3eb9b3bec2659d99e4307c Mon Sep 17 00:00:00 2001
From: Hyukjin Kwon
Date: Mon, 16 Dec 2024 14:06:45 +0900
Subject: [PATCH 42/51] Revert "[SPARK-50430][CORE] Use the standard Properties.clone instead of manual clone"

This reverts commit 5ff129ac8261c674b90545f3e1651e166dbc6249.
---
 core/src/main/scala/org/apache/spark/util/Utils.scala | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 8762f0a6cdbc3..3b0efffedec6f 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2982,7 +2982,9 @@ private[spark] object Utils
     if (props == null) {
       return props
     }
-    props.clone().asInstanceOf[Properties]
+    val resultProps = new Properties()
+    props.forEach((k, v) => resultProps.put(k, v))
+    resultProps
   }

   /**
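Stepping back from the diffs above, the distinction these reverts wrestle with is easy to miss, so here is a minimal, standalone sketch (illustrative only, not Spark code; the property names are invented): `Properties.clone()` carries over the internal `defaults` table, while copying entries with `forEach` — the behavior the reverts ultimately restore — does not.

```scala
import java.util.Properties

object ClonePropertiesDemo {
  def main(args: Array[String]): Unit = {
    val defaults = new Properties()
    defaults.setProperty("fallback.key", "fallback")
    // "fallback.key" is resolvable only through the defaults table
    val props = new Properties(defaults)
    props.setProperty("explicit.key", "value")

    // clone() copies the object's fields, including the defaults reference
    val cloned = props.clone().asInstanceOf[Properties]
    assert(cloned.getProperty("fallback.key") == "fallback")

    // forEach iterates only the explicitly set entries, so a manual copy
    // drops the defaults
    val manual = new Properties()
    props.forEach((k, v) => manual.put(k, v))
    assert(manual.getProperty("fallback.key") == null)
  }
}
```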
From b0a7d4d4c5be46282aa9be3a08754c201f38fc2e Mon Sep 17 00:00:00 2001
From: yangjie01
Date: Mon, 16 Dec 2024 20:30:26 +0800
Subject: [PATCH 43/51] [SPARK-50587][INFRA][3.5] Remove unsupported `curl` option `--retry-all-errors` from `release-build.sh`

### What changes were proposed in this pull request?

This PR aims to remove the unsupported `curl` option `--retry-all-errors` from branch-3.5's `release-build.sh`.

### Why are the changes needed?

branch-3.5 uses Ubuntu 20.04 for release, and the `curl` installed via `apt-get install` on Ubuntu 20.04 does not yet support `--retry-all-errors`.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Manually tested.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #49201 from LuciferYang/SPARK-50587.

Authored-by: yangjie01
Signed-off-by: yangjie01
---
 dev/create-release/release-build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
index 3546e721edbd2..7728e84bf9066 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -505,7 +505,7 @@ if [[ "$1" == "publish-release" ]]; then
     file_short=$(echo $file | sed -e "s/\.\///")
     dest_url="$nexus_upload/org/apache/spark/$file_short"
     echo "  Uploading $file_short"
-    curl --retry 3 --retry-all-errors -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url
+    curl --retry 3 -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url
   done

   echo "Closing nexus staging repository"

From a6f220d951742f4074b37772485ee0ec7a774e7d Mon Sep 17 00:00:00 2001
From: Jie Yang
Date: Tue, 17 Dec 2024 04:09:53 +0000
Subject: [PATCH 44/51] Preparing Spark release v3.5.4-rc3

---
 R/pkg/DESCRIPTION | 2 +-
 assembly/pom.xml | 2 +-
 common/kvstore/pom.xml | 2 +-
 common/network-common/pom.xml | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml | 2 +-
 common/sketch/pom.xml | 2 +-
 common/tags/pom.xml | 2 +-
 common/unsafe/pom.xml | 2 +-
 common/utils/pom.xml | 2 +-
 connector/avro/pom.xml | 2 +-
 connector/connect/client/jvm/pom.xml | 2 +-
 connector/connect/common/pom.xml | 2 +-
 connector/connect/server/pom.xml | 2 +-
 connector/docker-integration-tests/pom.xml | 2 +-
 connector/kafka-0-10-assembly/pom.xml | 2 +-
 connector/kafka-0-10-sql/pom.xml | 2 +-
 connector/kafka-0-10-token-provider/pom.xml | 2 +-
 connector/kafka-0-10/pom.xml | 2 +-
 connector/kinesis-asl-assembly/pom.xml | 2 +-
 connector/kinesis-asl/pom.xml | 2 +-
 connector/protobuf/pom.xml | 2 +-
 connector/spark-ganglia-lgpl/pom.xml | 2 +-
 core/pom.xml | 2 +-
 docs/_config.yml | 6 +++---
 examples/pom.xml | 2 +-
 graphx/pom.xml | 2 +-
 hadoop-cloud/pom.xml | 2 +-
 launcher/pom.xml | 2 +-
 mllib-local/pom.xml | 2 +-
 mllib/pom.xml | 2 +-
 pom.xml | 2 +-
 python/pyspark/version.py | 2 +-
 repl/pom.xml | 2 +-
 resource-managers/kubernetes/core/pom.xml | 2 +-
 resource-managers/kubernetes/integration-tests/pom.xml | 2 +-
 resource-managers/mesos/pom.xml | 2 +-
 resource-managers/yarn/pom.xml | 2 +-
 sql/api/pom.xml | 2 +-
 sql/catalyst/pom.xml | 2 +-
 sql/core/pom.xml | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 45 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 5eca59375425e..8657755b8d0ea 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: SparkR
 Type: Package
-Version: 3.5.5
+Version: 3.5.4
 Title: R Front End for 'Apache Spark'
 Description: Provides an R Front end for 'Apache Spark' .
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index ee2e7b48871ee..47b38621d6400 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index a5ac18252d9c9..3757f69e9bd17 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index c3f33905ae20c..83243d183b7b9 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index de0af6da6c9ec..e74fb05beb0ae 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 54edc410aa9cc..13c4b5cca1e32 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 88ae8e2715a04..709bbed0c553c 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index adeab180901c6..59e9973c42d05 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 0518930d7eb5b..e222499eec228 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/utils/pom.xml b/common/utils/pom.xml index fc15de78ed505..7b2a1ad57b0ff 100644 --- a/common/utils/pom.xml +++ b/common/utils/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml index 8d78204ddce30..1a6fe528b9168 100644 --- a/connector/avro/pom.xml +++ b/connector/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml index c220ce4d032e5..695146d7a1113 100644 --- a/connector/connect/client/jvm/pom.xml +++ b/connector/connect/client/jvm/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../../pom.xml diff --git a/connector/connect/common/pom.xml b/connector/connect/common/pom.xml index 8fd5820c35c71..6c50469717f95 100644 --- a/connector/connect/common/pom.xml +++ b/connector/connect/common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml index 747c56d148ebb..aeadbacb7c692 100644 --- a/connector/connect/server/pom.xml +++ b/connector/connect/server/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/connector/docker-integration-tests/pom.xml 
b/connector/docker-integration-tests/pom.xml index 878bd4590b50f..435c0fbd797aa 100644 --- a/connector/docker-integration-tests/pom.xml +++ b/connector/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml index 119761501dad9..8b6d7d47b0392 100644 --- a/connector/kafka-0-10-assembly/pom.xml +++ b/connector/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-sql/pom.xml b/connector/kafka-0-10-sql/pom.xml index 51ab703585beb..dce1990f1c9d0 100644 --- a/connector/kafka-0-10-sql/pom.xml +++ b/connector/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-token-provider/pom.xml b/connector/kafka-0-10-token-provider/pom.xml index 7e1c6ebac9a7f..5973b9595db8a 100644 --- a/connector/kafka-0-10-token-provider/pom.xml +++ b/connector/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10/pom.xml b/connector/kafka-0-10/pom.xml index 5014e1a1494ce..54ba2b22093d0 100644 --- a/connector/kafka-0-10/pom.xml +++ b/connector/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml index 65d86f5617b1c..5e0c0fcafc12b 100644 --- a/connector/kinesis-asl-assembly/pom.xml +++ b/connector/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl/pom.xml b/connector/kinesis-asl/pom.xml index 47495110bf7d8..65b0fa33db29f 100644 --- a/connector/kinesis-asl/pom.xml +++ b/connector/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml index 3f417fba4b021..95be9ab74f105 100644 --- a/connector/protobuf/pom.xml +++ b/connector/protobuf/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/spark-ganglia-lgpl/pom.xml b/connector/spark-ganglia-lgpl/pom.xml index 47db5eb9253e8..62f1c4ab2b124 100644 --- a/connector/spark-ganglia-lgpl/pom.xml +++ b/connector/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index ed5c82d28ca68..e59066e19850d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 969b61fa00363..a207cc2d911b5 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. 
-SPARK_VERSION: 3.5.5-SNAPSHOT -SPARK_VERSION_SHORT: 3.5.5 +SPARK_VERSION: 3.5.4 +SPARK_VERSION_SHORT: 3.5.4 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.18" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.5.5"] + 'facetFilters': ["version:3.5.4"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index 506aa7836485a..5efc255218570 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index f330d4d1a5377..74ad5a732f054 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 29f47eec8a5c6..cb89c27d5f0ba 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index e309c1f734296..e36d57fe6a573 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index f8a3cf1cc16df..26e9a1502abab 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 1fa3e215977d9..079ce72d5d959 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/pom.xml b/pom.xml index 0ccb6ac76a9bc..8dc47f391f967 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index df09fc3284fbd..db20a2ffae586 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__: str = "3.5.5.dev0" +__version__: str = "3.5.4" diff --git a/repl/pom.xml b/repl/pom.xml index e3f52f07cc4d7..3f8c931a60664 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index e4d890a930a2a..5991f1848ccf8 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 85a125ddfe4b7..8ec4e86ab8f12 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 118f0034ddf5a..9ace7e29de0a0 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 4c85e90c4e485..f0df0ff0ea2f4 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/api/pom.xml b/sql/api/pom.xml index 1613c3218649b..c180a208bf093 100644 --- a/sql/api/pom.xml +++ b/sql/api/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index e7736c95007ad..58a2333b5b5a0 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 889fba8892568..9577de81c2057 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 110f9e168de6b..6c86bc35a89d7 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 8fc5d81ab8982..be3c952e4131b 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index e3002680f2173..21d2981fe1088 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 3530297bf8f83..7e5724b5d9dd3 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml From bcaa5a99ab35bdcf51da130a26d998dafc2f3a64 Mon Sep 17 00:00:00 2001 From: Jie Yang Date: Tue, 17 Dec 2024 04:09:59 +0000 Subject: [PATCH 45/51] Preparing development version 3.5.5-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- 
common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- common/utils/pom.xml | 2 +- connector/avro/pom.xml | 2 +- connector/connect/client/jvm/pom.xml | 2 +- connector/connect/common/pom.xml | 2 +- connector/connect/server/pom.xml | 2 +- connector/docker-integration-tests/pom.xml | 2 +- connector/kafka-0-10-assembly/pom.xml | 2 +- connector/kafka-0-10-sql/pom.xml | 2 +- connector/kafka-0-10-token-provider/pom.xml | 2 +- connector/kafka-0-10/pom.xml | 2 +- connector/kinesis-asl-assembly/pom.xml | 2 +- connector/kinesis-asl/pom.xml | 2 +- connector/protobuf/pom.xml | 2 +- connector/spark-ganglia-lgpl/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/api/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 45 files changed, 47 insertions(+), 47 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 8657755b8d0ea..5eca59375425e 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.5.4 +Version: 3.5.5 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 47b38621d6400..ee2e7b48871ee 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 3757f69e9bd17..a5ac18252d9c9 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 83243d183b7b9..c3f33905ae20c 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index e74fb05beb0ae..de0af6da6c9ec 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 13c4b5cca1e32..54edc410aa9cc 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 709bbed0c553c..88ae8e2715a04 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 59e9973c42d05..adeab180901c6 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git 
a/common/unsafe/pom.xml b/common/unsafe/pom.xml index e222499eec228..0518930d7eb5b 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/utils/pom.xml b/common/utils/pom.xml index 7b2a1ad57b0ff..fc15de78ed505 100644 --- a/common/utils/pom.xml +++ b/common/utils/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml index 1a6fe528b9168..8d78204ddce30 100644 --- a/connector/avro/pom.xml +++ b/connector/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml index 695146d7a1113..c220ce4d032e5 100644 --- a/connector/connect/client/jvm/pom.xml +++ b/connector/connect/client/jvm/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../../pom.xml diff --git a/connector/connect/common/pom.xml b/connector/connect/common/pom.xml index 6c50469717f95..8fd5820c35c71 100644 --- a/connector/connect/common/pom.xml +++ b/connector/connect/common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml index aeadbacb7c692..747c56d148ebb 100644 --- a/connector/connect/server/pom.xml +++ b/connector/connect/server/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/connector/docker-integration-tests/pom.xml b/connector/docker-integration-tests/pom.xml index 435c0fbd797aa..878bd4590b50f 100644 --- a/connector/docker-integration-tests/pom.xml +++ b/connector/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml index 8b6d7d47b0392..119761501dad9 100644 --- a/connector/kafka-0-10-assembly/pom.xml +++ b/connector/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-sql/pom.xml b/connector/kafka-0-10-sql/pom.xml index dce1990f1c9d0..51ab703585beb 100644 --- a/connector/kafka-0-10-sql/pom.xml +++ b/connector/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-token-provider/pom.xml b/connector/kafka-0-10-token-provider/pom.xml index 5973b9595db8a..7e1c6ebac9a7f 100644 --- a/connector/kafka-0-10-token-provider/pom.xml +++ b/connector/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10/pom.xml b/connector/kafka-0-10/pom.xml index 54ba2b22093d0..5014e1a1494ce 100644 --- a/connector/kafka-0-10/pom.xml +++ b/connector/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml index 5e0c0fcafc12b..65d86f5617b1c 100644 --- a/connector/kinesis-asl-assembly/pom.xml +++ b/connector/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git 
a/connector/kinesis-asl/pom.xml b/connector/kinesis-asl/pom.xml
index 65b0fa33db29f..47495110bf7d8 100644
--- a/connector/kinesis-asl/pom.xml
+++ b/connector/kinesis-asl/pom.xml
@@ -20,7 +20,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml
index 95be9ab74f105..3f417fba4b021 100644
--- a/connector/protobuf/pom.xml
+++ b/connector/protobuf/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/connector/spark-ganglia-lgpl/pom.xml b/connector/spark-ganglia-lgpl/pom.xml
index 62f1c4ab2b124..47db5eb9253e8 100644
--- a/connector/spark-ganglia-lgpl/pom.xml
+++ b/connector/spark-ganglia-lgpl/pom.xml
@@ -20,7 +20,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index e59066e19850d..ed5c82d28ca68 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/docs/_config.yml b/docs/_config.yml
index a207cc2d911b5..969b61fa00363 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -19,8 +19,8 @@ include:
 # These allow the documentation to be updated with newer releases
 # of Spark, Scala, and Mesos.
-SPARK_VERSION: 3.5.4
-SPARK_VERSION_SHORT: 3.5.4
+SPARK_VERSION: 3.5.5-SNAPSHOT
+SPARK_VERSION_SHORT: 3.5.5
 SCALA_BINARY_VERSION: "2.12"
 SCALA_VERSION: "2.12.18"
 MESOS_VERSION: 1.0.0
@@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: |
     inputSelector: '#docsearch-input',
     enhancedSearchInput: true,
     algoliaOptions: {
-      'facetFilters': ["version:3.5.4"]
+      'facetFilters': ["version:3.5.5"]
     },
     debug: false // Set debug to true if you want to inspect the dropdown
   });
diff --git a/examples/pom.xml b/examples/pom.xml
index 5efc255218570..506aa7836485a 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 74ad5a732f054..f330d4d1a5377 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml
index cb89c27d5f0ba..29f47eec8a5c6 100644
--- a/hadoop-cloud/pom.xml
+++ b/hadoop-cloud/pom.xml
@@ -22,7 +22,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/launcher/pom.xml b/launcher/pom.xml
index e36d57fe6a573..e309c1f734296 100644
--- a/launcher/pom.xml
+++ b/launcher/pom.xml
@@ -22,7 +22,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml
index 26e9a1502abab..f8a3cf1cc16df 100644
--- a/mllib-local/pom.xml
+++ b/mllib-local/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 079ce72d5d959..1fa3e215977d9 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/pom.xml b/pom.xml
index 8dc47f391f967..0ccb6ac76a9bc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -26,7 +26,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 pom
 Spark Project Parent POM
 https://spark.apache.org/
diff --git a/python/pyspark/version.py b/python/pyspark/version.py
index db20a2ffae586..df09fc3284fbd 100644
--- a/python/pyspark/version.py
+++ b/python/pyspark/version.py
@@ -16,4 +16,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__: str = "3.5.4"
+__version__: str = "3.5.5.dev0"
diff --git a/repl/pom.xml b/repl/pom.xml
index 3f8c931a60664..e3f52f07cc4d7 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml
index 5991f1848ccf8..e4d890a930a2a 100644
--- a/resource-managers/kubernetes/core/pom.xml
+++ b/resource-managers/kubernetes/core/pom.xml
@@ -20,7 +20,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../../pom.xml
diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml
index 8ec4e86ab8f12..85a125ddfe4b7 100644
--- a/resource-managers/kubernetes/integration-tests/pom.xml
+++ b/resource-managers/kubernetes/integration-tests/pom.xml
@@ -20,7 +20,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../../pom.xml
diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml
index 9ace7e29de0a0..118f0034ddf5a 100644
--- a/resource-managers/mesos/pom.xml
+++ b/resource-managers/mesos/pom.xml
@@ -20,7 +20,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml
index f0df0ff0ea2f4..4c85e90c4e485 100644
--- a/resource-managers/yarn/pom.xml
+++ b/resource-managers/yarn/pom.xml
@@ -20,7 +20,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/sql/api/pom.xml b/sql/api/pom.xml
index c180a208bf093..1613c3218649b 100644
--- a/sql/api/pom.xml
+++ b/sql/api/pom.xml
@@ -22,7 +22,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml
index 58a2333b5b5a0..e7736c95007ad 100644
--- a/sql/catalyst/pom.xml
+++ b/sql/catalyst/pom.xml
@@ -22,7 +22,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 9577de81c2057..889fba8892568 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -22,7 +22,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml
index 6c86bc35a89d7..110f9e168de6b 100644
--- a/sql/hive-thriftserver/pom.xml
+++ b/sql/hive-thriftserver/pom.xml
@@ -22,7 +22,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml
index be3c952e4131b..8fc5d81ab8982 100644
--- a/sql/hive/pom.xml
+++ b/sql/hive/pom.xml
@@ -22,7 +22,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 21d2981fe1088..e3002680f2173 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index 7e5724b5d9dd3..3530297bf8f83 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml

From 45349b652e2d6c24b4f5eef1f6118389c0d68ed8 Mon Sep 17 00:00:00 2001
From: changgyoopark-db
Date: Tue, 17 Dec 2024 20:12:22 +0900
Subject: [PATCH 46/51] [SPARK-50510][CONNECT][TEST][3.5] Fix flaky ReattachableExecuteSuite

### What changes were proposed in this pull request?
Simplify org.apache.spark.sql.connect.execution.ReattachableExecuteSuite."reattach after connection expired" to make it more deterministic.

### Why are the changes needed?

The test previously involved execution and interruption that made the test unnecessarily flaky, e.g., an exception was thrown when releasing the corresponding [execution](https://github.com/apache/spark/actions/runs/12296721038/job/34316344940), not when reattaching the execution.
- The test's sole purpose is to check whether the lack of 'session' results in the correct error code.
- The involvement of actual query execution only makes the test flaky and complicated.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Repeatedly ran testOnly org.apache.spark.sql.connect.execution.ReattachableExecuteSuite.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #49203 from changgyoopark-db/SPARK-50510.

Authored-by: changgyoopark-db
Signed-off-by: Hyukjin Kwon
---
 .../execution/ReattachableExecuteSuite.scala | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala
index f828e45e6a6c8..00de9fb6fd260 100644
--- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala
+++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala
@@ -58,18 +58,15 @@ class ReattachableExecuteSuite extends SparkConnectServerTest {

   test("reattach after connection expired") {
     withClient { client =>
-      val iter = client.execute(buildPlan(MEDIUM_RESULTS_QUERY))
-      val operationId = getReattachableIterator(iter).operationId
-      // open the iterator
-      iter.next()
-
-      SparkConnectService.invalidateSession(defaultUserId, defaultSessionId)
       withRawBlockingStub { stub =>
-        val iter2 = stub.reattachExecute(buildReattachExecuteRequest(operationId, None))
+        // emulate session expiration
+        SparkConnectService.invalidateSession(defaultUserId, defaultSessionId)

-        // session closed, bound to fail
+        // session closed, bound to fail immediately
+        val operationId = UUID.randomUUID().toString
+        val iter = stub.reattachExecute(buildReattachExecuteRequest(operationId, None))
         val e = intercept[StatusRuntimeException] {
-          while (iter2.hasNext) iter2.next()
+          iter.next()
         }
         assert(e.getMessage.contains("INVALID_HANDLE.SESSION_NOT_FOUND"))
       }
From a3d23fdb775bee3f03c52a77b80bc0c724108e20 Mon Sep 17 00:00:00 2001
From: Neil Ramaswamy
Date: Wed, 18 Dec 2024 15:45:36 +0900
Subject: [PATCH 47/51] [MINOR][SS] Minor update to watermark propagation comments

### What changes were proposed in this pull request?

A few minor changes to clarify the comments for watermark propagation in Structured Streaming (and to fix one typo).

### Why are the changes needed?

I found some of the terminology around "simulation" confusing, and the current comment describes incorrect logic for output watermark calculation.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

N/A.

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #49188 from neilramaswamy/nr/minor-wm-prop.

Authored-by: Neil Ramaswamy
Signed-off-by: Jungtaek Lim
(cherry picked from commit 2b41131d7fa66ef5b23fbe247e057d631ee5e4f6)
Signed-off-by: Jungtaek Lim
---
 .../sql/execution/streaming/WatermarkPropagator.scala | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkPropagator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkPropagator.scala
index 6f3725bebb9ab..3d9325f9c98c5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkPropagator.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkPropagator.scala
@@ -124,12 +124,14 @@ class UseSingleWatermarkPropagator extends WatermarkPropagator {
 /**
  * This implementation simulates propagation of watermark among operators.
  *
- * The simulation algorithm traverses the physical plan tree via post-order (children first) to
- * calculate (input watermark, output watermark) for all nodes.
+ * It is considered a "simulation" because watermarks are not being physically sent between
+ * operators, but rather propagated up the tree via post-order (children first) traversal of
+ * the query plan. This allows Structured Streaming to determine the new (input watermark, output
+ * watermark) for all nodes.
  *
  * For each node, below logic is applied:
  *
- * - Input watermark for specific node is decided by `min(input watermarks from all children)`.
+ * - Input watermark for specific node is decided by `min(output watermarks from all children)`.
  *   -- Children providing no input watermark (DEFAULT_WATERMARK_MS) are excluded.
  *   -- If there is no valid input watermark from children, input watermark = DEFAULT_WATERMARK_MS.
  * - Output watermark for specific node is decided as following:
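[Editorial illustration of the rule the corrected comment describes: a node's input watermark is the minimum of its children's *output* watermarks, computed children-first. This is a minimal standalone Scala sketch, not Spark's implementation; the Node/Leaf/Op names and the -1L sentinel (standing in for DEFAULT_WATERMARK_MS) are invented for illustration.]

object WatermarkPropagationSketch {
  // Sentinel meaning "no watermark provided"; stand-in for Spark's DEFAULT_WATERMARK_MS.
  val DefaultWatermarkMs: Long = -1L

  sealed trait Node
  final case class Leaf(outputWatermarkMs: Long) extends Node // e.g. a source with a known watermark
  final case class Op(children: Seq[Node]) extends Node       // e.g. a stateful operator

  // Post-order (children first) traversal, as the updated comment describes.
  def outputWatermark(node: Node): Long = node match {
    case Leaf(wm) => wm
    case Op(children) =>
      // Input watermark = min(output watermarks from all children),
      // excluding children that provide no watermark.
      val valid = children.map(outputWatermark).filter(_ != DefaultWatermarkMs)
      if (valid.isEmpty) DefaultWatermarkMs else valid.min
      // (A real operator would then derive its own output watermark from this,
      // e.g. by subtracting an event-time delay.)
  }

  def main(args: Array[String]): Unit = {
    // A join-like node over two inputs: the slower input (5000 ms) dictates the result.
    assert(outputWatermark(Op(Seq(Leaf(5000L), Leaf(9000L)))) == 5000L)
    // A child without a watermark is excluded from the min.
    assert(outputWatermark(Op(Seq(Leaf(5000L), Leaf(DefaultWatermarkMs)))) == 5000L)
  }
}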
From 5a91172c019c119e686f8221bbdb31f59d3d7776 Mon Sep 17 00:00:00 2001
From: Cheng Pan
Date: Mon, 23 Dec 2024 09:05:56 +0900
Subject: [PATCH 48/51] [SPARK-50483][SPARK-50545][DOC][FOLLOWUP][3.5] Mention behavior changes in migration guide

Backport https://github.com/apache/spark/pull/49252 to branch-3.5

### What changes were proposed in this pull request?

Update migration guide for SPARK-50483 and SPARK-50545

### Why are the changes needed?

Mention behavior changes in migration guide

### Does this PR introduce _any_ user-facing change?

Yes, docs are updated.

### How was this patch tested?

Review.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #49256 from pan3793/SPARK-50483-SPARK-50545-followup-3.5.

Authored-by: Cheng Pan
Signed-off-by: Hyukjin Kwon
---
 docs/core-migration-guide.md | 4 ++++
 docs/sql-migration-guide.md  | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md
index 36465cc3f4e86..9381b28c8b078 100644
--- a/docs/core-migration-guide.md
+++ b/docs/core-migration-guide.md
@@ -22,6 +22,10 @@ license: |
 * Table of contents
 {:toc}

+## Upgrading from Core 3.5.3 to 3.5.4
+
+- Since Spark 3.5.4, when reading files hits `org.apache.hadoop.security.AccessControlException` and `org.apache.hadoop.hdfs.BlockMissingException`, the exception will be thrown and fail the task, even if `spark.files.ignoreCorruptFiles` is set to `true`.
+
 ## Upgrading from Core 3.4 to 3.5

 - Since Spark 3.5, `spark.yarn.executor.failuresValidityInterval` is deprecated. Use `spark.executor.failuresValidityInterval` instead.
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 0f3adbdafeaf9..be4e92ec4df43 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -22,6 +22,10 @@ license: |
 * Table of contents
 {:toc}

+## Upgrading from Spark SQL 3.5.3 to 3.5.4
+
+- Since Spark 3.5.4, when reading SQL tables hits `org.apache.hadoop.security.AccessControlException` and `org.apache.hadoop.hdfs.BlockMissingException`, the exception will be thrown and fail the task, even if `spark.sql.files.ignoreCorruptFiles` is set to `true`.
+
 ## Upgrading from Spark SQL 3.5.1 to 3.5.2

 - Since 3.5.2, MySQL JDBC datasource will read TINYINT UNSIGNED as ShortType, while in 3.5.1, it was wrongly read as ByteType.
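[Editorial illustration of the configuration the two migration notes above refer to. The session setup below is a generic hedged sketch, not part of this patch; only the two `ignoreCorruptFiles` keys come from the guide. Since 3.5.4, AccessControlException / BlockMissingException raised during a read fail the task even with these flags enabled.]

import org.apache.spark.sql.SparkSession

object IgnoreCorruptFilesSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("ignore-corrupt-files-sketch")
      .config("spark.files.ignoreCorruptFiles", "true")     // core file reads (SparkContext APIs)
      .config("spark.sql.files.ignoreCorruptFiles", "true") // SQL / DataFrame file sources
      .getOrCreate()
    // Reads of genuinely corrupt files are skipped; HDFS access/availability
    // errors are no longer swallowed as of 3.5.4.
    spark.stop()
  }
}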
From c458b6ae38b81fc727972d530f3c0bbc0948aa9f Mon Sep 17 00:00:00 2001
From: ejblanco
Date: Tue, 7 Jan 2025 09:26:45 +0100
Subject: [PATCH 49/51] fix mockito

---
 resource-managers/kubernetes/core/pom.xml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml
index 5991f1848ccf8..a34750602eba1 100644
--- a/resource-managers/kubernetes/core/pom.xml
+++ b/resource-managers/kubernetes/core/pom.xml
@@ -117,7 +117,8 @@
 org.mockito
- mockito-core
+ mockito-scala_2.12
+ 1.17.22
 test
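[Editorial context for the dependency swap above: mockito-scala wraps mockito-core with a Scala-friendly API. A hedged sketch of the style it enables follows; the MockitoSugar trait and its mock/when/verify sugar are assumed from mockito-scala 1.17.x's documented API, and the Greeter trait is invented for illustration.]

import org.mockito.MockitoSugar // provided by mockito-scala, not mockito-core

trait Greeter { def greet(name: String): String }

object MockitoScalaSketch extends MockitoSugar {
  def demo(): Unit = {
    val greeter = mock[Greeter] // no classOf/ClassTag boilerplate needed
    when(greeter.greet("spark")).thenReturn("hello, spark")
    assert(greeter.greet("spark") == "hello, spark")
    verify(greeter).greet("spark")
  }
}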
From b58ca6d1c5c84e44135da0c5d808ba14a1c3b7e5 Mon Sep 17 00:00:00 2001
From: ejblanco
Date: Tue, 7 Jan 2025 10:53:18 +0100
Subject: [PATCH 50/51] add custom

---
 common/tags/dev/checkstyle-suppressions.xml  |  63 +++
 common/tags/dev/checkstyle.xml               | 191 +++++++
 common/tags/pom.xml                          |   1 +
 common/tags/scalastyle-config.xml            | 465 ++++++++++++++++++
 .../core/dev/checkstyle-suppressions.xml     |  63 +++
 .../kubernetes/core/dev/checkstyle.xml       | 191 +++++++
 resource-managers/kubernetes/core/pom.xml    |   6 +-
 .../kubernetes/core/scalastyle-config.xml    | 465 ++++++++++++++++++
 8 files changed, 1443 insertions(+), 2 deletions(-)
 create mode 100644 common/tags/dev/checkstyle-suppressions.xml
 create mode 100644 common/tags/dev/checkstyle.xml
 create mode 100644 common/tags/scalastyle-config.xml
 create mode 100644 resource-managers/kubernetes/core/dev/checkstyle-suppressions.xml
 create mode 100644 resource-managers/kubernetes/core/dev/checkstyle.xml
 create mode 100644 resource-managers/kubernetes/core/scalastyle-config.xml

diff --git a/common/tags/dev/checkstyle-suppressions.xml b/common/tags/dev/checkstyle-suppressions.xml
new file mode 100644
index 0000000000000..8ba1ff1b3b1eb
--- /dev/null
+++ b/common/tags/dev/checkstyle-suppressions.xml
@@ -0,0 +1,63 @@
+[... 63 added lines: a copy of Spark's dev/checkstyle-suppressions.xml; the XML markup was lost in extraction ...]
diff --git a/common/tags/dev/checkstyle.xml b/common/tags/dev/checkstyle.xml
new file mode 100644
index 0000000000000..5af15318081a6
--- /dev/null
+++ b/common/tags/dev/checkstyle.xml
@@ -0,0 +1,191 @@
+[... 191 added lines: a copy of Spark's dev/checkstyle.xml; the XML markup was lost in extraction ...]
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index 59e9973c42d05..638c33ad4647f 100644
--- a/common/tags/pom.xml
+++ b/common/tags/pom.xml
@@ -27,6 +27,7 @@
 spark-tags_2.12
+ 3.5.4-CUSTOM
 jar
 Spark Project Tags
 https://spark.apache.org/
diff --git a/common/tags/scalastyle-config.xml b/common/tags/scalastyle-config.xml
new file mode 100644
index 0000000000000..0ccd937e72e88
--- /dev/null
+++ b/common/tags/scalastyle-config.xml
@@ -0,0 +1,465 @@
+[... 465 added lines: a copy of Spark's "Scalastyle standard configuration" (scalastyle-config.xml); the XML markup was lost in extraction ...]
diff --git a/resource-managers/kubernetes/core/dev/checkstyle-suppressions.xml b/resource-managers/kubernetes/core/dev/checkstyle-suppressions.xml
new file mode 100644
index 0000000000000..8ba1ff1b3b1eb
--- /dev/null
+++ b/resource-managers/kubernetes/core/dev/checkstyle-suppressions.xml
@@ -0,0 +1,63 @@
+[... 63 added lines: the same checkstyle-suppressions.xml copied under resource-managers/kubernetes/core/dev/ ...]
diff --git a/resource-managers/kubernetes/core/dev/checkstyle.xml b/resource-managers/kubernetes/core/dev/checkstyle.xml
new file mode 100644
index 0000000000000..5af15318081a6
--- /dev/null
+++ b/resource-managers/kubernetes/core/dev/checkstyle.xml
@@ -0,0 +1,191 @@
+[... 191 added lines: the same checkstyle.xml copied under resource-managers/kubernetes/core/dev/ ...]
diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml
index a34750602eba1..7296ca7ae0efc 100644
--- a/resource-managers/kubernetes/core/pom.xml
+++ b/resource-managers/kubernetes/core/pom.xml
@@ -25,6 +25,7 @@
 spark-kubernetes_2.12
+ 3.5.4-CUSTOM
 jar
 Spark Project Kubernetes
@@ -57,13 +58,13 @@
 org.apache.spark
 spark-core_${scala.binary.version}
- ${project.version}
+ 3.5.4
 org.apache.spark
 spark-core_${scala.binary.version}
- ${project.version}
+ 3.5.4
 test-jar
 test
@@ -71,6 +72,7 @@
 org.apache.spark
 spark-tags_${scala.binary.version}
+ 3.5.4-CUSTOM
 test-jar
 test
diff --git a/resource-managers/kubernetes/core/scalastyle-config.xml b/resource-managers/kubernetes/core/scalastyle-config.xml
new file mode 100644
index 0000000000000..0ccd937e72e88
--- /dev/null
+++ b/resource-managers/kubernetes/core/scalastyle-config.xml
@@ -0,0 +1,465 @@
+[... 465 added lines: the same scalastyle-config.xml copied under resource-managers/kubernetes/core/ ...]

From 94e6bb71679ac45bba989f67353519f8723b4885 Mon Sep 17 00:00:00 2001
From: ejblanco
Date: Tue, 7 Jan 2025 11:01:12 +0100
Subject: [PATCH 51/51] Revert "add custom"

This reverts commit b58ca6d1c5c84e44135da0c5d808ba14a1c3b7e5.

---
 common/tags/dev/checkstyle-suppressions.xml  |  63 ---
 common/tags/dev/checkstyle.xml               | 191 -------
 common/tags/pom.xml                          |   1 -
 common/tags/scalastyle-config.xml            | 465 ------------------
 .../core/dev/checkstyle-suppressions.xml     |  63 ---
 .../kubernetes/core/dev/checkstyle.xml       | 191 -------
 resource-managers/kubernetes/core/pom.xml    |   6 +-
 .../kubernetes/core/scalastyle-config.xml    | 465 ------------------
 8 files changed, 2 insertions(+), 1443 deletions(-)
 delete mode 100644 common/tags/dev/checkstyle-suppressions.xml
 delete mode 100644 common/tags/dev/checkstyle.xml
 delete mode 100644 common/tags/scalastyle-config.xml
 delete mode 100644 resource-managers/kubernetes/core/dev/checkstyle-suppressions.xml
 delete mode 100644 resource-managers/kubernetes/core/dev/checkstyle.xml
 delete mode 100644 resource-managers/kubernetes/core/scalastyle-config.xml

diff --git a/common/tags/dev/checkstyle-suppressions.xml b/common/tags/dev/checkstyle-suppressions.xml
deleted file mode 100644
index 8ba1ff1b3b1eb..0000000000000
--- a/common/tags/dev/checkstyle-suppressions.xml
+++ /dev/null
@@ -1,63 +0,0 @@
-[... 63 deleted lines: the checkstyle-suppressions.xml added in the previous commit ...]
diff --git a/common/tags/dev/checkstyle.xml b/common/tags/dev/checkstyle.xml
deleted file mode 100644
index 5af15318081a6..0000000000000
--- a/common/tags/dev/checkstyle.xml
+++ /dev/null
@@ -1,191 +0,0 @@
-[... 191 deleted lines: the checkstyle.xml added in the previous commit ...]
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index 638c33ad4647f..59e9973c42d05 100644
--- a/common/tags/pom.xml
+++ b/common/tags/pom.xml
@@ -27,7 +27,6 @@
 spark-tags_2.12
- 3.5.4-CUSTOM
 jar
 Spark Project Tags
 https://spark.apache.org/
diff --git a/common/tags/scalastyle-config.xml b/common/tags/scalastyle-config.xml
deleted file mode 100644
index 0ccd937e72e88..0000000000000
--- a/common/tags/scalastyle-config.xml
+++ /dev/null
@@ -1,465 +0,0 @@
-[... 465 deleted lines: the scalastyle-config.xml added in the previous commit ...]
diff --git a/resource-managers/kubernetes/core/dev/checkstyle-suppressions.xml b/resource-managers/kubernetes/core/dev/checkstyle-suppressions.xml
deleted file mode 100644
index 8ba1ff1b3b1eb..0000000000000
--- a/resource-managers/kubernetes/core/dev/checkstyle-suppressions.xml
+++ /dev/null
@@ -1,63 +0,0 @@
-[... 63 deleted lines: the copied checkstyle-suppressions.xml ...]
diff --git a/resource-managers/kubernetes/core/dev/checkstyle.xml b/resource-managers/kubernetes/core/dev/checkstyle.xml
deleted file mode 100644
index 5af15318081a6..0000000000000
--- a/resource-managers/kubernetes/core/dev/checkstyle.xml
+++ /dev/null
@@ -1,191 +0,0 @@
-[... 191 deleted lines: the copied checkstyle.xml ...]
diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml
index 7296ca7ae0efc..a34750602eba1 100644
--- a/resource-managers/kubernetes/core/pom.xml
+++ b/resource-managers/kubernetes/core/pom.xml
@@ -25,7 +25,6 @@
 spark-kubernetes_2.12
- 3.5.4-CUSTOM
 jar
 Spark Project Kubernetes
@@ -58,13 +57,13 @@
 org.apache.spark
 spark-core_${scala.binary.version}
- 3.5.4
+ ${project.version}
 org.apache.spark
 spark-core_${scala.binary.version}
- 3.5.4
+ ${project.version}
 test-jar
 test
@@ -72,7 +71,6 @@
 org.apache.spark
 spark-tags_${scala.binary.version}
- 3.5.4-CUSTOM
 test-jar
 test
diff --git a/resource-managers/kubernetes/core/scalastyle-config.xml b/resource-managers/kubernetes/core/scalastyle-config.xml
deleted file mode 100644
index 0ccd937e72e88..0000000000000
--- a/resource-managers/kubernetes/core/scalastyle-config.xml
+++ /dev/null
@@ -1,465 +0,0 @@
-[... 465 deleted lines: the copied scalastyle-config.xml ...]