From e3e79e5b0891b41c8b3399cebefba3e14d4b4db7 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 25 Oct 2024 18:10:21 +0200 Subject: [PATCH 01/51] [SPARK-50123][TESTS] Move BitmapExpressionUtilsSuite & ExpressionImplUtilsSuite from java to scala test sources folder ### What changes were proposed in this pull request? Move the BitmapExpressionUtilsSuite and ExpressionImplUtilsSuite from the Java to the Scala test sources folder where they belong. ### Why are the changes needed? code refactoring ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests ### Was this patch authored or co-authored using generative AI tooling? no Closes #48657 from yaooqinn/minor. Authored-by: Kent Yao Signed-off-by: Max Gekk (cherry picked from commit 4de286aed61cd9199a99257a395a3e375d0aab3c) Signed-off-by: Max Gekk --- .../sql/catalyst/expressions/BitmapExpressionUtilsSuite.scala | 0 .../spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename sql/catalyst/src/test/{java => scala}/org/apache/spark/sql/catalyst/expressions/BitmapExpressionUtilsSuite.scala (100%) rename sql/catalyst/src/test/{java => scala}/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala (100%) diff --git a/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/BitmapExpressionUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitmapExpressionUtilsSuite.scala similarity index 100% rename from sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/BitmapExpressionUtilsSuite.scala rename to sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitmapExpressionUtilsSuite.scala diff --git a/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala similarity index 100% rename from sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala rename to sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala From 2f5e0fae122283c1ebd2c7b84b9614758af4b674 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 28 Oct 2024 17:00:33 -0700 Subject: [PATCH 02/51] [SPARK-50150][BUILD][3.5] Upgrade Jetty to 9.4.56.v20240826 ### What changes were proposed in this pull request? This PR aims to upgrade Jetty to 9.4.56.v20240826. ### Why are the changes needed? To bring the latest bug fixes. ### Does this PR introduce _any_ user-facing change? No behavior change. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48684 from dongjoon-hyun/SPARK-50150. 
Authored-by: Dongjoon Hyun
Signed-off-by: Dongjoon Hyun
---
 dev/deps/spark-deps-hadoop-3-hive-2.3 | 4 ++--
 pom.xml                               | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3
index a9d63c1ad0f99..62e001b8fd53a 100644
--- a/dev/deps/spark-deps-hadoop-3-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -130,8 +130,8 @@ jersey-container-servlet/2.40//jersey-container-servlet-2.40.jar
 jersey-hk2/2.40//jersey-hk2-2.40.jar
 jersey-server/2.40//jersey-server-2.40.jar
 jettison/1.1//jettison-1.1.jar
-jetty-util-ajax/9.4.54.v20240208//jetty-util-ajax-9.4.54.v20240208.jar
-jetty-util/9.4.54.v20240208//jetty-util-9.4.54.v20240208.jar
+jetty-util-ajax/9.4.56.v20240826//jetty-util-ajax-9.4.56.v20240826.jar
+jetty-util/9.4.56.v20240826//jetty-util-9.4.56.v20240826.jar
 jline/2.14.6//jline-2.14.6.jar
 joda-time/2.12.5//joda-time-2.12.5.jar
 jodd-core/3.5.2//jodd-core-3.5.2.jar

diff --git a/pom.xml b/pom.xml
index 3d9b003bd19c8..8a1bfd7ee6b0e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -143,7 +143,7 @@
     1.13.1
     1.9.4
     shaded-protobuf
-    9.4.54.v20240208
+    9.4.56.v20240826
     4.0.3
     0.10.0
     10.14.2.0

     1.13.1
-    1.9.4
+    1.9.5
     shaded-protobuf
     9.4.56.v20240826
     4.0.3

From 242d33399658b5eb10c012bc33ab676ff0b32ded Mon Sep 17 00:00:00 2001
From: cuiyanxiang
Date: Fri, 15 Nov 2024 14:41:49 +0800
Subject: [PATCH 16/51] [SPARK-50312][SQL] SparkThriftServer createServer
 parameter passing error when kerberos is true

### What changes were proposed in this pull request?

When kerberos is enabled and SparkThriftServer is started, the keytab and principal parameters are passed to `createServer` in the wrong order, which causes hadoop authentication errors:

`saslServer = ShimLoader.getHadoopThriftAuthBridge().createServer(principal, keytab);`

whereas the signature expects the keytab first:

`public Server createServer(String keytabFile, String principalConf) throws TTransportException { return new Server(keytabFile, principalConf); }`

### Why are the changes needed?

SparkThriftServer fails to start when kerberos is true.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

verified

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #48855 from CuiYanxiang/SPARK-50312.

Authored-by: cuiyanxiang
Signed-off-by: Kent Yao
(cherry picked from commit 3237885000e9126f261013ed3550bad394727466)
Signed-off-by: Kent Yao
---
 .../main/java/org/apache/hive/service/auth/HiveAuthFactory.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java
index e3316cef241c3..ba8210a267701 100644
--- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java
+++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java
@@ -117,7 +117,7 @@ public HiveAuthFactory(HiveConf conf) throws TTransportException, IOException {
       String keytab = conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB);
       if (needUgiLogin(UserGroupInformation.getCurrentUser(),
         SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keytab)) {
-        saslServer = ShimLoader.getHadoopThriftAuthBridge().createServer(principal, keytab);
+        saslServer = ShimLoader.getHadoopThriftAuthBridge().createServer(keytab, principal);
       } else {
         // Using the default constructor to avoid unnecessary UGI login.
        saslServer = new HadoopThriftAuthBridge.Server();

From 08b195c6faa0eea63d96bd781ac2e9ba34998e4f Mon Sep 17 00:00:00 2001
From: Kent Yao
Date: Wed, 20 Nov 2024 11:41:52 +0800
Subject: [PATCH 17/51] [MINOR][DOCS] Fix a HTML/Markdown syntax error in
 sql-migration-guide.md

### What changes were proposed in this pull request?

This PR fixes the below HTML/Markdown syntax error in sql-migration-guide.md

![image](https://github.com/user-attachments/assets/bb62a240-1ee5-4763-92c2-97fdd5436284)

### Why are the changes needed?

docfix

### Does this PR introduce _any_ user-facing change?

no

### How was this patch tested?

![image](https://github.com/user-attachments/assets/95b83aa0-beb1-418c-be08-02310010f4d8)

### Was this patch authored or co-authored using generative AI tooling?

no

Closes #48899 from yaooqinn/minor.

Authored-by: Kent Yao
Signed-off-by: Kent Yao
(cherry picked from commit b582daca568f7098c7b8fe4e3068e5986acdc18b)
Signed-off-by: Kent Yao
---
 docs/sql-migration-guide.md | 239 ++++++++++++++++--------------------
 1 file changed, 104 insertions(+), 135 deletions(-)

diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 964f7de637e8b..0f3adbdafeaf9 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -595,142 +595,111 @@ license: |

  - Since Spark 2.3, the Join/Filter's deterministic predicates that are after the first non-deterministic predicates are also pushed down/through the child operators, if possible. In prior Spark versions, these filters are not eligible for predicate pushdown.

 - Partition column inference previously found incorrect common type for different inferred types, for example, previously it ended up with double type as the common type for double type and date type. Now it finds the correct common type for such conflicts. The conflict resolution follows the table below (the patch only reworks the table's HTML markup; the cell contents are unchanged):

| InputA \ InputB | NullType | IntegerType | LongType | DecimalType(38,0)* | DoubleType | DateType | TimestampType | StringType |
|---|---|---|---|---|---|---|---|---|
| NullType | NullType | IntegerType | LongType | DecimalType(38,0) | DoubleType | DateType | TimestampType | StringType |
| IntegerType | IntegerType | IntegerType | LongType | DecimalType(38,0) | DoubleType | StringType | StringType | StringType |
| LongType | LongType | LongType | LongType | DecimalType(38,0) | StringType | StringType | StringType | StringType |
| DecimalType(38,0)* | DecimalType(38,0) | DecimalType(38,0) | DecimalType(38,0) | DecimalType(38,0) | StringType | StringType | StringType | StringType |
| DoubleType | DoubleType | DoubleType | StringType | StringType | DoubleType | StringType | StringType | StringType |
| DateType | DateType | StringType | StringType | StringType | StringType | DateType | TimestampType | StringType |
| TimestampType | TimestampType | StringType | StringType | StringType | StringType | TimestampType | TimestampType | StringType |
| StringType | StringType | StringType | StringType | StringType | StringType | StringType | StringType | StringType |

Note that, for DecimalType(38,0)*, the table above intentionally does not cover all other combinations of scales and precisions because currently we only infer decimal type like `BigInteger`/`BigInt`. For example, 1.1 is inferred as double type.
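As a concrete reading of the matrix (a hypothetical layout; the paths and payload column are illustrative, not from the guide, and a running `spark` session is assumed):

```scala
// Two partition directories whose values infer to conflicting types:
//   /data/tbl/p=1.1/...          -> p inferred as DoubleType
//   /data/tbl/p=2018-01-01/...   -> p inferred as DateType
// Per the table, DoubleType vs. DateType resolves to StringType,
// so the partition column is read back as a string:
val df = spark.read.parquet("/data/tbl")
df.printSchema()
// root
//  |-- value: long (nullable = true)    (illustrative payload column)
//  |-- p: string (nullable = true)
```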
From df9b9def5528a1b653ab6764347c3a8115c59eed Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Wed, 20 Nov 2024 21:03:14 +0800
Subject: [PATCH 18/51] [SPARK-50258][SQL] Fix output column order changed
 issue after AQE optimization

### What changes were proposed in this pull request?

The root cause of this issue is that the planner turns `Limit` + `Sort` into `TakeOrderedAndProjectExec`, which adds an additional `Project` that does not exist in the logical plan. We shouldn't use this additional `Project` to optimize out other `Project`s, otherwise when AQE turns the physical plan back into a logical plan, we lose the `Project` and may mess up the output column order.

This PR makes `RemoveRedundantProjects` not remove redundant projects if AQE is enabled and the projectList is the same as the child output in `TakeOrderedAndProjectExec`.

### Why are the changes needed?

Fix a potential data issue and avoid a Spark Driver crash:
```
...
```

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Unit test.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #48789 from wangyum/SPARK-50258.

Authored-by: Yuming Wang
Signed-off-by: Wenchen Fan
(cherry picked from commit 6ee53da5f356232e2026a67c8408de38c625038e)
Signed-off-by: Wenchen Fan
---
 .../execution/RemoveRedundantProjects.scala   |  8 ++++++-
 .../adaptive/AdaptiveQueryExecSuite.scala     | 23 ++++++++++++++++++-
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala
index 8f4ce0f49a89a..69230fd7b3343 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala
@@ -58,7 +58,13 @@ object RemoveRedundantProjects extends Rule[SparkPlan] {
         p.mapChildren(removeProject(_, false))
       }
     case op: TakeOrderedAndProjectExec =>
-      op.mapChildren(removeProject(_, false))
+      // The planner turns Limit + Sort into TakeOrderedAndProjectExec which adds an additional
+      // Project that does not exist in the logical plan. We shouldn't use this additional Project
+      // to optimize out other Projects, otherwise when AQE turns physical plan back to
+      // logical plan, we lose the Project and may mess up the output column order. So column
+      // ordering is required if AQE is enabled and projectList is the same as child output.
+      val requireColOrdering = conf.adaptiveExecutionEnabled && op.projectList == op.child.output
+      op.mapChildren(removeProject(_, requireColOrdering))
     case a: BaseAggregateExec =>
       // BaseAggregateExec require specific column ordering when mode is Final or PartialMerge.
       // See comments in BaseAggregateExec inputAttributes method.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index f6b96ee7e1ebd..2f8e401e743bb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.shuffle.sort.SortShuffleManager import org.apache.spark.sql.{Dataset, QueryTest, Row, SparkSession, Strategy} import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} -import org.apache.spark.sql.execution.{CollectLimitExec, ColumnarToRowExec, LocalTableScanExec, PartialReducerPartitionSpec, QueryExecution, ReusedSubqueryExec, ShuffledRowRDD, SortExec, SparkPlan, SparkPlanInfo, UnionExec} +import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.aggregate.BaseAggregateExec import org.apache.spark.sql.execution.columnar.{InMemoryTableScanExec, InMemoryTableScanLike} import org.apache.spark.sql.execution.command.DataWritingCommandExec @@ -40,6 +40,7 @@ import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ENSURE_RE import org.apache.spark.sql.execution.joins.{BaseJoinExec, BroadcastHashJoinExec, BroadcastNestedLoopJoinExec, ShuffledHashJoinExec, ShuffledJoin, SortMergeJoinExec} import org.apache.spark.sql.execution.metric.SQLShuffleReadMetricsReporter import org.apache.spark.sql.execution.ui.{SparkListenerSQLAdaptiveExecutionUpdate, SparkListenerSQLAdaptiveSQLMetricUpdates, SparkListenerSQLExecutionStart} +import org.apache.spark.sql.execution.window.WindowExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.PartitionOverwriteMode @@ -2965,6 +2966,26 @@ class AdaptiveQueryExecSuite } } } + + test("SPARK-50258: Fix output column order changed issue after AQE optimization") { + withTable("t") { + sql("SELECT course, year, earnings FROM courseSales").write.saveAsTable("t") + val df = sql( + """ + |SELECT year, course, earnings, SUM(earnings) OVER (ORDER BY year, course) AS balance + |FROM t ORDER BY year, course + |LIMIT 100 + |""".stripMargin) + df.collect() + + val plan = df.queryExecution.executedPlan.asInstanceOf[AdaptiveSparkPlanExec] + assert(plan.inputPlan.isInstanceOf[TakeOrderedAndProjectExec]) + assert(plan.finalPhysicalPlan.isInstanceOf[WindowExec]) + plan.inputPlan.output.zip(plan.finalPhysicalPlan.output).foreach { case (o1, o2) => + assert(o1.semanticEquals(o2), "Different output column order after AQE optimization") + } + } + } } /** From 5ff129ac8261c674b90545f3e1651e166dbc6249 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 27 Nov 2024 14:22:01 +0900 Subject: [PATCH 19/51] [SPARK-50430][CORE] Use the standard Properties.clone instead of manual clone ### What changes were proposed in this pull request? This PR proposes to use the standard Properties.clone instead of manual clone ### Why are the changes needed? 
In a very rare condition, when the properties were changed during the clone of Properties, it might throw an exception as below: ``` : java.util.ConcurrentModificationException at java.util.Hashtable$Enumerator.next(Hashtable.java:1408) at java.util.Hashtable.putAll(Hashtable.java:523) at org.apache.spark.util.Utils$.cloneProperties(Utils.scala:3474) at org.apache.spark.SparkContext.getCredentialResolvedProperties(SparkContext.scala:523) at org.apache.spark.SparkContext.runJobInternal(SparkContext.scala:3157) at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1104) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:165) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:125) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) at org.apache.spark.rdd.RDD.withScope(RDD.scala:454) at org.apache.spark.rdd.RDD.collect(RDD.scala:1102) at org.apache.spark.mllib.evaluation.AreaUnderCurve$.of(AreaUnderCurve.scala:44) at org.apache.spark.mllib.evaluation.BinaryClassificationMetrics.areaUnderROC(BinaryClassificationMetrics.scala:127) at org.apache.spark.ml.evaluation.BinaryClassificationEvaluator.evaluate(BinaryClassificationEvaluator.scala:101) at sun.reflect.GeneratedMethodAccessor323.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:397) at py4j.Gateway.invoke(Gateway.java:306) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:199) at py4j.ClientServerConnection.run(ClientServerConnection.java:119) at java.lang.Thread.run(Thread.java:750) ``` We should use the standard clone method. ### Does this PR introduce _any_ user-facing change? It fixes a very corner case bug as described above. ### How was this patch tested? It's difficult to test because the issue is from concurrent execution. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48978 from HyukjinKwon/SPARK-50430. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 7614819884ca192fab45ee2ace8a8e081ec8becc) Signed-off-by: Hyukjin Kwon --- core/src/main/scala/org/apache/spark/util/Utils.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 3b0efffedec6f..8762f0a6cdbc3 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2982,9 +2982,7 @@ private[spark] object Utils if (props == null) { return props } - val resultProps = new Properties() - props.forEach((k, v) => resultProps.put(k, v)) - resultProps + props.clone().asInstanceOf[Properties] } /** From 5e51e2c4541077eb15898dadecf0fb0d1a5e961a Mon Sep 17 00:00:00 2001 From: xunxunmimi5577 <52647492+xunxunmimi5577@users.noreply.github.com> Date: Mon, 2 Dec 2024 16:38:14 +0800 Subject: [PATCH 20/51] [SPARK-49294][UI] Add width attribute for shuffle-write-time checkbox ### What changes were proposed in this pull request? The pr aims to add the style for `shuffle-write-time-checkbox-div` and set the width to be `155` pixels. 
### Why are the changes needed?

Fix a bug in the UI. The tip of `shuffle-write-time` appears in a strange position before this change, as shown below:

![MEITU_20240819_105642523](https://github.com/user-attachments/assets/1e4e9639-a949-4fc3-86f4-7cb65d6d9c73)

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Manually checked.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #47798 from xunxunmimi5577/add-width-style-for-shuffle_write_time-checkbox.

Authored-by: xunxunmimi5577 <52647492+xunxunmimi5577@users.noreply.github.com>
Signed-off-by: panbingkun
(cherry picked from commit 05728e4ff64e6684d7c6501f8a079e3b9aded9ed)
Signed-off-by: panbingkun
---
 core/src/main/resources/org/apache/spark/ui/static/webui.css | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.css b/core/src/main/resources/org/apache/spark/ui/static/webui.css
index f952f86503e30..58c5add2d2400 100755
--- a/core/src/main/resources/org/apache/spark/ui/static/webui.css
+++ b/core/src/main/resources/org/apache/spark/ui/static/webui.css
@@ -355,6 +355,10 @@ a.expandbutton {
   width: 170px;
 }

+.shuffle-write-time-checkbox-div {
+  width: 155px;
+}
+
 .result-serialization-time-checkbox-div {
   width: 185px;
 }

From 1d6f7adbe622f8433b9e22e87fd191316ad86053 Mon Sep 17 00:00:00 2001
From: Cheng Pan
Date: Tue, 3 Dec 2024 08:31:07 -0800
Subject: [PATCH 21/51] [SPARK-50433][DOCS][TESTS][3.5] Fix configuring log4j2
 guide docs for Spark on YARN and UT

Backport https://github.com/apache/spark/pull/48981 to 3.5

### What changes were proposed in this pull request?

As title.

### Why are the changes needed?

SPARK-37814 (3.3.0) migrated the logging system from log4j1 to log4j2; we should update the docs as well.

### Does this PR introduce _any_ user-facing change?

Yes, docs are updated.

### How was this patch tested?

Review.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #49044 from pan3793/SPARK-50433-3.5.
Authored-by: Cheng Pan Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/test/IntegrationTestUtils.scala | 2 +- .../spark/examples/streaming/KinesisWordCountASL.scala | 2 +- docs/running-on-yarn.md | 8 ++++---- .../spark/examples/streaming/StreamingExamples.scala | 2 +- .../org/apache/spark/deploy/yarn/YarnClusterSuite.scala | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala index 61d08912aec23..3ae9b9fc73b48 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala @@ -74,7 +74,7 @@ object IntegrationTestUtils { // Redirect server log into console "--conf", - s"spark.driver.extraJavaOptions=-Dlog4j.configuration=$log4j2") + s"spark.driver.extraJavaOptions=-Dlog4j.configurationFile=$log4j2") } else Seq.empty } diff --git a/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala b/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala index 7d12af3256f1f..d388b480e065d 100644 --- a/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala +++ b/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala @@ -275,7 +275,7 @@ private[streaming] object StreamingExamples extends Logging { // We first log something to initialize Spark's default logging, then we override the // logging level. logInfo("Setting log level to [WARN] for streaming example." + - " To override add a custom log4j.properties to the classpath.") + " To override add a custom log4j2.properties to the classpath.") Configurator.setRootLevel(Level.WARN) } } diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index ce7121b806cb0..d0e725f6a98f4 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -121,15 +121,15 @@ all environment variables used for launching each container. This process is use classpath problems in particular. (Note that enabling this requires admin privileges on cluster settings and a restart of all node managers. Thus, this is not applicable to hosted clusters). -To use a custom log4j configuration for the application master or executors, here are the options: +To use a custom log4j2 configuration for the application master or executors, here are the options: -- upload a custom `log4j.properties` using `spark-submit`, by adding it to the `--files` list of files +- upload a custom `log4j2.properties` using `spark-submit`, by adding it to the `--files` list of files to be uploaded with the application. -- add `-Dlog4j.configuration=` to `spark.driver.extraJavaOptions` +- add `-Dlog4j.configurationFile=` to `spark.driver.extraJavaOptions` (for the driver) or `spark.executor.extraJavaOptions` (for executors). Note that if using a file, the `file:` protocol should be explicitly provided, and the file needs to exist locally on all the nodes. -- update the `$SPARK_CONF_DIR/log4j.properties` file and it will be automatically uploaded along +- update the `$SPARK_CONF_DIR/log4j2.properties` file and it will be automatically uploaded along with the other configurations. 
Note that other 2 options has higher priority than this option if multiple options are specified. diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala index 20c5eb1700155..9289b005e3ba4 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala @@ -31,7 +31,7 @@ object StreamingExamples extends Logging { // We first log something to initialize Spark's default logging, then we override the // logging level. logInfo("Setting log level to [WARN] for streaming example." + - " To override add a custom log4j.properties to the classpath.") + " To override add a custom log4j2.properties to the classpath.") Configurator.setRootLevel(Level.WARN) } } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index 2637b2eab80e2..5cd69314d28fa 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -293,7 +293,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite { } test("running Spark in yarn-cluster mode displays driver log links") { - val log4jConf = new File(tempDir, "log4j.properties") + val log4jConf = new File(tempDir, "log4j2.properties") val logOutFile = new File(tempDir, "logs") Files.write( s"""rootLogger.level = debug From 5dc927bab129c40b1e919889d22ada1c95157dc9 Mon Sep 17 00:00:00 2001 From: huangxiaoping <1754789345@qq.com> Date: Wed, 4 Dec 2024 18:24:35 +0800 Subject: [PATCH 22/51] [SPARK-50487][DOCS] Update broken jira link ### What changes were proposed in this pull request? Update broken jira link ### Why are the changes needed? The old link is not accessible ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No testing required ### Was this patch authored or co-authored using generative AI tooling? No Closes #49052 from huangxiaopingRD/SPARK-50487. Lead-authored-by: huangxiaoping <1754789345@qq.com> Co-authored-by: Dongjoon Hyun Signed-off-by: Kent Yao (cherry picked from commit 3d063a01d7c2a6d9613e11dec882739daa7eeb71) Signed-off-by: Kent Yao --- core/src/main/scala/org/apache/spark/rdd/RDD.scala | 5 +++-- python/pyspark/rdd.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index f695b10202758..b63e5999127d4 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1812,8 +1812,9 @@ abstract class RDD[T: ClassTag]( * Please read the linked SPIP and design docs to understand the limitations and future plans. 
* @return an [[RDDBarrier]] instance that provides actions within a barrier stage * @see [[org.apache.spark.BarrierTaskContext]] - * @see SPIP: Barrier Execution Mode - * @see Design Doc + * @see + * SPIP: Barrier Execution Mode + * @see Design Doc */ @Experimental @Since("2.4.0") diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index aa63c6509dce8..142678a4a6f6d 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -5130,8 +5130,8 @@ def barrier(self: "RDD[T]") -> "RDDBarrier[T]": ----- For additional information see - - `SPIP: Barrier Execution Mode `_ - - `Design Doc `_ + - `SPIP: Barrier Execution Mode `_ + - `Design Doc `_ This API is experimental """ From acedb15ea3695c91d8ffbf207d593e9c0204ea09 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Thu, 5 Dec 2024 07:47:26 -0800 Subject: [PATCH 23/51] [SPARK-50498][PYTHON] Avoid unnecessary py4j call in `listFunctions` ### What changes were proposed in this pull request? Avoid unnecessary py4j call in `listFunctions` ### Why are the changes needed? ``` iter = self._jcatalog.listFunctions(dbName).toLocalIterator() if pattern is None: iter = self._jcatalog.listFunctions(dbName).toLocalIterator() else: iter = self._jcatalog.listFunctions(dbName, pattern).toLocalIterator() ``` the first `self._jcatalog.listFunctions` is unnecessary ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? ci ### Was this patch authored or co-authored using generative AI tooling? no Closes #49073 from zhengruifeng/avoid_list_funcs. Authored-by: Ruifeng Zheng Signed-off-by: Dongjoon Hyun (cherry picked from commit 36285956ed2b9b8034d6918a9e951f1a2748f3ce) Signed-off-by: Dongjoon Hyun --- python/pyspark/sql/catalog.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py index 2c6ed28461f36..3c22473a06235 100644 --- a/python/pyspark/sql/catalog.py +++ b/python/pyspark/sql/catalog.py @@ -481,7 +481,6 @@ def listFunctions( """ if dbName is None: dbName = self.currentDatabase() - iter = self._jcatalog.listFunctions(dbName).toLocalIterator() if pattern is None: iter = self._jcatalog.listFunctions(dbName).toLocalIterator() else: From 86e29e94d9fd5637d0258b0b234f1a82eb4fd860 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 5 Dec 2024 14:43:39 -0800 Subject: [PATCH 24/51] [SPARK-50505][DOCS] Fix `spark.storage.replication.proactive` default value documentation ### What changes were proposed in this pull request? This PR aims to fix `spark.storage.replication.proactive` default value documentation. ### Why are the changes needed? `spark.storage.replication.proactive` has been enabled by default since Apache Spark 3.2.0. https://github.com/apache/spark/blob/6add9c89855f9311d5e185774ddddcbf4323beee/docs/core-migration-guide.md?plain=1#L85 https://github.com/apache/spark/blob/6add9c89855f9311d5e185774ddddcbf4323beee/core/src/main/scala/org/apache/spark/internal/config/package.scala#L494-L502 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual review. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49081 from dongjoon-hyun/SPARK-50505. 
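For reference, a hedged sketch of opting back out of the post-3.2.0 default, with the config key taken from the doc above (the value shown is illustrative, not a recommendation):

```scala
import org.apache.spark.SparkConf

// Proactive RDD block replication has defaulted to true since Spark 3.2.0;
// the docs above were merely lagging behind. Disable it explicitly only if
// the pre-3.2 behavior is desired.
val conf = new SparkConf()
  .set("spark.storage.replication.proactive", "false")
```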
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 21451fb312fee32188b6d24f406cb4f3a8349414) Signed-off-by: Dongjoon Hyun --- docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration.md b/docs/configuration.md index abd934572bd00..302348aa0f0eb 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1959,7 +1959,7 @@ Apart from these, the following properties are also available, and may be useful spark.storage.replication.proactive - false + true Enables proactive block replication for RDD blocks. Cached RDD block replicas lost due to executor failures are replenished if there are any existing available replicas. This tries From d01f34f670d1e6ffb6fb6580ffa8ea34e20d07cb Mon Sep 17 00:00:00 2001 From: Livia Zhu Date: Fri, 6 Dec 2024 11:28:55 +0900 Subject: [PATCH 25/51] [SPARK-50492][SS] Fix java.util.NoSuchElementException when event time column is dropped after dropDuplicatesWithinWatermark ### What changes were proposed in this pull request? Update `DeduplicateWithinWatermark` references to include all attributes that could be the watermarking column. ### Why are the changes needed? Fix `java.util.NoSuchElementException` due to ColumnPruning. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added unit test ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49065 from liviazhu-db/liviazhu-db/dedup-watermark-fix. Authored-by: Livia Zhu Signed-off-by: Jungtaek Lim (cherry picked from commit 851f5f2ff905636388ff31f349c6fc5064875172) Signed-off-by: Jungtaek Lim --- .../plans/logical/basicLogicalOperators.scala | 3 +++ ...treamingDeduplicationWithinWatermarkSuite.scala | 14 ++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index f76e698a64005..b2ae138a9b0a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -1944,6 +1944,9 @@ case class Deduplicate( } case class DeduplicateWithinWatermark(keys: Seq[Attribute], child: LogicalPlan) extends UnaryNode { + // Ensure that references include event time columns so they are not pruned away. 
+  override def references: AttributeSet = AttributeSet(keys) ++
+    AttributeSet(child.output.filter(_.metadata.contains(EventTimeWatermark.delayKey)))
   override def maxRows: Option[Long] = child.maxRows
   override def output: Seq[Attribute] = child.output
   final override val nodePatterns: Seq[TreePattern] = Seq(DISTINCT_LIKE)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationWithinWatermarkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationWithinWatermarkSuite.scala
index 9a02ab3df7dd4..af86e6ec88996 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationWithinWatermarkSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationWithinWatermarkSuite.scala
@@ -220,4 +220,18 @@ class StreamingDeduplicationWithinWatermarkSuite extends StateStoreMetricsTest {
       )
     }
   }
+
+  test("SPARK-50492: drop event time column after dropDuplicatesWithinWatermark") {
+    val inputData = MemoryStream[(Int, Int)]
+    val result = inputData.toDS()
+      .withColumn("first", timestamp_seconds($"_1"))
+      .withWatermark("first", "10 seconds")
+      .dropDuplicatesWithinWatermark("_2")
+      .select("_2")
+
+    testStream(result, Append)(
+      AddData(inputData, (1, 2)),
+      CheckAnswer(2)
+    )
+  }
 }

From 153cd9e1a79643c88d5b26e6fae0086a472380f8 Mon Sep 17 00:00:00 2001
From: yangjie01
Date: Fri, 6 Dec 2024 16:36:11 +0800
Subject: [PATCH 26/51] [SPARK-50492][SS][FOLLOWUP][3.5] Change `def
 references` to `lazy val references` in `DeduplicateWithinWatermark` to fix
 the compilation issue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What changes were proposed in this pull request?

This PR changes `def references` to `lazy val references` in `DeduplicateWithinWatermark` to fix the following compilation error:

- https://github.com/apache/spark/actions/runs/12191807324/job/34011354774

```
[error] /home/runner/work/spark/spark/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala:1948:16: overriding lazy value references in class QueryPlan of type org.apache.spark.sql.catalyst.expressions.AttributeSet;
[error]  method references needs to be a stable, immutable value
[error]   override def references: AttributeSet = AttributeSet(keys) ++
[error]                ^
[error] one error found
```

### Why are the changes needed?

Fix compile error.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Pass GitHub Actions

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #49087 from LuciferYang/SPARK-50492-FOLLOWUP-3.5.

Authored-by: yangjie01
Signed-off-by: yangjie01
---
 .../sql/catalyst/plans/logical/basicLogicalOperators.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index b2ae138a9b0a9..4824bc1f3c263 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -1945,7 +1945,8 @@ case class Deduplicate(

 case class DeduplicateWithinWatermark(keys: Seq[Attribute], child: LogicalPlan) extends UnaryNode {
   // Ensure that references include event time columns so they are not pruned away.
-  override def references: AttributeSet = AttributeSet(keys) ++
+  @transient
+  override lazy val references: AttributeSet = AttributeSet(keys) ++
     AttributeSet(child.output.filter(_.metadata.contains(EventTimeWatermark.delayKey)))
   override def maxRows: Option[Long] = child.maxRows
   override def output: Seq[Attribute] = child.output

From bf29ab9eae79e73c6844881b0cd3a5e284960618 Mon Sep 17 00:00:00 2001
From: Terry Wang
Date: Fri, 6 Dec 2024 09:29:57 -0800
Subject: [PATCH 27/51] [SPARK-50421][CORE][3.5] Fix executor related memory
 config incorrect when multiple resource profiles worked

### What changes were proposed in this pull request?

Reset the executor's memory-related config when the resource profile is not the default resource profile.

### Why are the changes needed?

When multiple resource profiles exist in the same Spark application, the executor's memory-related config is currently not overridden by the resource profile's memory sizes, which causes maxOffHeap in `UnifiedMemoryManager` to be incorrect. See https://issues.apache.org/jira/browse/SPARK-50421 for more details.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Tests in our internal Spark version and jobs.

### Was this patch authored or co-authored using generative AI tooling?

No

This is a backport of https://github.com/apache/spark/pull/48963 to branch 3.5.

Closes #49090 from zjuwangg/m35_fixConfig.

Authored-by: Terry Wang
Signed-off-by: Dongjoon Hyun
---
 .../CoarseGrainedExecutorBackend.scala        | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
index 537522326fc78..fe90895cacb53 100644
--- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
@@ -474,6 +474,27 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging {
     }

     driverConf.set(EXECUTOR_ID, arguments.executorId)
+    // Set executor memory related config here according to resource profile
+    if (cfg.resourceProfile.id != ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) {
+      cfg.resourceProfile
+        .executorResources
+        .foreach {
+          case (ResourceProfile.OFFHEAP_MEM, request) =>
+            driverConf.set(MEMORY_OFFHEAP_SIZE.key, request.amount.toString + "m")
+            logInfo(s"Set executor off-heap memory to $request")
+          case (ResourceProfile.MEMORY, request) =>
+            driverConf.set(EXECUTOR_MEMORY.key, request.amount.toString + "m")
+            logInfo(s"Set executor memory to $request")
+          case (ResourceProfile.OVERHEAD_MEM, request) =>
+            // Maybe don't need to set this since it's nearly used by tasks.
+            driverConf.set(EXECUTOR_MEMORY_OVERHEAD.key, request.amount.toString + "m")
+            logInfo(s"Set executor memory_overhead to $request")
+          case (ResourceProfile.CORES, request) =>
+            driverConf.set(EXECUTOR_CORES.key, request.amount.toString)
+            logInfo(s"Set executor cores to $request")
+          case _ =>
+        }
+    }
     val env = SparkEnv.createExecutorEnv(driverConf, arguments.executorId, arguments.bindAddress,
       arguments.hostname, arguments.cores, cfg.ioEncryptionKey, isLocal = false)

     // Set the application attemptId in the BlockStoreClient if available.
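To make the scenario concrete, here is a minimal sketch of a non-default resource profile whose memory settings the fix above now propagates into the executor-side config (the amounts are arbitrary examples; assumes a running `sc` with dynamic allocation enabled, as multiple profiles normally require, and `spark.memory.offHeap.enabled=true` for the off-heap line):

```scala
import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfileBuilder}

// A second, non-default profile overriding executor memory settings.
val reqs = new ExecutorResourceRequests()
  .memory("4g")          // ResourceProfile.MEMORY       -> spark.executor.memory
  .memoryOverhead("1g")  // ResourceProfile.OVERHEAD_MEM -> spark.executor.memoryOverhead
  .offHeapMemory("2g")   // ResourceProfile.OFFHEAP_MEM  -> spark.memory.offHeap.size
  .cores(4)              // ResourceProfile.CORES        -> spark.executor.cores
val profile = new ResourceProfileBuilder().require(reqs).build()

// Executors launched for this stage should now see the profile's sizes instead
// of the application defaults, so UnifiedMemoryManager computes maxOffHeap
// from the profile's off-heap size rather than the default config.
sc.parallelize(1 to 100, numSlices = 4).withResources(profile).count()
```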
From d8f3afac944291869b55dc8ad52aa638ba24f98b Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun
Date: Sat, 7 Dec 2024 10:22:15 +0800
Subject: [PATCH 28/51] [SPARK-50514][DOCS] Add `IDENTIFIER clause` page to
 `menu-sql.yaml`

### What changes were proposed in this pull request?

This PR aims to add the `IDENTIFIER clause` page to `menu-sql.yaml` for Apache Spark 3.5.4.

### Why are the changes needed?

This was missed at SPARK-43205 (Apache Spark 3.5.0).
- #42506

### Does this PR introduce _any_ user-facing change?

**BEFORE**
![Screenshot 2024-12-06 at 11 35 52](https://github.com/user-attachments/assets/c3c8dc56-b8d4-4f8d-bb9e-31bccb1f5d42)

**AFTER**
![Screenshot 2024-12-06 at 11 36 14](https://github.com/user-attachments/assets/bd1606d2-eb3f-4640-92ef-b0079847c3a3)

### How was this patch tested?

Manual review.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #49097 from dongjoon-hyun/SPARK-50514.

Authored-by: Dongjoon Hyun
Signed-off-by: yangjie01
(cherry picked from commit 28766d4120f4f5bb13f474d53e83e05f38a31475)
Signed-off-by: yangjie01
---
 docs/_data/menu-sql.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml
index ff93f09a83ce1..99d7aded3f25a 100644
--- a/docs/_data/menu-sql.yaml
+++ b/docs/_data/menu-sql.yaml
@@ -87,6 +87,8 @@
     url: sql-ref-functions.html
   - text: Identifiers
     url: sql-ref-identifier.html
+  - text: IDENTIFIER clause
+    url: sql-ref-identifier-clause.html
   - text: Literals
     url: sql-ref-literals.html
   - text: Null Semantics

From 305d2a0aa48cf5b470f75b02677e7761dca2a0e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andrej=20Gobelji=C4=87?=
Date: Sat, 7 Dec 2024 10:24:53 -0800
Subject: [PATCH 29/51] [SPARK-49695][SQL][3.5] Postgres fix xor push-down
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What changes were proposed in this pull request?

Backport of https://github.com/apache/spark/pull/48144

This PR fixes the pushdown of the ^ operator (XOR operator) for Postgres, which uses ^ as the exponent operator rather than bitwise xor. The fix consists of overriding the SQLExpressionBuilder to replace the '^' character with '#'.

### Why are the changes needed?

The result is incorrect.

### Does this PR introduce _any_ user-facing change?

Yes. The user will now have a proper translation of the ^ operator.

### How was this patch tested?

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #49071 from andrej-db/PGXORBackport.
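To spell out the operator mismatch that the test added below guards against (a hedged illustration; the SQL results describe standard Postgres semantics, not output from this patch):

```scala
// In Spark/Scala, '^' is bitwise xor:
assert((2 ^ 3) == 1)
// Postgres instead treats '^' as numeric power (SELECT 2 ^ 3 yields 8)
// and uses '#' for bitwise xor (SELECT 2 # 3 yields 1).
// Hence PostgresSQLBuilder rewrites the operator before push-down, so that
//   "dept ^ 6 = 0"   is sent as   "dept # 6 = 0".
```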
Lead-authored-by: Andrej Gobeljić Co-authored-by: andrej-gobeljic_data Signed-off-by: Dongjoon Hyun --- .../jdbc/v2/PostgresIntegrationSuite.scala | 10 +++++++++ .../spark/sql/jdbc/PostgresDialect.scala | 21 ++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala index 7fef3ccd6b3f6..b0edac3fcdd1f 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -22,6 +22,7 @@ import java.sql.Connection import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +import org.apache.spark.sql.execution.FilterExec import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog import org.apache.spark.sql.jdbc.DatabaseOnDocker import org.apache.spark.sql.types._ @@ -123,4 +124,13 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCT ) } } + + test("SPARK-49695: Postgres fix xor push-down") { + val df = spark.sql(s"select dept, name from $catalogName.employee where dept ^ 6 = 0") + val rows = df.collect() + assert(!df.queryExecution.sparkPlan.exists(_.isInstanceOf[FilterExec])) + assert(rows.length == 1) + assert(rows(0).getInt(0) === 6) + assert(rows(0).getString(1) === "jen") + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index f8f72d88589e3..dd4545bc41b42 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -22,11 +22,13 @@ import java.time.{LocalDateTime, ZoneOffset} import java.util import java.util.Locale +import scala.util.control.NonFatal + import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.analysis.{IndexAlreadyExistsException, NonEmptyNamespaceException, NoSuchIndexException} import org.apache.spark.sql.connector.catalog.Identifier -import org.apache.spark.sql.connector.expressions.NamedReference +import org.apache.spark.sql.connector.expressions.{Expression, NamedReference} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils} import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo @@ -258,6 +260,23 @@ private object PostgresDialect extends JdbcDialect with SQLConfHelper { } } + class PostgresSQLBuilder extends JDBCSQLBuilder { + override def visitBinaryArithmetic(name: String, l: String, r: String): String = { + l + " " + name.replace('^', '#') + " " + r + } + } + + override def compileExpression(expr: Expression): Option[String] = { + val postgresSQLBuilder = new PostgresSQLBuilder() + try { + Some(postgresSQLBuilder.build(expr)) + } catch { + case NonFatal(e) => + logWarning("Error occurs while compiling V2 expression", e) + None + } + } + override def supportsLimit: Boolean = true override def supportsOffset: Boolean = true From a57f3c273b4d9cc637f189d36ba0c4db256d60a9 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 7 Dec 
2024 20:00:37 -0800 Subject: [PATCH 30/51] [SPARK-50483][CORE][SQL][3.5] BlockMissingException should be thrown even if ignoreCorruptFiles is enabled ### What changes were proposed in this pull request? `BlockMissingException` extends from `IOException`. When `BlockMissingException` occurs and ignoreCorruptFiles is enabled, the current task may not get any data and will be marked as successful([code](https://github.com/apache/spark/blob/0d045db8d15d0aeb0f54a1557fd360363e77ed42/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala#L271-L273)). This will cause data quality issues. Generally speaking, `BlockMissingException` is a system issue, not a file corruption issue. Therefore, `BlockMissingException` should be thrown even if ignoreCorruptFiles is enabled. Related error message: ``` 24/11/29 01:56:00 WARN FileScanRDD: Skipped the rest of the content in the corrupted file: path: viewfs://hadoop-cluster/path/to/data/part-00320-7915e327-3214-4585-a44e-f9c58e362b43.c000.snappy.parquet, range: 191727616-281354675, partition values: [empty row] org.apache.hadoop.hdfs.BlockMissingException: Could not obtain block: BP-169998034-10.210.23.11-1507067630530:blk_83565156183_82548880660 file/path/to/data/part-00320-7915e327-3214-4585-a44e-f9c58e362b43.c000.snappy.parquet No live nodes contain current block Block locations: DatanodeInfoWithStorage[10.209.145.174:50010,DS-c7c0a172-5ffa-4f90-bfb5-717fb1e9ecf2,DISK] DatanodeInfoWithStorage[10.3.22.142:50010,DS-a1ba9ac9-dc92-4131-a2c2-9f7d03b97caf,DISK] DatanodeInfoWithStorage[10.209.146.156:50010,DS-71d8ae97-15d3-454e-a715-d9490e184989,DISK] Dead nodes: DatanodeInfoWithStorage[10.209.146.156:50010,DS-71d8ae97-15d3-454e-a715-d9490e184989,DISK] DatanodeInfoWithStorage[10.209.145.174:50010,DS-c7c0a172-5ffa-4f90-bfb5-717fb1e9ecf2,DISK] DatanodeInfoWithStorage[10.3.22.142:50010,DS-a1ba9ac9-dc92-4131-a2c2-9f7d03b97caf,DISK] ``` ![image](https://github.com/user-attachments/assets/e040ce9d-1a0e-44eb-bd03-4cd7a9fff80f) ### Why are the changes needed? Avoid data issue if ignoreCorruptFiles is enabled when `BlockMissingException` occurred. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Manual test. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49105 from wangyum/SPARK-50483-branch-3.5. 
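The crux is the exception hierarchy. A minimal sketch of the catch-ordering pattern the diffs below apply (`ignoreCorruptFiles` and `readNext` are stand-ins for the surrounding reader code, not Spark APIs):

```scala
import java.io.IOException
import org.apache.hadoop.hdfs.BlockMissingException

// Stand-ins for the reader machinery (assumptions for illustration only):
val ignoreCorruptFiles = true
def readNext(): Unit = () // would advance the underlying record reader

try {
  readNext()
} catch {
  // BlockMissingException extends IOException, so it must be matched first;
  // otherwise the ignoreCorruptFiles guard below would swallow a cluster-side
  // read failure and the task would be marked successful with missing rows.
  case e: BlockMissingException => throw e
  case e: IOException if ignoreCorruptFiles =>
    println(s"Skipped the rest of the content in the corrupted file: $e")
}
```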
Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala | 3 +++ core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala | 3 +++ core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala | 3 +++ .../apache/spark/sql/execution/datasources/FileScanRDD.scala | 2 ++ .../sql/execution/datasources/v2/FilePartitionReader.scala | 4 ++++ 5 files changed, 15 insertions(+) diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala index 2554106d78e9d..67e4583fe4822 100644 --- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala +++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -29,6 +29,7 @@ import org.apache.avro.mapred.{AvroOutputFormat, FsInput} import org.apache.avro.mapreduce.AvroJob import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.FileStatus +import org.apache.hadoop.hdfs.BlockMissingException import org.apache.hadoop.mapreduce.Job import org.apache.spark.SparkException @@ -140,6 +141,8 @@ private[sql] object AvroUtils extends Logging { try { Some(DataFileReader.openReader(in, new GenericDatumReader[GenericRecord]())) } catch { + case e: BlockMissingException => + throw new SparkException(s"Could not read file: $path", e) case e: IOException => if (ignoreCorruptFiles) { logWarning(s"Skipped the footer in the corrupted file: $path", e) diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index cad107256c58c..edd07a2649dbb 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -25,6 +25,7 @@ import scala.collection.immutable.Map import scala.reflect.ClassTag import org.apache.hadoop.conf.{Configurable, Configuration} +import org.apache.hadoop.hdfs.BlockMissingException import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.hadoop.mapred._ import org.apache.hadoop.mapred.lib.CombineFileSplit @@ -293,6 +294,7 @@ class HadoopRDD[K, V]( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e: BlockMissingException => throw e case e: IOException if ignoreCorruptFiles => logWarning(s"Skipped the rest content in the corrupted file: ${split.inputSplit}", e) finished = true @@ -318,6 +320,7 @@ class HadoopRDD[K, V]( finished = true // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e: BlockMissingException => throw e case e: IOException if ignoreCorruptFiles => logWarning(s"Skipped the rest content in the corrupted file: ${split.inputSplit}", e) finished = true diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index 119fdae531f22..fbd2235aabaf6 100644 --- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -25,6 +25,7 @@ import scala.collection.JavaConverters.asScalaBufferConverter import scala.reflect.ClassTag import org.apache.hadoop.conf.{Configurable, Configuration} +import org.apache.hadoop.hdfs.BlockMissingException import org.apache.hadoop.io.Writable import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.hadoop.mapred.JobConf 
@@ -227,6 +228,7 @@ class NewHadoopRDD[K, V]( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e: BlockMissingException => throw e case e: IOException if ignoreCorruptFiles => logWarning( s"Skipped the rest content in the corrupted file: ${split.serializableHadoopSplit}", @@ -255,6 +257,7 @@ class NewHadoopRDD[K, V]( finished = true // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e: BlockMissingException => throw e case e: IOException if ignoreCorruptFiles => logWarning( s"Skipped the rest content in the corrupted file: ${split.serializableHadoopSplit}", diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala index 0cca51cf4e393..ce56fc1b28296 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala @@ -23,6 +23,7 @@ import java.net.URI import scala.util.control.NonFatal import org.apache.hadoop.fs.Path +import org.apache.hadoop.hdfs.BlockMissingException import org.apache.spark.{Partition => RDDPartition, SparkUpgradeException, TaskContext} import org.apache.spark.deploy.SparkHadoopUtil @@ -259,6 +260,7 @@ class FileScanRDD( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e: BlockMissingException => throw e case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => logWarning( s"Skipped the rest of the content in the corrupted file: $currentFile", e) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala index 7159bc6de3a47..8f51226dcfe99 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala @@ -20,6 +20,8 @@ import java.io.{FileNotFoundException, IOException} import scala.util.control.NonFatal +import org.apache.hadoop.hdfs.BlockMissingException + import org.apache.spark.SparkUpgradeException import org.apache.spark.internal.Logging import org.apache.spark.rdd.InputFileBlockHolder @@ -49,6 +51,7 @@ class FilePartitionReader[T]( // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw QueryExecutionErrors.fileNotFoundError(e) + case e: BlockMissingException => throw e case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => logWarning( s"Skipped the rest of the content in the corrupted file.", e) @@ -68,6 +71,7 @@ class FilePartitionReader[T]( throw QueryExecutionErrors.unsupportedSchemaColumnConvertError( currentReader.file.urlEncodedPath, e.getColumn, e.getLogicalType, e.getPhysicalType, e) + case e: BlockMissingException => throw e case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => logWarning( s"Skipped the rest of the content in the corrupted file: $currentReader", e) From 929a19fe1604e07adf9ed2798ec0c1b53e0bd60d Mon Sep 17 00:00:00 2001 From: Jie Yang Date: Mon, 9 Dec 2024 01:32:17 +0000 Subject: [PATCH 31/51] Preparing Spark 
release v3.5.4-rc1 --- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- common/utils/pom.xml | 2 +- connector/avro/pom.xml | 2 +- connector/connect/client/jvm/pom.xml | 2 +- connector/connect/common/pom.xml | 2 +- connector/connect/server/pom.xml | 2 +- connector/docker-integration-tests/pom.xml | 2 +- connector/kafka-0-10-assembly/pom.xml | 2 +- connector/kafka-0-10-sql/pom.xml | 2 +- connector/kafka-0-10-token-provider/pom.xml | 2 +- connector/kafka-0-10/pom.xml | 2 +- connector/kinesis-asl-assembly/pom.xml | 2 +- connector/kinesis-asl/pom.xml | 2 +- connector/protobuf/pom.xml | 2 +- connector/spark-ganglia-lgpl/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 2 +- examples/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/api/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 44 files changed, 44 insertions(+), 44 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index 3367c1629c578..47b38621d6400 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 014ff5bbaf209..3757f69e9bd17 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index ed2352fd1276e..83243d183b7b9 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index b791a06aad43a..e74fb05beb0ae 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 685ada5194905..13c4b5cca1e32 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index b2e488c7bb222..709bbed0c553c 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 3a260a8dff53f..59e9973c42d05 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index fd0aa7ba2a3a2..e222499eec228 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 
3.5.4 ../../pom.xml diff --git a/common/utils/pom.xml b/common/utils/pom.xml index 7c87be73d7d96..7b2a1ad57b0ff 100644 --- a/common/utils/pom.xml +++ b/common/utils/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml index 8bc2802ea5d0d..1a6fe528b9168 100644 --- a/connector/avro/pom.xml +++ b/connector/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml index 87f6a589261cc..695146d7a1113 100644 --- a/connector/connect/client/jvm/pom.xml +++ b/connector/connect/client/jvm/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../../../pom.xml diff --git a/connector/connect/common/pom.xml b/connector/connect/common/pom.xml index 994179fd99ac8..6c50469717f95 100644 --- a/connector/connect/common/pom.xml +++ b/connector/connect/common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml index 801c28319ee84..aeadbacb7c692 100644 --- a/connector/connect/server/pom.xml +++ b/connector/connect/server/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/connector/docker-integration-tests/pom.xml b/connector/docker-integration-tests/pom.xml index 19377b36a612f..435c0fbd797aa 100644 --- a/connector/docker-integration-tests/pom.xml +++ b/connector/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml index ae11f0eac307d..8b6d7d47b0392 100644 --- a/connector/kafka-0-10-assembly/pom.xml +++ b/connector/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-sql/pom.xml b/connector/kafka-0-10-sql/pom.xml index 533a45e18f662..dce1990f1c9d0 100644 --- a/connector/kafka-0-10-sql/pom.xml +++ b/connector/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-token-provider/pom.xml b/connector/kafka-0-10-token-provider/pom.xml index 07ca1c2b2f3c7..5973b9595db8a 100644 --- a/connector/kafka-0-10-token-provider/pom.xml +++ b/connector/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10/pom.xml b/connector/kafka-0-10/pom.xml index 176d92da63801..54ba2b22093d0 100644 --- a/connector/kafka-0-10/pom.xml +++ b/connector/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml index a6ef06142f5cb..5e0c0fcafc12b 100644 --- a/connector/kinesis-asl-assembly/pom.xml +++ b/connector/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl/pom.xml b/connector/kinesis-asl/pom.xml index 4282e1f035716..65b0fa33db29f 100644 --- a/connector/kinesis-asl/pom.xml +++ b/connector/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT 
+ 3.5.4 ../../pom.xml diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml index 2af6002b5c7db..95be9ab74f105 100644 --- a/connector/protobuf/pom.xml +++ b/connector/protobuf/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/spark-ganglia-lgpl/pom.xml b/connector/spark-ganglia-lgpl/pom.xml index a46c9bbfec2cf..62f1c4ab2b124 100644 --- a/connector/spark-ganglia-lgpl/pom.xml +++ b/connector/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index d1b0e82c7c0d5..e59066e19850d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 3dea0c82204bd..a207cc2d911b5 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,7 +19,7 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.5.4-SNAPSHOT +SPARK_VERSION: 3.5.4 SPARK_VERSION_SHORT: 3.5.4 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.18" diff --git a/examples/pom.xml b/examples/pom.xml index 26d91eff504f2..5efc255218570 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index c4f250b40f33d..74ad5a732f054 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index a47d25015dfa9..cb89c27d5f0ba 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index 5c1844be5782d..e36d57fe6a573 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index bb821190273e1..26e9a1502abab 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 202b80d38e24f..079ce72d5d959 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/pom.xml b/pom.xml index 6497056fa2e48..8dc47f391f967 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index 002d06e28ea15..db20a2ffae586 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__: str = "3.5.4.dev0" +__version__: str = "3.5.4" diff --git a/repl/pom.xml b/repl/pom.xml index 5ef505bbc48e5..3f8c931a60664 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index cd90f32d0814f..5991f1848ccf8 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index b72a3daea3c38..8ec4e86ab8f12 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 31377cbda5d8e..9ace7e29de0a0 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index d7f3786e1050f..f0df0ff0ea2f4 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/api/pom.xml b/sql/api/pom.xml index 038b6c16a4e88..c180a208bf093 100644 --- a/sql/api/pom.xml +++ b/sql/api/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 0564a6be7432a..58a2333b5b5a0 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 62d33dbfc2d41..9577de81c2057 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 5d2708dfdd714..6c86bc35a89d7 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 9a313907eb130..be3c952e4131b 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 6cbccb39772c9..21d2981fe1088 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index f23f4a4b50559..7e5724b5d9dd3 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4-SNAPSHOT + 3.5.4 ../pom.xml From 8e6507a3d83a8f8ef466ed5f411884b9655916df Mon Sep 17 00:00:00 2001 From: Jie Yang Date: Mon, 9 Dec 2024 01:32:23 +0000 Subject: [PATCH 32/51] Preparing development version 3.5.5-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- 
common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- common/utils/pom.xml | 2 +- connector/avro/pom.xml | 2 +- connector/connect/client/jvm/pom.xml | 2 +- connector/connect/common/pom.xml | 2 +- connector/connect/server/pom.xml | 2 +- connector/docker-integration-tests/pom.xml | 2 +- connector/kafka-0-10-assembly/pom.xml | 2 +- connector/kafka-0-10-sql/pom.xml | 2 +- connector/kafka-0-10-token-provider/pom.xml | 2 +- connector/kafka-0-10/pom.xml | 2 +- connector/kinesis-asl-assembly/pom.xml | 2 +- connector/kinesis-asl/pom.xml | 2 +- connector/protobuf/pom.xml | 2 +- connector/spark-ganglia-lgpl/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/api/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 45 files changed, 47 insertions(+), 47 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 8657755b8d0ea..5eca59375425e 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.5.4 +Version: 3.5.5 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 47b38621d6400..ee2e7b48871ee 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 3757f69e9bd17..a5ac18252d9c9 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 83243d183b7b9..c3f33905ae20c 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index e74fb05beb0ae..de0af6da6c9ec 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 13c4b5cca1e32..54edc410aa9cc 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 709bbed0c553c..88ae8e2715a04 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 59e9973c42d05..adeab180901c6 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git 
a/common/unsafe/pom.xml b/common/unsafe/pom.xml index e222499eec228..0518930d7eb5b 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/utils/pom.xml b/common/utils/pom.xml index 7b2a1ad57b0ff..fc15de78ed505 100644 --- a/common/utils/pom.xml +++ b/common/utils/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml index 1a6fe528b9168..8d78204ddce30 100644 --- a/connector/avro/pom.xml +++ b/connector/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml index 695146d7a1113..c220ce4d032e5 100644 --- a/connector/connect/client/jvm/pom.xml +++ b/connector/connect/client/jvm/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../../pom.xml diff --git a/connector/connect/common/pom.xml b/connector/connect/common/pom.xml index 6c50469717f95..8fd5820c35c71 100644 --- a/connector/connect/common/pom.xml +++ b/connector/connect/common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml index aeadbacb7c692..747c56d148ebb 100644 --- a/connector/connect/server/pom.xml +++ b/connector/connect/server/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/connector/docker-integration-tests/pom.xml b/connector/docker-integration-tests/pom.xml index 435c0fbd797aa..878bd4590b50f 100644 --- a/connector/docker-integration-tests/pom.xml +++ b/connector/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml index 8b6d7d47b0392..119761501dad9 100644 --- a/connector/kafka-0-10-assembly/pom.xml +++ b/connector/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-sql/pom.xml b/connector/kafka-0-10-sql/pom.xml index dce1990f1c9d0..51ab703585beb 100644 --- a/connector/kafka-0-10-sql/pom.xml +++ b/connector/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-token-provider/pom.xml b/connector/kafka-0-10-token-provider/pom.xml index 5973b9595db8a..7e1c6ebac9a7f 100644 --- a/connector/kafka-0-10-token-provider/pom.xml +++ b/connector/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10/pom.xml b/connector/kafka-0-10/pom.xml index 54ba2b22093d0..5014e1a1494ce 100644 --- a/connector/kafka-0-10/pom.xml +++ b/connector/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml index 5e0c0fcafc12b..65d86f5617b1c 100644 --- a/connector/kinesis-asl-assembly/pom.xml +++ b/connector/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git 
a/connector/kinesis-asl/pom.xml b/connector/kinesis-asl/pom.xml index 65b0fa33db29f..47495110bf7d8 100644 --- a/connector/kinesis-asl/pom.xml +++ b/connector/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml index 95be9ab74f105..3f417fba4b021 100644 --- a/connector/protobuf/pom.xml +++ b/connector/protobuf/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/spark-ganglia-lgpl/pom.xml b/connector/spark-ganglia-lgpl/pom.xml index 62f1c4ab2b124..47db5eb9253e8 100644 --- a/connector/spark-ganglia-lgpl/pom.xml +++ b/connector/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index e59066e19850d..ed5c82d28ca68 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index a207cc2d911b5..969b61fa00363 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.5.4 -SPARK_VERSION_SHORT: 3.5.4 +SPARK_VERSION: 3.5.5-SNAPSHOT +SPARK_VERSION_SHORT: 3.5.5 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.18" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.5.4"] + 'facetFilters': ["version:3.5.5"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index 5efc255218570..506aa7836485a 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 74ad5a732f054..f330d4d1a5377 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index cb89c27d5f0ba..29f47eec8a5c6 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index e36d57fe6a573..e309c1f734296 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 26e9a1502abab..f8a3cf1cc16df 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 079ce72d5d959..1fa3e215977d9 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 8dc47f391f967..0ccb6ac76a9bc 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index db20a2ffae586..df09fc3284fbd 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 
@@ # See the License for the specific language governing permissions and # limitations under the License. -__version__: str = "3.5.4" +__version__: str = "3.5.5.dev0" diff --git a/repl/pom.xml b/repl/pom.xml index 3f8c931a60664..e3f52f07cc4d7 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 5991f1848ccf8..e4d890a930a2a 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 8ec4e86ab8f12..85a125ddfe4b7 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 9ace7e29de0a0..118f0034ddf5a 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index f0df0ff0ea2f4..4c85e90c4e485 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/api/pom.xml b/sql/api/pom.xml index c180a208bf093..1613c3218649b 100644 --- a/sql/api/pom.xml +++ b/sql/api/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 58a2333b5b5a0..e7736c95007ad 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 9577de81c2057..889fba8892568 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 6c86bc35a89d7..110f9e168de6b 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index be3c952e4131b..8fc5d81ab8982 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 21d2981fe1088..e3002680f2173 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 7e5724b5d9dd3..3530297bf8f83 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml From deabe49d94f28d19438865effe2f92ec536d2bc6 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 9 Dec 2024 19:45:16 +0800 Subject: [PATCH 33/51] [SPARK-49134][INFRA][3.5] Support retry for deploying artifacts to Nexus staging 
repository ### What changes were proposed in this pull request? This PR improves `dev/create-release/release-build.sh` by enabling up to three retries when deploying artifacts to the Nexus staging repository. When I was setting up 3.5.2-rc5 on my AWS EC2 instance, I encountered an issue with closing the `orgapachespark-1461` staging repository due to a timeout while uploading a sha1 file. ```xml Uploading spark-streaming-kafka-0-10_2.13/3.5.2/spark-streaming-kafka-0-10_2.13-3.5.2-test-sources.jar.sha1 % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 262 100 221 100 41 15 2 0:00:20 0:00:13 0:00:07 58 408 Request Timeout


Server timeout waiting for the HTTP request from the client.

``` I could have uploaded it manually, but I didn't, because I was afraid of introducing unpredictable errors. So I regenerated and uploaded `orgapachespark-1462`. ### Why are the changes needed? To avoid temporary network errors when performing the publish step for release managers. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests ### Was this patch authored or co-authored using generative AI tooling? no Closes #49108 from LuciferYang/SPARK-49134-3.5. Authored-by: Kent Yao Signed-off-by: yangjie01 --- dev/create-release/release-build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 99841916cf293..3546e721edbd2 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -505,7 +505,7 @@ if [[ "$1" == "publish-release" ]]; then file_short=$(echo $file | sed -e "s/\.\///") dest_url="$nexus_upload/org/apache/spark/$file_short" echo " Uploading $file_short" - curl -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url + curl --retry 3 --retry-all-errors -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url done echo "Closing nexus staging repository" From a3cf28ea73f0bd1147af6557954b329cad5226ea Mon Sep 17 00:00:00 2001 From: Richard Chen Date: Wed, 11 Dec 2024 01:56:22 +0800 Subject: [PATCH 34/51] [SPARK-50463][SQL][3.5] Fix `ConstantColumnVector` with Columnar to Row conversion ### What changes were proposed in this pull request? https://github.com/apache/spark/commit/800faf0abfa368ad0a5ef1e0fa44b74dbaab724e frees column vector resources between batches in columnar to row conversion. However, like `WritableColumnVector`, `ConstantColumnVector` should not free resources between batches because the same data is used across batches. ### Why are the changes needed? Without this change, ConstantColumnVectors with string values, for example, will fail if used with column->row conversion. For instance, reading a parquet table partitioned by a string column with multiple batches. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? added UT that failed before and now passes ### Was this patch authored or co-authored using generative AI tooling? no Closes #49131 from LuciferYang/SPARK-50463-3.5. Authored-by: Richard Chen Signed-off-by: yangjie01 --- .../spark/sql/vectorized/ColumnVector.java | 12 +++++------ .../spark/sql/vectorized/ColumnarBatch.java | 8 ++++---- .../vectorized/ConstantColumnVector.java | 5 +++++ .../vectorized/WritableColumnVector.java | 2 +- .../apache/spark/sql/execution/Columnar.scala | 2 +- .../parquet/ParquetQuerySuite.scala | 20 +++++++++++++++++++ 6 files changed, 37 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java index 7dc2d38144296..ea199e2685a54 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java @@ -68,14 +68,14 @@ public abstract class ColumnVector implements AutoCloseable { public abstract void close(); /** - * Cleans up memory for this column vector if it's not writable. The column vector is not usable - * after this. + * Cleans up memory for this column vector if its resources are freeable between batches. + * The column vector is not usable after this.
* - * If this is a writable column vector, it is a no-op. + * If this is a writable column vector or constant column vector, it is a no-op. */ - public void closeIfNotWritable() { - // By default, we just call close() for all column vectors. If a column vector is writable, it - // should override this method and do nothing. + public void closeIfFreeable() { + // By default, we just call close() for all column vectors. If a column vector is writable or + // constant, it should override this method and do nothing. close(); } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java index 52e4115af336a..7ef570a212292 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java @@ -46,12 +46,12 @@ public void close() { } /** - * Called to close all the columns if they are not writable. This is used to clean up memory - * allocated during columnar processing. + * Called to close all the columns if their resources are freeable between batches. + * This is used to clean up memory allocated during columnar processing. */ - public void closeIfNotWritable() { + public void closeIfFreeable() { for (ColumnVector c: columns) { - c.closeIfNotWritable(); + c.closeIfFreeable(); } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java index 5095e6b0c9c6b..9713998549c72 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java @@ -72,6 +72,11 @@ public ConstantColumnVector(int numRows, DataType type) { } } + public void closeIfFreeable() { + // no-op: `ConstantColumnVector`s reuse the data backing their values across multiple batches and + // are freed at the end of execution in `close`.
+ } + @Override public void close() { stringData = null; diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java index 0fde85fd454c1..d23de1ff0cfe9 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java @@ -88,7 +88,7 @@ public void close() { } @Override - public void closeIfNotWritable() { + public void closeIfFreeable() { // no-op } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala index ea559efc45f13..bfb198adad501 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala @@ -194,7 +194,7 @@ case class ColumnarToRowExec(child: SparkPlan) extends ColumnarToRowTransition w | $shouldStop | } | $idx = $numRows; - | $batch.closeIfNotWritable(); + | $batch.closeIfFreeable(); | $batch = null; | $nextBatchFuncName(); |} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index 29cb224c8787c..f6472ba3d9dbc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -482,6 +482,26 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS } } + test("SPARK-50463: Partition values can be read over multiple batches") { + withTempDir { dir => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_BATCH_SIZE.key -> "1") { + val path = dir.getAbsolutePath + spark.range(0, 5) + .selectExpr("concat(cast(id % 2 as string), 'a') as partCol", "id") + .write + .format("parquet") + .mode("overwrite") + .partitionBy("partCol").save(path) + val df = spark.read.format("parquet").load(path).selectExpr("partCol") + val expected = spark.range(0, 5) + .selectExpr("concat(cast(id % 2 as string), 'a') as partCol") + .collect() + + checkAnswer(df, expected) + } + } + } + test("SPARK-10301 requested schema clipping - same schema") { withTempPath { dir => val path = dir.getCanonicalPath From bb953f97e6c5a26861296aca0a4f14db078ef2ef Mon Sep 17 00:00:00 2001 From: changgyoopark-db Date: Wed, 11 Dec 2024 12:56:43 +0800 Subject: [PATCH 35/51] [SPARK-50510][CONNECT][3.5] Fix sporadic ReattachableExecuteSuite failure ### What changes were proposed in this pull request? ReattachableExecuteSuite detected a rare data race issue where ExecuteThreadRunner may send the client the wrong error code before the SparkConnect service sends the correct error code. - The test fails if ExecuteThreadRunner is finished before the SparkConnect service sends the correct error code and after the session is invalidated; to be specific, the event manager throws an illegal state exception (SPARK-49688) that is translated into an unknown error. - The whole problem was addressed under https://github.com/apache/spark/pull/48208 for Spark 4.0. ### Why are the changes needed? 1. Clients may get the wrong error message: expect session-closed or the like, but get unknown. 2. To fix the ReattachableExecuteSuite failure. 
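To make the race concrete, here is a minimal, self-contained Scala sketch of the masking behavior described above. All names (`SessionClosedException`, `postClosedEvent`, `close`) are hypothetical illustrations, not the actual Spark Connect classes: if the post-closed event handler throws after the session has been invalidated, the cleanup exception replaces the original session-closed error, which the client then observes as an unknown error; guarding the handler with `NonFatal` preserves the original error.

```scala
import scala.util.control.NonFatal

object ErrorMaskingSketch {
  final class SessionClosedException extends RuntimeException("session is closed")

  // Analogue of eventsManager.postClosed() throwing once the session is invalidated.
  def postClosedEvent(sessionValid: Boolean): Unit =
    if (!sessionValid) throw new IllegalStateException("executor already closed")

  // Guarded cleanup: log and swallow non-fatal errors so the original error survives.
  def close(sessionValid: Boolean): Unit =
    try postClosedEvent(sessionValid)
    catch { case NonFatal(e) => println(s"Error posting closed event to UI: ${e.getMessage}") }

  def main(args: Array[String]): Unit =
    try throw new SessionClosedException
    // Without the guard inside close(), the IllegalStateException thrown during
    // cleanup would replace SessionClosedException and surface as an unknown error.
    finally close(sessionValid = false)
}
```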
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? ReattachableExecuteSuite. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49127 from changgyoopark-db/SPARK-50510. Authored-by: changgyoopark-db Signed-off-by: yangjie01 --- .../spark/sql/connect/service/ExecuteHolder.scala | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala index 0e4f344da901c..93a1757cd687a 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala @@ -21,6 +21,7 @@ import java.util.UUID import scala.collection.JavaConverters._ import scala.collection.mutable +import scala.util.control.NonFatal import org.apache.spark.{SparkEnv, SparkSQLException} import org.apache.spark.connect.proto @@ -237,7 +238,14 @@ private[connect] class ExecuteHolder( // it does. responseObserver.removeAll() // post closed to UI - eventsManager.postClosed() + try { + eventsManager.postClosed() + } catch { + // Catching the exception to prevent the wrong error code from being returned to the + // user: SPARK-49688. The issue was fixed by completely refactoring the code in Spark 4.0. + case e: Throwable if NonFatal.apply(e) => + logError(s"Error posting closed event to UI: ${e.getMessage()}") + } } // interrupt any attached grpcResponseSenders grpcResponseSenders.foreach(_.interrupt()) From e97580a904dab438b2b5a51e49db63602dc45592 Mon Sep 17 00:00:00 2001 From: andrej-gobeljic_data Date: Wed, 11 Dec 2024 16:52:11 -0800 Subject: [PATCH 36/51] [SPARK-50087][SQL][3.5] Robust handling of boolean expressions in CASE WHEN for MsSqlServer and future connectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR proposes to propagate the `isPredicate` info in `V2ExpressionBuilder` and wrap the children of a CASE WHEN expression (only `Predicate`s) with `IIF(<>, 1, 0)` for MsSqlServer. This is done to force returning an int instead of a boolean, as SqlServer cannot handle boolean expressions as a return type in CASE WHEN. E.g. ```CASE WHEN ... ELSE a = b END``` Old behavior: ```CASE WHEN ... ELSE a = b END = 1``` New behavior: Since in SqlServer a `= 1` is appended to the CASE WHEN expression, the THEN and ELSE blocks must return an int. Therefore the final expression becomes: ```CASE WHEN ... ELSE IIF(a = b, 1, 0) END = 1``` ### Why are the changes needed? A user cannot work with MsSqlServer data using CASE WHEN or IF clauses if they wish to return a boolean value. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added tests to MsSqlServerIntegrationSuite ### Was this patch authored or co-authored using generative AI tooling? No Closes #49115 from andrej-db/CASEWHENBackport.
Lead-authored-by: andrej-gobeljic_data Co-authored-by: Wenchen Fan Co-authored-by: Andrej Gobeljić Signed-off-by: Wenchen Fan --- .../jdbc/v2/MsSqlServerIntegrationSuite.scala | 79 +++++++++++++++++++ .../util/V2ExpressionSQLBuilder.java | 2 +- .../catalyst/util/V2ExpressionBuilder.scala | 6 +- .../execution/datasources/jdbc/JDBCRDD.scala | 31 +++++++- .../apache/spark/sql/jdbc/JdbcDialects.scala | 13 +++ .../spark/sql/jdbc/MsSqlServerDialect.scala | 24 +++++- 6 files changed, 147 insertions(+), 8 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala index de8fcf1a4a787..78fdbe7158bb7 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala @@ -22,7 +22,11 @@ import java.sql.Connection import org.scalatest.time.SpanSugar._ import org.apache.spark.{SparkConf, SparkSQLFeatureNotSupportedException} +import org.apache.spark.rdd.RDD import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.{RowDataSourceScanExec, SparkPlan} +import org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog import org.apache.spark.sql.jdbc.MsSQLServerDatabaseOnDocker import org.apache.spark.sql.types._ @@ -39,6 +43,17 @@ import org.apache.spark.tags.DockerTest @DockerTest class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { + def getExternalEngineQuery(executedPlan: SparkPlan): String = { + getExternalEngineRdd(executedPlan).asInstanceOf[JDBCRDD].getExternalEngineQuery + } + + def getExternalEngineRdd(executedPlan: SparkPlan): RDD[InternalRow] = { + val queryNode = executedPlan.collect { case r: RowDataSourceScanExec => + r + }.head + queryNode.rdd + } + override def excluded: Seq[String] = Seq( "simple scan with OFFSET", "simple scan with LIMIT and OFFSET", @@ -137,4 +152,68 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JD "WHERE (dept > 1 AND ((name LIKE 'am%') = (name LIKE '%y')))") assert(df3.collect().length == 3) } + + test("SPARK-50087: SqlServer handle booleans in CASE WHEN test") { + val df = sql( + s"""|SELECT * FROM $catalogName.employee + |WHERE CASE WHEN name = 'Legolas' THEN name = 'Elf' ELSE NOT (name = 'Wizard') END + |""".stripMargin + ) + + // scalastyle:off + assert(getExternalEngineQuery(df.queryExecution.executedPlan) == + """SELECT "dept","name","salary","bonus" FROM "employee" WHERE (CASE WHEN ("name" = 'Legolas') THEN IIF(("name" = 'Elf'), 1, 0) ELSE IIF(("name" <> 'Wizard'), 1, 0) END = 1) """ + ) + // scalastyle:on + df.collect() + } + + test("SPARK-50087: SqlServer handle booleans in CASE WHEN with always true test") { + val df = sql( + s"""|SELECT * FROM $catalogName.employee + |WHERE CASE WHEN (name = 'Legolas') THEN (name = 'Elf') ELSE (1=1) END + |""".stripMargin + ) + + // scalastyle:off + assert(getExternalEngineQuery(df.queryExecution.executedPlan) == + """SELECT "dept","name","salary","bonus" FROM "employee" WHERE (CASE WHEN ("name" = 'Legolas') THEN IIF(("name" = 'Elf'), 1, 0) ELSE 1 END = 1) """ + ) + // scalastyle:on + df.collect() + } + + test("SPARK-50087: SqlServer handle 
booleans in nested CASE WHEN test") { + val df = sql( + s"""|SELECT * FROM $catalogName.employee + |WHERE CASE WHEN (name = 'Legolas') THEN + | CASE WHEN (name = 'Elf') THEN (name = 'Elrond') ELSE (name = 'Gandalf') END + | ELSE (name = 'Sauron') END + |""".stripMargin + ) + + // scalastyle:off + assert(getExternalEngineQuery(df.queryExecution.executedPlan) == + """SELECT "dept","name","salary","bonus" FROM "employee" WHERE (CASE WHEN ("name" = 'Legolas') THEN IIF((CASE WHEN ("name" = 'Elf') THEN IIF(("name" = 'Elrond'), 1, 0) ELSE IIF(("name" = 'Gandalf'), 1, 0) END = 1), 1, 0) ELSE IIF(("name" = 'Sauron'), 1, 0) END = 1) """ + ) + // scalastyle:on + df.collect() + } + + test("SPARK-50087: SqlServer handle non-booleans in nested CASE WHEN test") { + val df = sql( + s"""|SELECT * FROM $catalogName.employee + |WHERE CASE WHEN (name = 'Legolas') THEN + | CASE WHEN (name = 'Elf') THEN 'Elf' ELSE 'Wizard' END + | ELSE 'Sauron' END = name + |""".stripMargin + ) + + // scalastyle:off + assert(getExternalEngineQuery(df.queryExecution.executedPlan) == + """SELECT "dept","name","salary","bonus" FROM "employee" WHERE ("name" IS NOT NULL) AND ((CASE WHEN "name" = 'Legolas' THEN CASE WHEN "name" = 'Elf' THEN 'Elf' ELSE 'Wizard' END ELSE 'Sauron' END) = "name") """ + ) + // scalastyle:on + df.collect() + } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index e170951bfa284..193ffee003a3d 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -290,7 +290,7 @@ protected String visitContains(String l, String r) { return l + " LIKE '%" + escapeSpecialCharsForLikePattern(value) + "%' ESCAPE '\\'"; } - private String inputToSQL(Expression input) { + protected String inputToSQL(Expression input) { if (input.children().length > 1) { return "(" + build(input) + ")"; } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala index c7bca751e56e4..bbd5bace76b6c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala @@ -189,8 +189,8 @@ class V2ExpressionBuilder(e: Expression, isPredicate: Boolean = false) { case _: BitwiseNot => generateExpressionWithName("~", expr, isPredicate) case caseWhen @ CaseWhen(branches, elseValue) => val conditions = branches.map(_._1).flatMap(generateExpression(_, true)) - val values = branches.map(_._2).flatMap(generateExpression(_)) - val elseExprOpt = elseValue.flatMap(generateExpression(_)) + val values = branches.map(_._2).flatMap(generateExpression(_, isPredicate)) + val elseExprOpt = elseValue.flatMap(generateExpression(_, isPredicate)) if (conditions.length == branches.length && values.length == branches.length && elseExprOpt.size == elseValue.size) { val branchExpressions = conditions.zip(values).flatMap { case (c, v) => @@ -356,7 +356,7 @@ class V2ExpressionBuilder(e: Expression, isPredicate: Boolean = false) { children: Seq[Expression], dataType: DataType, isPredicate: Boolean): Option[V2Expression] = { - val childrenExpressions = children.flatMap(generateExpression(_)) + val childrenExpressions = 
children.flatMap(generateExpression(_, isPredicate)) if (childrenExpressions.length == children.length) { if (isPredicate && dataType.isInstanceOf[BooleanType]) { Some(new V2Predicate(v2ExpressionName, childrenExpressions.toArray[V2Expression])) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index e241951abe392..2bb2a3a1f67a0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -157,7 +157,7 @@ object JDBCRDD extends Logging { * Both the driver code and the workers must be able to access the database; the driver * needs to fetch the schema while the workers need to fetch the data. */ -private[jdbc] class JDBCRDD( +class JDBCRDD( sc: SparkContext, getConnection: Int => Connection, schema: StructType, @@ -173,11 +173,40 @@ private[jdbc] class JDBCRDD( offset: Int) extends RDD[InternalRow](sc, Nil) { + private lazy val dialect = JdbcDialects.get(url) + + def generateJdbcQuery(partition: Option[JDBCPartition]): String = { + // H2's JDBC driver does not support the setSchema() method. We pass a + // fully-qualified table name in the SELECT statement. I don't know how to + // talk about a table in a completely portable way. + var builder = dialect + .getJdbcSQLQueryBuilder(options) + .withPredicates(predicates, partition.getOrElse(JDBCPartition(whereClause = null, idx = 1))) + .withColumns(columns) + .withSortOrders(sortOrders) + .withLimit(limit) + .withOffset(offset) + + groupByColumns.foreach { groupByKeys => + builder = builder.withGroupByColumns(groupByKeys) + } + + sample.foreach { tableSampleInfo => + builder = builder.withTableSample(tableSampleInfo) + } + + builder.build() + } + /** * Retrieve the list of partitions corresponding to this RDD. */ override def getPartitions: Array[Partition] = partitions + def getExternalEngineQuery: String = { + generateJdbcQuery(partition = None) + } + /** * Runs the SQL query against the JDBC driver. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index ae8d89f0f0469..90fc48ff5276f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -38,6 +38,7 @@ import org.apache.spark.sql.connector.catalog.functions.UnboundFunction import org.apache.spark.sql.connector.catalog.index.TableIndex import org.apache.spark.sql.connector.expressions.{Expression, Literal, NamedReference} import org.apache.spark.sql.connector.expressions.aggregate.AggregateFunc +import org.apache.spark.sql.connector.expressions.filter.Predicate import org.apache.spark.sql.connector.util.V2ExpressionSQLBuilder import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.jdbc.{DriverRegistry, JDBCOptions, JdbcOptionsInWrite, JdbcUtils} @@ -279,6 +280,18 @@ abstract class JdbcDialect extends Serializable with Logging { } private[jdbc] class JDBCSQLBuilder extends V2ExpressionSQLBuilder { + // Some dialects do not support boolean type and this convenient util function is + // provided to generate SQL string without boolean values. 
+ protected def inputToSQLNoBool(input: Expression): String = input match { + case p: Predicate if p.name() == "ALWAYS_TRUE" => "1" + case p: Predicate if p.name() == "ALWAYS_FALSE" => "0" + case p: Predicate => predicateToIntSQL(inputToSQL(p)) + case _ => super.inputToSQL(input) + } + + protected def predicateToIntSQL(input: String): String = + "CASE WHEN " + input + " THEN 1 ELSE 0 END" + override def visitLiteral(literal: Literal[_]): String = { Option(literal.value()).map(v => compileValue(CatalystTypeConverters.convertToScala(v, literal.dataType())).toString) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala index 3022bca87a9f9..f7c9cdad2a43c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala @@ -66,6 +66,8 @@ private object MsSqlServerDialect extends JdbcDialect { supportedFunctions.contains(funcName) class MsSqlServerSQLBuilder extends JDBCSQLBuilder { + override protected def predicateToIntSQL(input: String): String = + "IIF(" + input + ", 1, 0)" override def visitSortOrder( sortKey: String, sortDirection: SortDirection, nullOrdering: NullOrdering): String = { (sortDirection, nullOrdering) match { @@ -93,9 +95,25 @@ private object MsSqlServerDialect extends JdbcDialect { // We shouldn't propagate these queries to MsSqlServer expr match { case e: Predicate => e.name() match { - case "=" | "<>" | "<=>" | "<" | "<=" | ">" | ">=" - if e.children().exists(_.isInstanceOf[Predicate]) => - super.visitUnexpectedExpr(expr) + case "=" | "<>" | "<=>" | "<" | "<=" | ">" | ">=" => + val Array(l, r) = e.children().map(inputToSQLNoBool) + visitBinaryComparison(e.name(), l, r) + case "CASE_WHEN" => + // Since MsSqlServer cannot handle boolean expressions inside + // a CASE WHEN, it is necessary to convert those to another + // CASE WHEN expression that will return 1 or 0 depending on + // the result. + // Example: + // In: ... CASE WHEN a = b THEN c = d ... END + // Out: ... CASE WHEN a = b THEN CASE WHEN c = d THEN 1 ELSE 0 END ... END = 1 + val stringArray = e.children().grouped(2).flatMap { + case Array(whenExpression, thenExpression) => + Array(inputToSQL(whenExpression), inputToSQLNoBool(thenExpression)) + case Array(elseExpression) => + Array(inputToSQLNoBool(elseExpression)) + }.toArray + + visitCaseWhen(stringArray) + " = 1" case _ => super.build(expr) } case _ => super.build(expr) From 92e650c9ccab7a5f4aa25af2d6c0d6052dfe576b Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Thu, 12 Dec 2024 20:47:18 +0800 Subject: [PATCH 37/51] [SPARK-50545][CORE][SQL][3.5] `AccessControlException` should be thrown even if `ignoreCorruptFiles` is enabled Cherry-pick https://github.com/apache/spark/issues/49143 to branch-3.5 ### What changes were proposed in this pull request? `AccessControlException` extends `IOException` but we should not treat it as a data corruption issue. This is similar to SPARK-50483 which handles `BlockMissingException` in the same way. 
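Before the reproduction log below, here is a minimal Scala sketch of the classification this change applies, assuming Hadoop's client libraries are on the classpath. The helper `readIgnoringCorruption` and its parameters are hypothetical, not Spark's actual reader loop; the ordering of the cases mirrors the diffs that follow:

```scala
import java.io.IOException

import org.apache.hadoop.hdfs.BlockMissingException
import org.apache.hadoop.security.AccessControlException

object CorruptFileHandlingSketch {
  // Permission and block-availability failures are not data corruption, so they must
  // propagate even when ignoreCorruptFiles is enabled; any other IOException is
  // logged and swallowed so the task can skip the rest of the corrupt input.
  def readIgnoringCorruption[T](ignoreCorruptFiles: Boolean)(readNext: => T): Option[T] =
    try Some(readNext)
    catch {
      // Both exception types extend IOException, so they must be matched before
      // the generic IOException case or they would be silently swallowed.
      case e @ (_: AccessControlException | _: BlockMissingException) => throw e
      case e: IOException if ignoreCorruptFiles =>
        println(s"Skipped the rest content in the corrupted file: ${e.getMessage}")
        None
    }
}
```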
``` 2024-12-11 06:29:05 WARN HadoopRDD: Skipped the rest content in the corrupted file: hdfs://hadoop-master1.orb.local:8020/warehouse/region/part-00000-2dc8a6f6-8cea-4652-8ba1-762c1b65e2b4-c000:192+192 org.apache.hadoop.security.AccessControlException: Permission denied: user=hive, access=READ, inode="/warehouse/region/part-00000-2dc8a6f6-8cea-4652-8ba1-762c1b65e2b4-c000":kyuubi.hadoop:hadoop:-rw------- at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:506) ``` ### Why are the changes needed? Avoid data issues when `ignoreCorruptFiles` is enabled and an `AccessControlException` occurs. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Manual test. Task fails with `org.apache.hadoop.security.AccessControlException` even with `spark.sql.files.ignoreCorruptFiles=true` and `spark.files.ignoreCorruptFiles=true` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49162 from pan3793/SPARK-50545-3.5. Authored-by: Cheng Pan Signed-off-by: yangjie01 --- core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala | 5 +++-- core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala | 5 +++-- .../apache/spark/sql/execution/datasources/FileScanRDD.scala | 3 ++- .../sql/execution/datasources/v2/FilePartitionReader.scala | 5 +++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index edd07a2649dbb..8aa7d54fd61b9 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -31,6 +31,7 @@ import org.apache.hadoop.mapred._ import org.apache.hadoop.mapred.lib.CombineFileSplit import org.apache.hadoop.mapreduce.TaskType import org.apache.hadoop.mapreduce.lib.input.FileInputFormat +import org.apache.hadoop.security.AccessControlException import org.apache.hadoop.util.ReflectionUtils import org.apache.spark._ @@ -294,7 +295,7 @@ class HadoopRDD[K, V]( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e - case e: BlockMissingException => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning(s"Skipped the rest content in the corrupted file: ${split.inputSplit}", e) finished = true @@ -320,7 +321,7 @@ class HadoopRDD[K, V]( finished = true // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e - case e: BlockMissingException => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning(s"Skipped the rest content in the corrupted file: ${split.inputSplit}", e) finished = true diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index fbd2235aabaf6..7fc93806998bf 100644 --- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -32,6 +32,7 @@ import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, FileInputFormat, FileSplit, InvalidInputException} import org.apache.hadoop.mapreduce.task.{JobContextImpl, TaskAttemptContextImpl}
+import org.apache.hadoop.security.AccessControlException import org.apache.spark._ import org.apache.spark.annotation.DeveloperApi @@ -228,7 +229,7 @@ class NewHadoopRDD[K, V]( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e - case e: BlockMissingException => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning( s"Skipped the rest content in the corrupted file: ${split.serializableHadoopSplit}", @@ -257,7 +258,7 @@ class NewHadoopRDD[K, V]( finished = true // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e - case e: BlockMissingException => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning( s"Skipped the rest content in the corrupted file: ${split.serializableHadoopSplit}", diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala index ce56fc1b28296..8f6f981ec6a73 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala @@ -24,6 +24,7 @@ import scala.util.control.NonFatal import org.apache.hadoop.fs.Path import org.apache.hadoop.hdfs.BlockMissingException +import org.apache.hadoop.security.AccessControlException import org.apache.spark.{Partition => RDDPartition, SparkUpgradeException, TaskContext} import org.apache.spark.deploy.SparkHadoopUtil @@ -260,7 +261,7 @@ class FileScanRDD( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e - case e: BlockMissingException => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => logWarning( s"Skipped the rest of the content in the corrupted file: $currentFile", e) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala index 8f51226dcfe99..6a63d8268c3b0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala @@ -21,6 +21,7 @@ import java.io.{FileNotFoundException, IOException} import scala.util.control.NonFatal import org.apache.hadoop.hdfs.BlockMissingException +import org.apache.hadoop.security.AccessControlException import org.apache.spark.SparkUpgradeException import org.apache.spark.internal.Logging @@ -51,7 +52,7 @@ class FilePartitionReader[T]( // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw QueryExecutionErrors.fileNotFoundError(e) - case e: BlockMissingException => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => logWarning( s"Skipped the rest of the content in the corrupted file.", e) @@ -71,7 +72,7 @@ class FilePartitionReader[T]( throw 
QueryExecutionErrors.unsupportedSchemaColumnConvertError( currentReader.file.urlEncodedPath, e.getColumn, e.getLogicalType, e.getPhysicalType, e) - case e: BlockMissingException => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => logWarning( s"Skipped the rest of the content in the corrupted file: $currentReader", e) From 91af6f9c16f773bdf84dce678eb6ab7b6acb90fd Mon Sep 17 00:00:00 2001 From: Jie Yang Date: Mon, 16 Dec 2024 01:34:44 +0000 Subject: [PATCH 38/51] Preparing Spark release v3.5.4-rc2 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- common/utils/pom.xml | 2 +- connector/avro/pom.xml | 2 +- connector/connect/client/jvm/pom.xml | 2 +- connector/connect/common/pom.xml | 2 +- connector/connect/server/pom.xml | 2 +- connector/docker-integration-tests/pom.xml | 2 +- connector/kafka-0-10-assembly/pom.xml | 2 +- connector/kafka-0-10-sql/pom.xml | 2 +- connector/kafka-0-10-token-provider/pom.xml | 2 +- connector/kafka-0-10/pom.xml | 2 +- connector/kinesis-asl-assembly/pom.xml | 2 +- connector/kinesis-asl/pom.xml | 2 +- connector/protobuf/pom.xml | 2 +- connector/spark-ganglia-lgpl/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/api/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 45 files changed, 47 insertions(+), 47 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 5eca59375425e..8657755b8d0ea 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.5.5 +Version: 3.5.4 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index ee2e7b48871ee..47b38621d6400 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index a5ac18252d9c9..3757f69e9bd17 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index c3f33905ae20c..83243d183b7b9 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index de0af6da6c9ec..e74fb05beb0ae 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 54edc410aa9cc..13c4b5cca1e32 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 88ae8e2715a04..709bbed0c553c 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index adeab180901c6..59e9973c42d05 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 0518930d7eb5b..e222499eec228 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/utils/pom.xml b/common/utils/pom.xml index fc15de78ed505..7b2a1ad57b0ff 100644 --- a/common/utils/pom.xml +++ b/common/utils/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml index 8d78204ddce30..1a6fe528b9168 100644 --- a/connector/avro/pom.xml +++ b/connector/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml index c220ce4d032e5..695146d7a1113 100644 --- a/connector/connect/client/jvm/pom.xml +++ b/connector/connect/client/jvm/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../../pom.xml diff --git a/connector/connect/common/pom.xml b/connector/connect/common/pom.xml index 8fd5820c35c71..6c50469717f95 100644 --- a/connector/connect/common/pom.xml +++ b/connector/connect/common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml index 747c56d148ebb..aeadbacb7c692 100644 --- a/connector/connect/server/pom.xml +++ b/connector/connect/server/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/connector/docker-integration-tests/pom.xml 
b/connector/docker-integration-tests/pom.xml index 878bd4590b50f..435c0fbd797aa 100644 --- a/connector/docker-integration-tests/pom.xml +++ b/connector/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml index 119761501dad9..8b6d7d47b0392 100644 --- a/connector/kafka-0-10-assembly/pom.xml +++ b/connector/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-sql/pom.xml b/connector/kafka-0-10-sql/pom.xml index 51ab703585beb..dce1990f1c9d0 100644 --- a/connector/kafka-0-10-sql/pom.xml +++ b/connector/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-token-provider/pom.xml b/connector/kafka-0-10-token-provider/pom.xml index 7e1c6ebac9a7f..5973b9595db8a 100644 --- a/connector/kafka-0-10-token-provider/pom.xml +++ b/connector/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10/pom.xml b/connector/kafka-0-10/pom.xml index 5014e1a1494ce..54ba2b22093d0 100644 --- a/connector/kafka-0-10/pom.xml +++ b/connector/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml index 65d86f5617b1c..5e0c0fcafc12b 100644 --- a/connector/kinesis-asl-assembly/pom.xml +++ b/connector/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl/pom.xml b/connector/kinesis-asl/pom.xml index 47495110bf7d8..65b0fa33db29f 100644 --- a/connector/kinesis-asl/pom.xml +++ b/connector/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml index 3f417fba4b021..95be9ab74f105 100644 --- a/connector/protobuf/pom.xml +++ b/connector/protobuf/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/spark-ganglia-lgpl/pom.xml b/connector/spark-ganglia-lgpl/pom.xml index 47db5eb9253e8..62f1c4ab2b124 100644 --- a/connector/spark-ganglia-lgpl/pom.xml +++ b/connector/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index ed5c82d28ca68..e59066e19850d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 969b61fa00363..a207cc2d911b5 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. 
-SPARK_VERSION: 3.5.5-SNAPSHOT -SPARK_VERSION_SHORT: 3.5.5 +SPARK_VERSION: 3.5.4 +SPARK_VERSION_SHORT: 3.5.4 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.18" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.5.5"] + 'facetFilters': ["version:3.5.4"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index 506aa7836485a..5efc255218570 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index f330d4d1a5377..74ad5a732f054 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 29f47eec8a5c6..cb89c27d5f0ba 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index e309c1f734296..e36d57fe6a573 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index f8a3cf1cc16df..26e9a1502abab 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 1fa3e215977d9..079ce72d5d959 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/pom.xml b/pom.xml index 0ccb6ac76a9bc..8dc47f391f967 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index df09fc3284fbd..db20a2ffae586 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__: str = "3.5.5.dev0" +__version__: str = "3.5.4" diff --git a/repl/pom.xml b/repl/pom.xml index e3f52f07cc4d7..3f8c931a60664 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index e4d890a930a2a..5991f1848ccf8 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 85a125ddfe4b7..8ec4e86ab8f12 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 118f0034ddf5a..9ace7e29de0a0 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 4c85e90c4e485..f0df0ff0ea2f4 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/api/pom.xml b/sql/api/pom.xml index 1613c3218649b..c180a208bf093 100644 --- a/sql/api/pom.xml +++ b/sql/api/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index e7736c95007ad..58a2333b5b5a0 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 889fba8892568..9577de81c2057 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 110f9e168de6b..6c86bc35a89d7 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 8fc5d81ab8982..be3c952e4131b 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index e3002680f2173..21d2981fe1088 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 3530297bf8f83..7e5724b5d9dd3 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml From a764524c8ae49f0125ff868f96dd6fc751af0ede Mon Sep 17 00:00:00 2001 From: Jie Yang Date: Mon, 16 Dec 2024 01:34:51 +0000 Subject: [PATCH 39/51] Preparing development version 3.5.5-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- 
common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- common/utils/pom.xml | 2 +- connector/avro/pom.xml | 2 +- connector/connect/client/jvm/pom.xml | 2 +- connector/connect/common/pom.xml | 2 +- connector/connect/server/pom.xml | 2 +- connector/docker-integration-tests/pom.xml | 2 +- connector/kafka-0-10-assembly/pom.xml | 2 +- connector/kafka-0-10-sql/pom.xml | 2 +- connector/kafka-0-10-token-provider/pom.xml | 2 +- connector/kafka-0-10/pom.xml | 2 +- connector/kinesis-asl-assembly/pom.xml | 2 +- connector/kinesis-asl/pom.xml | 2 +- connector/protobuf/pom.xml | 2 +- connector/spark-ganglia-lgpl/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/api/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 45 files changed, 47 insertions(+), 47 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 8657755b8d0ea..5eca59375425e 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.5.4 +Version: 3.5.5 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 47b38621d6400..ee2e7b48871ee 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 3757f69e9bd17..a5ac18252d9c9 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 83243d183b7b9..c3f33905ae20c 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index e74fb05beb0ae..de0af6da6c9ec 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 13c4b5cca1e32..54edc410aa9cc 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 709bbed0c553c..88ae8e2715a04 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 59e9973c42d05..adeab180901c6 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git 
a/common/unsafe/pom.xml b/common/unsafe/pom.xml index e222499eec228..0518930d7eb5b 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/utils/pom.xml b/common/utils/pom.xml index 7b2a1ad57b0ff..fc15de78ed505 100644 --- a/common/utils/pom.xml +++ b/common/utils/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml index 1a6fe528b9168..8d78204ddce30 100644 --- a/connector/avro/pom.xml +++ b/connector/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml index 695146d7a1113..c220ce4d032e5 100644 --- a/connector/connect/client/jvm/pom.xml +++ b/connector/connect/client/jvm/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../../pom.xml diff --git a/connector/connect/common/pom.xml b/connector/connect/common/pom.xml index 6c50469717f95..8fd5820c35c71 100644 --- a/connector/connect/common/pom.xml +++ b/connector/connect/common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml index aeadbacb7c692..747c56d148ebb 100644 --- a/connector/connect/server/pom.xml +++ b/connector/connect/server/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/connector/docker-integration-tests/pom.xml b/connector/docker-integration-tests/pom.xml index 435c0fbd797aa..878bd4590b50f 100644 --- a/connector/docker-integration-tests/pom.xml +++ b/connector/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml index 8b6d7d47b0392..119761501dad9 100644 --- a/connector/kafka-0-10-assembly/pom.xml +++ b/connector/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-sql/pom.xml b/connector/kafka-0-10-sql/pom.xml index dce1990f1c9d0..51ab703585beb 100644 --- a/connector/kafka-0-10-sql/pom.xml +++ b/connector/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-token-provider/pom.xml b/connector/kafka-0-10-token-provider/pom.xml index 5973b9595db8a..7e1c6ebac9a7f 100644 --- a/connector/kafka-0-10-token-provider/pom.xml +++ b/connector/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10/pom.xml b/connector/kafka-0-10/pom.xml index 54ba2b22093d0..5014e1a1494ce 100644 --- a/connector/kafka-0-10/pom.xml +++ b/connector/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml index 5e0c0fcafc12b..65d86f5617b1c 100644 --- a/connector/kinesis-asl-assembly/pom.xml +++ b/connector/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git 
a/connector/kinesis-asl/pom.xml b/connector/kinesis-asl/pom.xml index 65b0fa33db29f..47495110bf7d8 100644 --- a/connector/kinesis-asl/pom.xml +++ b/connector/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml index 95be9ab74f105..3f417fba4b021 100644 --- a/connector/protobuf/pom.xml +++ b/connector/protobuf/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/spark-ganglia-lgpl/pom.xml b/connector/spark-ganglia-lgpl/pom.xml index 62f1c4ab2b124..47db5eb9253e8 100644 --- a/connector/spark-ganglia-lgpl/pom.xml +++ b/connector/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index e59066e19850d..ed5c82d28ca68 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index a207cc2d911b5..969b61fa00363 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.5.4 -SPARK_VERSION_SHORT: 3.5.4 +SPARK_VERSION: 3.5.5-SNAPSHOT +SPARK_VERSION_SHORT: 3.5.5 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.18" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.5.4"] + 'facetFilters': ["version:3.5.5"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index 5efc255218570..506aa7836485a 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 74ad5a732f054..f330d4d1a5377 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index cb89c27d5f0ba..29f47eec8a5c6 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index e36d57fe6a573..e309c1f734296 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 26e9a1502abab..f8a3cf1cc16df 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 079ce72d5d959..1fa3e215977d9 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 8dc47f391f967..0ccb6ac76a9bc 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index db20a2ffae586..df09fc3284fbd 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 
@@ # See the License for the specific language governing permissions and # limitations under the License. -__version__: str = "3.5.4" +__version__: str = "3.5.5.dev0" diff --git a/repl/pom.xml b/repl/pom.xml index 3f8c931a60664..e3f52f07cc4d7 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 5991f1848ccf8..e4d890a930a2a 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 8ec4e86ab8f12..85a125ddfe4b7 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 9ace7e29de0a0..118f0034ddf5a 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index f0df0ff0ea2f4..4c85e90c4e485 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/api/pom.xml b/sql/api/pom.xml index c180a208bf093..1613c3218649b 100644 --- a/sql/api/pom.xml +++ b/sql/api/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 58a2333b5b5a0..e7736c95007ad 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 9577de81c2057..889fba8892568 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 6c86bc35a89d7..110f9e168de6b 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index be3c952e4131b..8fc5d81ab8982 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 21d2981fe1088..e3002680f2173 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 7e5724b5d9dd3..3530297bf8f83 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml From 8168ea8548a4935a0506edcadc601850096a10ab Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 3 Dec 2024 15:37:43 +0900 Subject: [PATCH 40/51] [SPARK-50430][CORE][FOLLOW-UP] Keep the logic of manual putting key and values in 
Properties

### What changes were proposed in this pull request?

This PR proposes to preserve, more conservatively, the original code that creates a new `Properties` instance instead of cloning. The previous code only copied the keys and values, but `clone` actually copies more fields in `Properties`. `cloneProperties` is used in Spark Core and all other components, so I propose to keep the logic as is.

### Why are the changes needed?

This is more a fix of a potential bug.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

It is difficult to add a test.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #49036 from HyukjinKwon/SPARK-50430-followup.

Authored-by: Hyukjin Kwon
Signed-off-by: Hyukjin Kwon
(cherry picked from commit 4abaab3ffeba5a3d39216e7224928bb82b254e22)
Signed-off-by: Hyukjin Kwon
---
 core/src/main/scala/org/apache/spark/util/Utils.scala | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 8762f0a6cdbc3..a33bb33ea9c0f 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2982,7 +2982,9 @@ private[spark] object Utils
     if (props == null) {
       return props
     }
-    props.clone().asInstanceOf[Properties]
+    val resultProps = new Properties()
+    resultProps.putAll(props.clone().asInstanceOf[Properties])
+    resultProps
   }

   /**

From 0fbe292774a856ae49c436e9eb83441e9c38f7de Mon Sep 17 00:00:00 2001
From: Hyukjin Kwon
Date: Mon, 16 Dec 2024 14:06:38 +0900
Subject: [PATCH 41/51] Revert "[SPARK-50430][CORE][FOLLOW-UP] Keep the logic of manual putting key and values in Properties"

This reverts commit 8168ea8548a4935a0506edcadc601850096a10ab.
---
 core/src/main/scala/org/apache/spark/util/Utils.scala | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index a33bb33ea9c0f..8762f0a6cdbc3 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2982,9 +2982,7 @@ private[spark] object Utils
     if (props == null) {
       return props
     }
-    val resultProps = new Properties()
-    resultProps.putAll(props.clone().asInstanceOf[Properties])
-    resultProps
+    props.clone().asInstanceOf[Properties]
   }

   /**

From f7c48fe24fd8fb5f7f3eb9b3bec2659d99e4307c Mon Sep 17 00:00:00 2001
From: Hyukjin Kwon
Date: Mon, 16 Dec 2024 14:06:45 +0900
Subject: [PATCH 42/51] Revert "[SPARK-50430][CORE] Use the standard Properties.clone instead of manual clone"

This reverts commit 5ff129ac8261c674b90545f3e1651e166dbc6249.
---
 core/src/main/scala/org/apache/spark/util/Utils.scala | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 8762f0a6cdbc3..3b0efffedec6f 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2982,7 +2982,9 @@ private[spark] object Utils
     if (props == null) {
       return props
     }
-    props.clone().asInstanceOf[Properties]
+    val resultProps = new Properties()
+    props.forEach((k, v) => resultProps.put(k, v))
+    resultProps
   }

   /**
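Stepping back from the diffs above, the distinction these reverts wrestle with is easy to miss, so here is a minimal, standalone sketch (illustrative only, not Spark code; the property names are invented): `Properties.clone()` carries over the internal `defaults` table, while copying entries with `forEach` — the behavior the reverts ultimately restore — does not.

```scala
import java.util.Properties

object ClonePropertiesDemo {
  def main(args: Array[String]): Unit = {
    val defaults = new Properties()
    defaults.setProperty("fallback.key", "fallback")
    // "fallback.key" is resolvable only through the defaults table
    val props = new Properties(defaults)
    props.setProperty("explicit.key", "value")

    // clone() copies the object's fields, including the defaults reference
    val cloned = props.clone().asInstanceOf[Properties]
    assert(cloned.getProperty("fallback.key") == "fallback")

    // forEach iterates only the explicitly set entries, so a manual copy
    // drops the defaults
    val manual = new Properties()
    props.forEach((k, v) => manual.put(k, v))
    assert(manual.getProperty("fallback.key") == null)
  }
}
```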
From b0a7d4d4c5be46282aa9be3a08754c201f38fc2e Mon Sep 17 00:00:00 2001
From: yangjie01
Date: Mon, 16 Dec 2024 20:30:26 +0800
Subject: [PATCH 43/51] [SPARK-50587][INFRA][3.5] Remove unsupported `curl` option `--retry-all-errors` from `release-build.sh`

### What changes were proposed in this pull request?

This PR aims to remove the unsupported `curl` option `--retry-all-errors` from branch-3.5's `release-build.sh`.

### Why are the changes needed?

branch-3.5 uses Ubuntu 20.04 for release, and the `curl` installed via `apt-get install` on Ubuntu 20.04 does not yet support `--retry-all-errors`.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Manually tested.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #49201 from LuciferYang/SPARK-50587.

Authored-by: yangjie01
Signed-off-by: yangjie01
---
 dev/create-release/release-build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
index 3546e721edbd2..7728e84bf9066 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -505,7 +505,7 @@ if [[ "$1" == "publish-release" ]]; then
     file_short=$(echo $file | sed -e "s/\.\///")
     dest_url="$nexus_upload/org/apache/spark/$file_short"
     echo "  Uploading $file_short"
-    curl --retry 3 --retry-all-errors -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url
+    curl --retry 3 -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url
   done

   echo "Closing nexus staging repository"

From a6f220d951742f4074b37772485ee0ec7a774e7d Mon Sep 17 00:00:00 2001
From: Jie Yang
Date: Tue, 17 Dec 2024 04:09:53 +0000
Subject: [PATCH 44/51] Preparing Spark release v3.5.4-rc3

---
 R/pkg/DESCRIPTION | 2 +-
 assembly/pom.xml | 2 +-
 common/kvstore/pom.xml | 2 +-
 common/network-common/pom.xml | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml | 2 +-
 common/sketch/pom.xml | 2 +-
 common/tags/pom.xml | 2 +-
 common/unsafe/pom.xml | 2 +-
 common/utils/pom.xml | 2 +-
 connector/avro/pom.xml | 2 +-
 connector/connect/client/jvm/pom.xml | 2 +-
 connector/connect/common/pom.xml | 2 +-
 connector/connect/server/pom.xml | 2 +-
 connector/docker-integration-tests/pom.xml | 2 +-
 connector/kafka-0-10-assembly/pom.xml | 2 +-
 connector/kafka-0-10-sql/pom.xml | 2 +-
 connector/kafka-0-10-token-provider/pom.xml | 2 +-
 connector/kafka-0-10/pom.xml | 2 +-
 connector/kinesis-asl-assembly/pom.xml | 2 +-
 connector/kinesis-asl/pom.xml | 2 +-
 connector/protobuf/pom.xml | 2 +-
 connector/spark-ganglia-lgpl/pom.xml | 2 +-
 core/pom.xml | 2 +-
 docs/_config.yml | 6 +++---
 examples/pom.xml | 2 +-
 graphx/pom.xml | 2 +-
 hadoop-cloud/pom.xml | 2 +-
 launcher/pom.xml | 2 +-
 mllib-local/pom.xml | 2 +-
 mllib/pom.xml | 2 +-
 pom.xml | 2 +-
 python/pyspark/version.py | 2 +-
 repl/pom.xml | 2 +-
 resource-managers/kubernetes/core/pom.xml | 2 +-
 resource-managers/kubernetes/integration-tests/pom.xml | 2 +-
 resource-managers/mesos/pom.xml | 2 +-
 resource-managers/yarn/pom.xml | 2 +-
 sql/api/pom.xml | 2 +-
 sql/catalyst/pom.xml | 2 +-
 sql/core/pom.xml | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 45 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 5eca59375425e..8657755b8d0ea 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: SparkR
 Type: Package
-Version: 3.5.5
+Version: 3.5.4
 Title: R Front End for 'Apache Spark'
 Description: Provides an R Front end for 'Apache Spark' .
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index ee2e7b48871ee..47b38621d6400 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index a5ac18252d9c9..3757f69e9bd17 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index c3f33905ae20c..83243d183b7b9 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index de0af6da6c9ec..e74fb05beb0ae 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 54edc410aa9cc..13c4b5cca1e32 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 88ae8e2715a04..709bbed0c553c 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index adeab180901c6..59e9973c42d05 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 0518930d7eb5b..e222499eec228 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/common/utils/pom.xml b/common/utils/pom.xml index fc15de78ed505..7b2a1ad57b0ff 100644 --- a/common/utils/pom.xml +++ b/common/utils/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml index 8d78204ddce30..1a6fe528b9168 100644 --- a/connector/avro/pom.xml +++ b/connector/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml index c220ce4d032e5..695146d7a1113 100644 --- a/connector/connect/client/jvm/pom.xml +++ b/connector/connect/client/jvm/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../../pom.xml diff --git a/connector/connect/common/pom.xml b/connector/connect/common/pom.xml index 8fd5820c35c71..6c50469717f95 100644 --- a/connector/connect/common/pom.xml +++ b/connector/connect/common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml index 747c56d148ebb..aeadbacb7c692 100644 --- a/connector/connect/server/pom.xml +++ b/connector/connect/server/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/connector/docker-integration-tests/pom.xml 
b/connector/docker-integration-tests/pom.xml index 878bd4590b50f..435c0fbd797aa 100644 --- a/connector/docker-integration-tests/pom.xml +++ b/connector/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml index 119761501dad9..8b6d7d47b0392 100644 --- a/connector/kafka-0-10-assembly/pom.xml +++ b/connector/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-sql/pom.xml b/connector/kafka-0-10-sql/pom.xml index 51ab703585beb..dce1990f1c9d0 100644 --- a/connector/kafka-0-10-sql/pom.xml +++ b/connector/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-token-provider/pom.xml b/connector/kafka-0-10-token-provider/pom.xml index 7e1c6ebac9a7f..5973b9595db8a 100644 --- a/connector/kafka-0-10-token-provider/pom.xml +++ b/connector/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10/pom.xml b/connector/kafka-0-10/pom.xml index 5014e1a1494ce..54ba2b22093d0 100644 --- a/connector/kafka-0-10/pom.xml +++ b/connector/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml index 65d86f5617b1c..5e0c0fcafc12b 100644 --- a/connector/kinesis-asl-assembly/pom.xml +++ b/connector/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl/pom.xml b/connector/kinesis-asl/pom.xml index 47495110bf7d8..65b0fa33db29f 100644 --- a/connector/kinesis-asl/pom.xml +++ b/connector/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml index 3f417fba4b021..95be9ab74f105 100644 --- a/connector/protobuf/pom.xml +++ b/connector/protobuf/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/connector/spark-ganglia-lgpl/pom.xml b/connector/spark-ganglia-lgpl/pom.xml index 47db5eb9253e8..62f1c4ab2b124 100644 --- a/connector/spark-ganglia-lgpl/pom.xml +++ b/connector/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index ed5c82d28ca68..e59066e19850d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 969b61fa00363..a207cc2d911b5 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. 
-SPARK_VERSION: 3.5.5-SNAPSHOT -SPARK_VERSION_SHORT: 3.5.5 +SPARK_VERSION: 3.5.4 +SPARK_VERSION_SHORT: 3.5.4 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.18" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.5.5"] + 'facetFilters': ["version:3.5.4"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index 506aa7836485a..5efc255218570 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index f330d4d1a5377..74ad5a732f054 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 29f47eec8a5c6..cb89c27d5f0ba 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index e309c1f734296..e36d57fe6a573 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index f8a3cf1cc16df..26e9a1502abab 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 1fa3e215977d9..079ce72d5d959 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/pom.xml b/pom.xml index 0ccb6ac76a9bc..8dc47f391f967 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index df09fc3284fbd..db20a2ffae586 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__: str = "3.5.5.dev0" +__version__: str = "3.5.4" diff --git a/repl/pom.xml b/repl/pom.xml index e3f52f07cc4d7..3f8c931a60664 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index e4d890a930a2a..5991f1848ccf8 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 85a125ddfe4b7..8ec4e86ab8f12 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 118f0034ddf5a..9ace7e29de0a0 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 4c85e90c4e485..f0df0ff0ea2f4 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/api/pom.xml b/sql/api/pom.xml index 1613c3218649b..c180a208bf093 100644 --- a/sql/api/pom.xml +++ b/sql/api/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index e7736c95007ad..58a2333b5b5a0 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 889fba8892568..9577de81c2057 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 110f9e168de6b..6c86bc35a89d7 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 8fc5d81ab8982..be3c952e4131b 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index e3002680f2173..21d2981fe1088 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 3530297bf8f83..7e5724b5d9dd3 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.5-SNAPSHOT + 3.5.4 ../pom.xml From bcaa5a99ab35bdcf51da130a26d998dafc2f3a64 Mon Sep 17 00:00:00 2001 From: Jie Yang Date: Tue, 17 Dec 2024 04:09:59 +0000 Subject: [PATCH 45/51] Preparing development version 3.5.5-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- 
common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- common/utils/pom.xml | 2 +- connector/avro/pom.xml | 2 +- connector/connect/client/jvm/pom.xml | 2 +- connector/connect/common/pom.xml | 2 +- connector/connect/server/pom.xml | 2 +- connector/docker-integration-tests/pom.xml | 2 +- connector/kafka-0-10-assembly/pom.xml | 2 +- connector/kafka-0-10-sql/pom.xml | 2 +- connector/kafka-0-10-token-provider/pom.xml | 2 +- connector/kafka-0-10/pom.xml | 2 +- connector/kinesis-asl-assembly/pom.xml | 2 +- connector/kinesis-asl/pom.xml | 2 +- connector/protobuf/pom.xml | 2 +- connector/spark-ganglia-lgpl/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/api/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 45 files changed, 47 insertions(+), 47 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 8657755b8d0ea..5eca59375425e 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.5.4 +Version: 3.5.5 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 47b38621d6400..ee2e7b48871ee 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 3757f69e9bd17..a5ac18252d9c9 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 83243d183b7b9..c3f33905ae20c 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index e74fb05beb0ae..de0af6da6c9ec 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 13c4b5cca1e32..54edc410aa9cc 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 709bbed0c553c..88ae8e2715a04 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 59e9973c42d05..adeab180901c6 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git 
a/common/unsafe/pom.xml b/common/unsafe/pom.xml index e222499eec228..0518930d7eb5b 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/common/utils/pom.xml b/common/utils/pom.xml index 7b2a1ad57b0ff..fc15de78ed505 100644 --- a/common/utils/pom.xml +++ b/common/utils/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml index 1a6fe528b9168..8d78204ddce30 100644 --- a/connector/avro/pom.xml +++ b/connector/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml index 695146d7a1113..c220ce4d032e5 100644 --- a/connector/connect/client/jvm/pom.xml +++ b/connector/connect/client/jvm/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../../pom.xml diff --git a/connector/connect/common/pom.xml b/connector/connect/common/pom.xml index 6c50469717f95..8fd5820c35c71 100644 --- a/connector/connect/common/pom.xml +++ b/connector/connect/common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml index aeadbacb7c692..747c56d148ebb 100644 --- a/connector/connect/server/pom.xml +++ b/connector/connect/server/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../../pom.xml diff --git a/connector/docker-integration-tests/pom.xml b/connector/docker-integration-tests/pom.xml index 435c0fbd797aa..878bd4590b50f 100644 --- a/connector/docker-integration-tests/pom.xml +++ b/connector/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml index 8b6d7d47b0392..119761501dad9 100644 --- a/connector/kafka-0-10-assembly/pom.xml +++ b/connector/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-sql/pom.xml b/connector/kafka-0-10-sql/pom.xml index dce1990f1c9d0..51ab703585beb 100644 --- a/connector/kafka-0-10-sql/pom.xml +++ b/connector/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-token-provider/pom.xml b/connector/kafka-0-10-token-provider/pom.xml index 5973b9595db8a..7e1c6ebac9a7f 100644 --- a/connector/kafka-0-10-token-provider/pom.xml +++ b/connector/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10/pom.xml b/connector/kafka-0-10/pom.xml index 54ba2b22093d0..5014e1a1494ce 100644 --- a/connector/kafka-0-10/pom.xml +++ b/connector/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml index 5e0c0fcafc12b..65d86f5617b1c 100644 --- a/connector/kinesis-asl-assembly/pom.xml +++ b/connector/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.4 + 3.5.5-SNAPSHOT ../../pom.xml diff --git 
a/connector/kinesis-asl/pom.xml b/connector/kinesis-asl/pom.xml
index 65b0fa33db29f..47495110bf7d8 100644
--- a/connector/kinesis-asl/pom.xml
+++ b/connector/kinesis-asl/pom.xml
@@ -20,7 +20,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml
index 95be9ab74f105..3f417fba4b021 100644
--- a/connector/protobuf/pom.xml
+++ b/connector/protobuf/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/connector/spark-ganglia-lgpl/pom.xml b/connector/spark-ganglia-lgpl/pom.xml
index 62f1c4ab2b124..47db5eb9253e8 100644
--- a/connector/spark-ganglia-lgpl/pom.xml
+++ b/connector/spark-ganglia-lgpl/pom.xml
@@ -20,7 +20,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index e59066e19850d..ed5c82d28ca68 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/docs/_config.yml b/docs/_config.yml
index a207cc2d911b5..969b61fa00363 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -19,8 +19,8 @@ include:
 # These allow the documentation to be updated with newer releases
 # of Spark, Scala, and Mesos.
-SPARK_VERSION: 3.5.4
-SPARK_VERSION_SHORT: 3.5.4
+SPARK_VERSION: 3.5.5-SNAPSHOT
+SPARK_VERSION_SHORT: 3.5.5
 SCALA_BINARY_VERSION: "2.12"
 SCALA_VERSION: "2.12.18"
 MESOS_VERSION: 1.0.0
@@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: |
     inputSelector: '#docsearch-input',
     enhancedSearchInput: true,
     algoliaOptions: {
-      'facetFilters': ["version:3.5.4"]
+      'facetFilters': ["version:3.5.5"]
     },
     debug: false // Set debug to true if you want to inspect the dropdown
   });
diff --git a/examples/pom.xml b/examples/pom.xml
index 5efc255218570..506aa7836485a 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 74ad5a732f054..f330d4d1a5377 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml
index cb89c27d5f0ba..29f47eec8a5c6 100644
--- a/hadoop-cloud/pom.xml
+++ b/hadoop-cloud/pom.xml
@@ -22,7 +22,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/launcher/pom.xml b/launcher/pom.xml
index e36d57fe6a573..e309c1f734296 100644
--- a/launcher/pom.xml
+++ b/launcher/pom.xml
@@ -22,7 +22,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml
index 26e9a1502abab..f8a3cf1cc16df 100644
--- a/mllib-local/pom.xml
+++ b/mllib-local/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 079ce72d5d959..1fa3e215977d9 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/pom.xml b/pom.xml
index 8dc47f391f967..0ccb6ac76a9bc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -26,7 +26,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 pom
 Spark Project Parent POM
 https://spark.apache.org/
diff --git a/python/pyspark/version.py b/python/pyspark/version.py
index db20a2ffae586..df09fc3284fbd 100644
--- a/python/pyspark/version.py
+++ b/python/pyspark/version.py
@@ -16,4 +16,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__: str = "3.5.4"
+__version__: str = "3.5.5.dev0"
diff --git a/repl/pom.xml b/repl/pom.xml
index 3f8c931a60664..e3f52f07cc4d7 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml
index 5991f1848ccf8..e4d890a930a2a 100644
--- a/resource-managers/kubernetes/core/pom.xml
+++ b/resource-managers/kubernetes/core/pom.xml
@@ -20,7 +20,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../../pom.xml
diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml
index 8ec4e86ab8f12..85a125ddfe4b7 100644
--- a/resource-managers/kubernetes/integration-tests/pom.xml
+++ b/resource-managers/kubernetes/integration-tests/pom.xml
@@ -20,7 +20,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../../pom.xml
diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml
index 9ace7e29de0a0..118f0034ddf5a 100644
--- a/resource-managers/mesos/pom.xml
+++ b/resource-managers/mesos/pom.xml
@@ -20,7 +20,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml
index f0df0ff0ea2f4..4c85e90c4e485 100644
--- a/resource-managers/yarn/pom.xml
+++ b/resource-managers/yarn/pom.xml
@@ -20,7 +20,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/sql/api/pom.xml b/sql/api/pom.xml
index c180a208bf093..1613c3218649b 100644
--- a/sql/api/pom.xml
+++ b/sql/api/pom.xml
@@ -22,7 +22,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml
index 58a2333b5b5a0..e7736c95007ad 100644
--- a/sql/catalyst/pom.xml
+++ b/sql/catalyst/pom.xml
@@ -22,7 +22,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 9577de81c2057..889fba8892568 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -22,7 +22,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml
index 6c86bc35a89d7..110f9e168de6b 100644
--- a/sql/hive-thriftserver/pom.xml
+++ b/sql/hive-thriftserver/pom.xml
@@ -22,7 +22,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml
index be3c952e4131b..8fc5d81ab8982 100644
--- a/sql/hive/pom.xml
+++ b/sql/hive/pom.xml
@@ -22,7 +22,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 21d2981fe1088..e3002680f2173 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index 7e5724b5d9dd3..3530297bf8f83 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
 org.apache.spark
 spark-parent_2.12
- 3.5.4
+ 3.5.5-SNAPSHOT
 ../pom.xml

From 45349b652e2d6c24b4f5eef1f6118389c0d68ed8 Mon Sep 17 00:00:00 2001
From: changgyoopark-db
Date: Tue, 17 Dec 2024 20:12:22 +0900
Subject: [PATCH 46/51] [SPARK-50510][CONNECT][TEST][3.5] Fix flaky ReattachableExecuteSuite

### What changes were proposed in this pull request?
Simplify org.apache.spark.sql.connect.execution.ReattachableExecuteSuite."reattach after connection expired" to make it more deterministic.

### Why are the changes needed?

The test previously involved execution and interruption that made the test unnecessarily flaky, e.g., an exception was thrown when releasing the corresponding [execution](https://github.com/apache/spark/actions/runs/12296721038/job/34316344940), not when reattaching the execution.
- The test's sole purpose is to check whether the lack of 'session' results in the correct error code.
- The involvement of actual query execution only makes the test flaky and complicated.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Repeatedly ran testOnly org.apache.spark.sql.connect.execution.ReattachableExecuteSuite.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #49203 from changgyoopark-db/SPARK-50510.

Authored-by: changgyoopark-db
Signed-off-by: Hyukjin Kwon
---
 .../execution/ReattachableExecuteSuite.scala | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala
index f828e45e6a6c8..00de9fb6fd260 100644
--- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala
+++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala
@@ -58,18 +58,15 @@ class ReattachableExecuteSuite extends SparkConnectServerTest {

   test("reattach after connection expired") {
     withClient { client =>
-      val iter = client.execute(buildPlan(MEDIUM_RESULTS_QUERY))
-      val operationId = getReattachableIterator(iter).operationId
-      // open the iterator
-      iter.next()
-
-      SparkConnectService.invalidateSession(defaultUserId, defaultSessionId)
       withRawBlockingStub { stub =>
-        val iter2 = stub.reattachExecute(buildReattachExecuteRequest(operationId, None))
+        // emulate session expiration
+        SparkConnectService.invalidateSession(defaultUserId, defaultSessionId)

-        // session closed, bound to fail
+        // session closed, bound to fail immediately
+        val operationId = UUID.randomUUID().toString
+        val iter = stub.reattachExecute(buildReattachExecuteRequest(operationId, None))
         val e = intercept[StatusRuntimeException] {
-          while (iter2.hasNext) iter2.next()
+          iter.next()
         }
         assert(e.getMessage.contains("INVALID_HANDLE.SESSION_NOT_FOUND"))
       }
From a3d23fdb775bee3f03c52a77b80bc0c724108e20 Mon Sep 17 00:00:00 2001
From: Neil Ramaswamy
Date: Wed, 18 Dec 2024 15:45:36 +0900
Subject: [PATCH 47/51] [MINOR][SS] Minor update to watermark propagation comments

### What changes were proposed in this pull request?

A few minor changes to clarify the comments for watermark propagation in Structured Streaming (and to fix one typo).

### Why are the changes needed?

I found some of the terminology around "simulation" confusing, and the current comment describes incorrect logic for output watermark calculation.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

N/A.

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #49188 from neilramaswamy/nr/minor-wm-prop.

Authored-by: Neil Ramaswamy
Signed-off-by: Jungtaek Lim
(cherry picked from commit 2b41131d7fa66ef5b23fbe247e057d631ee5e4f6)
Signed-off-by: Jungtaek Lim
---
 .../sql/execution/streaming/WatermarkPropagator.scala | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkPropagator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkPropagator.scala
index 6f3725bebb9ab..3d9325f9c98c5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkPropagator.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkPropagator.scala
@@ -124,12 +124,14 @@ class UseSingleWatermarkPropagator extends WatermarkPropagator {
 /**
  * This implementation simulates propagation of watermark among operators.
  *
- * The simulation algorithm traverses the physical plan tree via post-order (children first) to
- * calculate (input watermark, output watermark) for all nodes.
+ * It is considered a "simulation" because watermarks are not being physically sent between
+ * operators, but rather propagated up the tree via post-order (children first) traversal of
+ * the query plan. This allows Structured Streaming to determine the new (input watermark, output
+ * watermark) for all nodes.
  *
  * For each node, below logic is applied:
  *
- * - Input watermark for specific node is decided by `min(input watermarks from all children)`.
+ * - Input watermark for specific node is decided by `min(output watermarks from all children)`.
  *   -- Children providing no input watermark (DEFAULT_WATERMARK_MS) are excluded.
  *   -- If there is no valid input watermark from children, input watermark = DEFAULT_WATERMARK_MS.
  * - Output watermark for specific node is decided as following:
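[Editorial illustration of the rule the corrected comment describes: a node's input watermark is the minimum of its children's *output* watermarks, computed children-first. This is a minimal standalone Scala sketch, not Spark's implementation; the Node/Leaf/Op names and the -1L sentinel (standing in for DEFAULT_WATERMARK_MS) are invented for illustration.]

object WatermarkPropagationSketch {
  // Sentinel meaning "no watermark provided"; stand-in for Spark's DEFAULT_WATERMARK_MS.
  val DefaultWatermarkMs: Long = -1L

  sealed trait Node
  final case class Leaf(outputWatermarkMs: Long) extends Node // e.g. a source with a known watermark
  final case class Op(children: Seq[Node]) extends Node       // e.g. a stateful operator

  // Post-order (children first) traversal, as the updated comment describes.
  def outputWatermark(node: Node): Long = node match {
    case Leaf(wm) => wm
    case Op(children) =>
      // Input watermark = min(output watermarks from all children),
      // excluding children that provide no watermark.
      val valid = children.map(outputWatermark).filter(_ != DefaultWatermarkMs)
      if (valid.isEmpty) DefaultWatermarkMs else valid.min
      // (A real operator would then derive its own output watermark from this,
      // e.g. by subtracting an event-time delay.)
  }

  def main(args: Array[String]): Unit = {
    // A join-like node over two inputs: the slower input (5000 ms) dictates the result.
    assert(outputWatermark(Op(Seq(Leaf(5000L), Leaf(9000L)))) == 5000L)
    // A child without a watermark is excluded from the min.
    assert(outputWatermark(Op(Seq(Leaf(5000L), Leaf(DefaultWatermarkMs)))) == 5000L)
  }
}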
From 5a91172c019c119e686f8221bbdb31f59d3d7776 Mon Sep 17 00:00:00 2001
From: Cheng Pan
Date: Mon, 23 Dec 2024 09:05:56 +0900
Subject: [PATCH 48/51] [SPARK-50483][SPARK-50545][DOC][FOLLOWUP][3.5] Mention behavior changes in migration guide

Backport https://github.com/apache/spark/pull/49252 to branch-3.5

### What changes were proposed in this pull request?

Update migration guide for SPARK-50483 and SPARK-50545

### Why are the changes needed?

Mention behavior changes in migration guide

### Does this PR introduce _any_ user-facing change?

Yes, docs are updated.

### How was this patch tested?

Review.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #49256 from pan3793/SPARK-50483-SPARK-50545-followup-3.5.

Authored-by: Cheng Pan
Signed-off-by: Hyukjin Kwon
---
 docs/core-migration-guide.md | 4 ++++
 docs/sql-migration-guide.md  | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md
index 36465cc3f4e86..9381b28c8b078 100644
--- a/docs/core-migration-guide.md
+++ b/docs/core-migration-guide.md
@@ -22,6 +22,10 @@ license: |
 * Table of contents
 {:toc}

+## Upgrading from Core 3.5.3 to 3.5.4
+
+- Since Spark 3.5.4, when reading files hits `org.apache.hadoop.security.AccessControlException` and `org.apache.hadoop.hdfs.BlockMissingException`, the exception will be thrown and fail the task, even if `spark.files.ignoreCorruptFiles` is set to `true`.
+
 ## Upgrading from Core 3.4 to 3.5

 - Since Spark 3.5, `spark.yarn.executor.failuresValidityInterval` is deprecated. Use `spark.executor.failuresValidityInterval` instead.
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 0f3adbdafeaf9..be4e92ec4df43 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -22,6 +22,10 @@ license: |
 * Table of contents
 {:toc}

+## Upgrading from Spark SQL 3.5.3 to 3.5.4
+
+- Since Spark 3.5.4, when reading SQL tables hits `org.apache.hadoop.security.AccessControlException` and `org.apache.hadoop.hdfs.BlockMissingException`, the exception will be thrown and fail the task, even if `spark.sql.files.ignoreCorruptFiles` is set to `true`.
+
 ## Upgrading from Spark SQL 3.5.1 to 3.5.2

 - Since 3.5.2, MySQL JDBC datasource will read TINYINT UNSIGNED as ShortType, while in 3.5.1, it was wrongly read as ByteType.
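[Editorial illustration of the configuration the two migration notes above refer to. The session setup below is a generic hedged sketch, not part of this patch; only the two `ignoreCorruptFiles` keys come from the guide. Since 3.5.4, AccessControlException / BlockMissingException raised during a read fail the task even with these flags enabled.]

import org.apache.spark.sql.SparkSession

object IgnoreCorruptFilesSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("ignore-corrupt-files-sketch")
      .config("spark.files.ignoreCorruptFiles", "true")     // core file reads (SparkContext APIs)
      .config("spark.sql.files.ignoreCorruptFiles", "true") // SQL / DataFrame file sources
      .getOrCreate()
    // Reads of genuinely corrupt files are skipped; HDFS access/availability
    // errors are no longer swallowed as of 3.5.4.
    spark.stop()
  }
}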
From c458b6ae38b81fc727972d530f3c0bbc0948aa9f Mon Sep 17 00:00:00 2001
From: ejblanco
Date: Tue, 7 Jan 2025 09:26:45 +0100
Subject: [PATCH 49/51] fix mockito

---
 resource-managers/kubernetes/core/pom.xml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml
index 5991f1848ccf8..a34750602eba1 100644
--- a/resource-managers/kubernetes/core/pom.xml
+++ b/resource-managers/kubernetes/core/pom.xml
@@ -117,7 +117,8 @@
 org.mockito
- mockito-core
+ mockito-scala_2.12
+ 1.17.22
 test
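[Editorial context for the dependency swap above: mockito-scala wraps mockito-core with a Scala-friendly API. A hedged sketch of the style it enables follows; the MockitoSugar trait and its mock/when/verify sugar are assumed from mockito-scala 1.17.x's documented API, and the Greeter trait is invented for illustration.]

import org.mockito.MockitoSugar // provided by mockito-scala, not mockito-core

trait Greeter { def greet(name: String): String }

object MockitoScalaSketch extends MockitoSugar {
  def demo(): Unit = {
    val greeter = mock[Greeter] // no classOf/ClassTag boilerplate needed
    when(greeter.greet("spark")).thenReturn("hello, spark")
    assert(greeter.greet("spark") == "hello, spark")
    verify(greeter).greet("spark")
  }
}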
From b58ca6d1c5c84e44135da0c5d808ba14a1c3b7e5 Mon Sep 17 00:00:00 2001
From: ejblanco
Date: Tue, 7 Jan 2025 10:53:18 +0100
Subject: [PATCH 50/51] add custom

---
 common/tags/dev/checkstyle-suppressions.xml  |  63 +++
 common/tags/dev/checkstyle.xml               | 191 +++++++
 common/tags/pom.xml                          |   1 +
 common/tags/scalastyle-config.xml            | 465 ++++++++++++++++++
 .../core/dev/checkstyle-suppressions.xml     |  63 +++
 .../kubernetes/core/dev/checkstyle.xml       | 191 +++++++
 resource-managers/kubernetes/core/pom.xml    |   6 +-
 .../kubernetes/core/scalastyle-config.xml    | 465 ++++++++++++++++++
 8 files changed, 1443 insertions(+), 2 deletions(-)
 create mode 100644 common/tags/dev/checkstyle-suppressions.xml
 create mode 100644 common/tags/dev/checkstyle.xml
 create mode 100644 common/tags/scalastyle-config.xml
 create mode 100644 resource-managers/kubernetes/core/dev/checkstyle-suppressions.xml
 create mode 100644 resource-managers/kubernetes/core/dev/checkstyle.xml
 create mode 100644 resource-managers/kubernetes/core/scalastyle-config.xml

diff --git a/common/tags/dev/checkstyle-suppressions.xml b/common/tags/dev/checkstyle-suppressions.xml
new file mode 100644
index 0000000000000..8ba1ff1b3b1eb
--- /dev/null
+++ b/common/tags/dev/checkstyle-suppressions.xml
@@ -0,0 +1,63 @@
+[... 63 added lines: a copy of Spark's dev/checkstyle-suppressions.xml; the XML markup was lost in extraction ...]
diff --git a/common/tags/dev/checkstyle.xml b/common/tags/dev/checkstyle.xml
new file mode 100644
index 0000000000000..5af15318081a6
--- /dev/null
+++ b/common/tags/dev/checkstyle.xml
@@ -0,0 +1,191 @@
+[... 191 added lines: a copy of Spark's dev/checkstyle.xml; the XML markup was lost in extraction ...]
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index 59e9973c42d05..638c33ad4647f 100644
--- a/common/tags/pom.xml
+++ b/common/tags/pom.xml
@@ -27,6 +27,7 @@
 spark-tags_2.12
+ 3.5.4-CUSTOM
 jar
 Spark Project Tags
 https://spark.apache.org/
diff --git a/common/tags/scalastyle-config.xml b/common/tags/scalastyle-config.xml
new file mode 100644
index 0000000000000..0ccd937e72e88
--- /dev/null
+++ b/common/tags/scalastyle-config.xml
@@ -0,0 +1,465 @@
+[... 465 added lines: a copy of Spark's "Scalastyle standard configuration" (scalastyle-config.xml); the XML markup was lost in extraction ...]
diff --git a/resource-managers/kubernetes/core/dev/checkstyle-suppressions.xml b/resource-managers/kubernetes/core/dev/checkstyle-suppressions.xml
new file mode 100644
index 0000000000000..8ba1ff1b3b1eb
--- /dev/null
+++ b/resource-managers/kubernetes/core/dev/checkstyle-suppressions.xml
@@ -0,0 +1,63 @@
+[... 63 added lines: the same checkstyle-suppressions.xml copied under resource-managers/kubernetes/core/dev/ ...]
diff --git a/resource-managers/kubernetes/core/dev/checkstyle.xml b/resource-managers/kubernetes/core/dev/checkstyle.xml
new file mode 100644
index 0000000000000..5af15318081a6
--- /dev/null
+++ b/resource-managers/kubernetes/core/dev/checkstyle.xml
@@ -0,0 +1,191 @@
+[... 191 added lines: the same checkstyle.xml copied under resource-managers/kubernetes/core/dev/ ...]
diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml
index a34750602eba1..7296ca7ae0efc 100644
--- a/resource-managers/kubernetes/core/pom.xml
+++ b/resource-managers/kubernetes/core/pom.xml
@@ -25,6 +25,7 @@
 spark-kubernetes_2.12
+ 3.5.4-CUSTOM
 jar
 Spark Project Kubernetes
@@ -57,13 +58,13 @@
 org.apache.spark
 spark-core_${scala.binary.version}
- ${project.version}
+ 3.5.4
 org.apache.spark
 spark-core_${scala.binary.version}
- ${project.version}
+ 3.5.4
 test-jar
 test
@@ -71,6 +72,7 @@
 org.apache.spark
 spark-tags_${scala.binary.version}
+ 3.5.4-CUSTOM
 test-jar
 test
diff --git a/resource-managers/kubernetes/core/scalastyle-config.xml b/resource-managers/kubernetes/core/scalastyle-config.xml
new file mode 100644
index 0000000000000..0ccd937e72e88
--- /dev/null
+++ b/resource-managers/kubernetes/core/scalastyle-config.xml
@@ -0,0 +1,465 @@
+[... 465 added lines: the same scalastyle-config.xml copied under resource-managers/kubernetes/core/ ...]

From 94e6bb71679ac45bba989f67353519f8723b4885 Mon Sep 17 00:00:00 2001
From: ejblanco
Date: Tue, 7 Jan 2025 11:01:12 +0100
Subject: [PATCH 51/51] Revert "add custom"

This reverts commit b58ca6d1c5c84e44135da0c5d808ba14a1c3b7e5.

---
 common/tags/dev/checkstyle-suppressions.xml  |  63 ---
 common/tags/dev/checkstyle.xml               | 191 -------
 common/tags/pom.xml                          |   1 -
 common/tags/scalastyle-config.xml            | 465 ------------------
 .../core/dev/checkstyle-suppressions.xml     |  63 ---
 .../kubernetes/core/dev/checkstyle.xml       | 191 -------
 resource-managers/kubernetes/core/pom.xml    |   6 +-
 .../kubernetes/core/scalastyle-config.xml    | 465 ------------------
 8 files changed, 2 insertions(+), 1443 deletions(-)
 delete mode 100644 common/tags/dev/checkstyle-suppressions.xml
 delete mode 100644 common/tags/dev/checkstyle.xml
 delete mode 100644 common/tags/scalastyle-config.xml
 delete mode 100644 resource-managers/kubernetes/core/dev/checkstyle-suppressions.xml
 delete mode 100644 resource-managers/kubernetes/core/dev/checkstyle.xml
 delete mode 100644 resource-managers/kubernetes/core/scalastyle-config.xml

diff --git a/common/tags/dev/checkstyle-suppressions.xml b/common/tags/dev/checkstyle-suppressions.xml
deleted file mode 100644
index 8ba1ff1b3b1eb..0000000000000
--- a/common/tags/dev/checkstyle-suppressions.xml
+++ /dev/null
@@ -1,63 +0,0 @@
-[... 63 deleted lines: the checkstyle-suppressions.xml added in the previous commit ...]
diff --git a/common/tags/dev/checkstyle.xml b/common/tags/dev/checkstyle.xml
deleted file mode 100644
index 5af15318081a6..0000000000000
--- a/common/tags/dev/checkstyle.xml
+++ /dev/null
@@ -1,191 +0,0 @@
-[... 191 deleted lines: the checkstyle.xml added in the previous commit ...]
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index 638c33ad4647f..59e9973c42d05 100644
--- a/common/tags/pom.xml
+++ b/common/tags/pom.xml
@@ -27,7 +27,6 @@
 spark-tags_2.12
- 3.5.4-CUSTOM
 jar
 Spark Project Tags
 https://spark.apache.org/
diff --git a/common/tags/scalastyle-config.xml b/common/tags/scalastyle-config.xml
deleted file mode 100644
index 0ccd937e72e88..0000000000000
--- a/common/tags/scalastyle-config.xml
+++ /dev/null
@@ -1,465 +0,0 @@
-[... 465 deleted lines: the scalastyle-config.xml added in the previous commit ...]
diff --git a/resource-managers/kubernetes/core/dev/checkstyle-suppressions.xml b/resource-managers/kubernetes/core/dev/checkstyle-suppressions.xml
deleted file mode 100644
index 8ba1ff1b3b1eb..0000000000000
--- a/resource-managers/kubernetes/core/dev/checkstyle-suppressions.xml
+++ /dev/null
@@ -1,63 +0,0 @@
-[... 63 deleted lines: the copied checkstyle-suppressions.xml ...]
diff --git a/resource-managers/kubernetes/core/dev/checkstyle.xml b/resource-managers/kubernetes/core/dev/checkstyle.xml
deleted file mode 100644
index 5af15318081a6..0000000000000
--- a/resource-managers/kubernetes/core/dev/checkstyle.xml
+++ /dev/null
@@ -1,191 +0,0 @@
-[... 191 deleted lines: the copied checkstyle.xml ...]
diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml
index 7296ca7ae0efc..a34750602eba1 100644
--- a/resource-managers/kubernetes/core/pom.xml
+++ b/resource-managers/kubernetes/core/pom.xml
@@ -25,7 +25,6 @@
 spark-kubernetes_2.12
- 3.5.4-CUSTOM
 jar
 Spark Project Kubernetes
@@ -58,13 +57,13 @@
 org.apache.spark
 spark-core_${scala.binary.version}
- 3.5.4
+ ${project.version}
 org.apache.spark
 spark-core_${scala.binary.version}
- 3.5.4
+ ${project.version}
 test-jar
 test
@@ -72,7 +71,6 @@
 org.apache.spark
 spark-tags_${scala.binary.version}
- 3.5.4-CUSTOM
 test-jar
 test
diff --git a/resource-managers/kubernetes/core/scalastyle-config.xml b/resource-managers/kubernetes/core/scalastyle-config.xml
deleted file mode 100644
index 0ccd937e72e88..0000000000000
--- a/resource-managers/kubernetes/core/scalastyle-config.xml
+++ /dev/null
@@ -1,465 +0,0 @@
-[... 465 deleted lines: the copied scalastyle-config.xml ...]