diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index b016a29a86be..7ec8f94a6292 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -94,7 +94,7 @@ jobs: tpcds=false docker=false fi - build=`./dev/is-changed.py -m "core,unsafe,kvstore,avro,utils,network-common,network-shuffle,repl,launcher,examples,sketch,graphx,catalyst,hive-thriftserver,streaming,sql-kafka-0-10,streaming-kafka-0-10,mllib-local,mllib,yarn,mesos,kubernetes,hadoop-cloud,spark-ganglia-lgpl,sql,hive"` + build=`./dev/is-changed.py -m "core,unsafe,kvstore,avro,utils,network-common,network-shuffle,repl,launcher,examples,sketch,graphx,catalyst,hive-thriftserver,streaming,sql-kafka-0-10,streaming-kafka-0-10,mllib-local,mllib,yarn,mesos,kubernetes,hadoop-cloud,spark-ganglia-lgpl,sql,hive,connect,protobuf,api"` precondition=" { \"build\": \"$build\", diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index ea3d7d2a63ca..8657755b8d0e 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.5.3 +Version: 3.5.4 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 9c47b1686579..47b38621d640 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../pom.xml diff --git a/build/mvn b/build/mvn index 3179099304c7..2c778fd6c71a 100755 --- a/build/mvn +++ b/build/mvn @@ -56,7 +56,7 @@ install_app() { local binary="${_DIR}/$6" local remote_tarball="${mirror_host}/${url_path}${url_query}" local local_checksum="${local_tarball}.${checksum_suffix}" - local remote_checksum="https://archive.apache.org/dist/${url_path}.${checksum_suffix}" + local remote_checksum="${mirror_host}/${url_path}.${checksum_suffix}${url_query}" local curl_opts="--silent --show-error -L" local wget_opts="--no-verbose" diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 46f4be085728..3757f69e9bd1 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 9704e4aeee93..83243d183b7b 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 25c1b785961c..e74fb05beb0a 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 3c9549269039..13c4b5cca1e3 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 2bf35c713923..709bbed0c553 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 509e067dd246..59e9973c42d0 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml 
index 8b0130e55129..e222499eec22 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/common/utils/pom.xml b/common/utils/pom.xml index a402affc8822..7b2a1ad57b0f 100644 --- a/common/utils/pom.xml +++ b/common/utils/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/common/utils/src/main/scala/org/apache/spark/unsafe/array/ByteArrayUtils.java b/common/utils/src/main/java/org/apache/spark/unsafe/array/ByteArrayUtils.java similarity index 100% rename from common/utils/src/main/scala/org/apache/spark/unsafe/array/ByteArrayUtils.java rename to common/utils/src/main/java/org/apache/spark/unsafe/array/ByteArrayUtils.java diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml index 5380c3705f4f..1a6fe528b916 100644 --- a/connector/avro/pom.xml +++ b/connector/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala index 2554106d78e9..67e4583fe482 100644 --- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala +++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -29,6 +29,7 @@ import org.apache.avro.mapred.{AvroOutputFormat, FsInput} import org.apache.avro.mapreduce.AvroJob import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.FileStatus +import org.apache.hadoop.hdfs.BlockMissingException import org.apache.hadoop.mapreduce.Job import org.apache.spark.SparkException @@ -140,6 +141,8 @@ private[sql] object AvroUtils extends Logging { try { Some(DataFileReader.openReader(in, new GenericDatumReader[GenericRecord]())) } catch { + case e: BlockMissingException => + throw new SparkException(s"Could not read file: $path", e) case e: IOException => if (ignoreCorruptFiles) { logWarning(s"Skipped the footer in the corrupted file: $path", e) diff --git a/connector/avro/src/main/java/org/apache/spark/sql/avro/CustomDecimal.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/CustomDecimal.scala similarity index 100% rename from connector/avro/src/main/java/org/apache/spark/sql/avro/CustomDecimal.scala rename to connector/avro/src/main/scala/org/apache/spark/sql/avro/CustomDecimal.scala diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml index 56b66be3f774..695146d7a111 100644 --- a/connector/connect/client/jvm/pom.xml +++ b/connector/connect/client/jvm/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../../../pom.xml diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala index 61d08912aec2..3ae9b9fc73b4 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala @@ -74,7 +74,7 @@ object IntegrationTestUtils { // Redirect server log into console "--conf", - s"spark.driver.extraJavaOptions=-Dlog4j.configuration=$log4j2") + s"spark.driver.extraJavaOptions=-Dlog4j.configurationFile=$log4j2") } else Seq.empty } diff --git a/connector/connect/common/pom.xml b/connector/connect/common/pom.xml index 
2d209746dc7f..6c50469717f9 100644 --- a/connector/connect/common/pom.xml +++ b/connector/connect/common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../../pom.xml diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml index 983d4f087a68..aeadbacb7c69 100644 --- a/connector/connect/server/pom.xml +++ b/connector/connect/server/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../../pom.xml diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala index 0e4f344da901..93a1757cd687 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala @@ -21,6 +21,7 @@ import java.util.UUID import scala.collection.JavaConverters._ import scala.collection.mutable +import scala.util.control.NonFatal import org.apache.spark.{SparkEnv, SparkSQLException} import org.apache.spark.connect.proto @@ -237,7 +238,14 @@ private[connect] class ExecuteHolder( // it does. responseObserver.removeAll() // post closed to UI - eventsManager.postClosed() + try { + eventsManager.postClosed() + } catch { + // Catching the exception to prevent the wrong error code from being returned to the + // user: SPARK-49688. The issue was fixed by completely refactoring the code in Spark 4.0. + case e: Throwable if NonFatal.apply(e) => + logError(s"Error posting closed event to UI: ${e.getMessage()}") + } } // interrupt any attached grpcResponseSenders grpcResponseSenders.foreach(_.interrupt()) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectReattachExecuteHandler.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectReattachExecuteHandler.scala index 393b832de878..b8363c15e6ba 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectReattachExecuteHandler.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectReattachExecuteHandler.scala @@ -29,6 +29,11 @@ class SparkConnectReattachExecuteHandler( extends Logging { def handle(v: proto.ReattachExecuteRequest): Unit = { + // An exception will be raised if the session is not available. 
+ val sessionHolder = + SparkConnectService.getIsolatedSession(v.getUserContext.getUserId, v.getSessionId) + assert(sessionHolder != null) + val executeHolder = SparkConnectService.executionManager .getExecuteHolder(ExecuteKey(v.getUserContext.getUserId, v.getSessionId, v.getOperationId)) .getOrElse { diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala index e8af2acfd2e2..edf8147eff85 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala @@ -360,6 +360,13 @@ object SparkConnectService extends Logging { userSessionMapping.invalidateAll() } + /** + * Used for testing + */ + private[connect] def invalidateSession(userId: String, sessionId: String): Unit = { + userSessionMapping.invalidate((userId, sessionId)) + } + /** * Used for testing. */ diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala index eddd1c6be72b..234ee526d438 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala @@ -52,10 +52,6 @@ trait SparkConnectServerTest extends SharedSparkSession { withSparkEnvConfs((Connect.CONNECT_GRPC_BINDING_PORT.key, serverPort.toString)) { SparkConnectService.start(spark.sparkContext) } - // register udf directly on the server, we're not testing client UDFs here... 
- val serverSession = - SparkConnectService.getOrCreateIsolatedSession(defaultUserId, defaultSessionId).session - serverSession.udf.register("sleep", ((ms: Int) => { Thread.sleep(ms); ms })) } override def afterAll(): Unit = { diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala index 06cd1a5666b6..00de9fb6fd26 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala @@ -56,6 +56,23 @@ class ReattachableExecuteSuite extends SparkConnectServerTest { } } + test("reattach after connection expired") { + withClient { client => + withRawBlockingStub { stub => + // emulate session expiration + SparkConnectService.invalidateSession(defaultUserId, defaultSessionId) + + // session closed, bound to fail immediately + val operationId = UUID.randomUUID().toString + val iter = stub.reattachExecute(buildReattachExecuteRequest(operationId, None)) + val e = intercept[StatusRuntimeException] { + iter.next() + } + assert(e.getMessage.contains("INVALID_HANDLE.SESSION_NOT_FOUND")) + } + } + } + test("raw interrupted RPC results in INVALID_CURSOR.DISCONNECTED error") { withRawBlockingStub { stub => val iter = stub.executePlan(buildExecutePlanRequest(buildPlan(MEDIUM_RESULTS_QUERY))) @@ -347,6 +364,10 @@ class ReattachableExecuteSuite extends SparkConnectServerTest { } test("long sleeping query") { + // register udf directly on the server, we're not testing client UDFs here... + val serverSession = + SparkConnectService.getOrCreateIsolatedSession(defaultUserId, defaultSessionId).session + serverSession.udf.register("sleep", ((ms: Int) => { Thread.sleep(ms); ms })) // query will be sleeping and not returning results, while having multiple reattach withSparkEnvConfs( (Connect.CONNECT_EXECUTE_REATTACHABLE_SENDER_MAX_STREAM_DURATION.key, "1s")) { diff --git a/connector/docker-integration-tests/pom.xml b/connector/docker-integration-tests/pom.xml index 6f48d2775b4c..435c0fbd797a 100644 --- a/connector/docker-integration-tests/pom.xml +++ b/connector/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala index de8fcf1a4a78..78fdbe7158bb 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala @@ -22,7 +22,11 @@ import java.sql.Connection import org.scalatest.time.SpanSugar._ import org.apache.spark.{SparkConf, SparkSQLFeatureNotSupportedException} +import org.apache.spark.rdd.RDD import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.{RowDataSourceScanExec, SparkPlan} +import org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog import org.apache.spark.sql.jdbc.MsSQLServerDatabaseOnDocker import org.apache.spark.sql.types._ @@ 
-39,6 +43,17 @@ import org.apache.spark.tags.DockerTest @DockerTest class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { + def getExternalEngineQuery(executedPlan: SparkPlan): String = { + getExternalEngineRdd(executedPlan).asInstanceOf[JDBCRDD].getExternalEngineQuery + } + + def getExternalEngineRdd(executedPlan: SparkPlan): RDD[InternalRow] = { + val queryNode = executedPlan.collect { case r: RowDataSourceScanExec => + r + }.head + queryNode.rdd + } + override def excluded: Seq[String] = Seq( "simple scan with OFFSET", "simple scan with LIMIT and OFFSET", @@ -137,4 +152,68 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JD "WHERE (dept > 1 AND ((name LIKE 'am%') = (name LIKE '%y')))") assert(df3.collect().length == 3) } + + test("SPARK-50087: SqlServer handle booleans in CASE WHEN test") { + val df = sql( + s"""|SELECT * FROM $catalogName.employee + |WHERE CASE WHEN name = 'Legolas' THEN name = 'Elf' ELSE NOT (name = 'Wizard') END + |""".stripMargin + ) + + // scalastyle:off + assert(getExternalEngineQuery(df.queryExecution.executedPlan) == + """SELECT "dept","name","salary","bonus" FROM "employee" WHERE (CASE WHEN ("name" = 'Legolas') THEN IIF(("name" = 'Elf'), 1, 0) ELSE IIF(("name" <> 'Wizard'), 1, 0) END = 1) """ + ) + // scalastyle:on + df.collect() + } + + test("SPARK-50087: SqlServer handle booleans in CASE WHEN with always true test") { + val df = sql( + s"""|SELECT * FROM $catalogName.employee + |WHERE CASE WHEN (name = 'Legolas') THEN (name = 'Elf') ELSE (1=1) END + |""".stripMargin + ) + + // scalastyle:off + assert(getExternalEngineQuery(df.queryExecution.executedPlan) == + """SELECT "dept","name","salary","bonus" FROM "employee" WHERE (CASE WHEN ("name" = 'Legolas') THEN IIF(("name" = 'Elf'), 1, 0) ELSE 1 END = 1) """ + ) + // scalastyle:on + df.collect() + } + + test("SPARK-50087: SqlServer handle booleans in nested CASE WHEN test") { + val df = sql( + s"""|SELECT * FROM $catalogName.employee + |WHERE CASE WHEN (name = 'Legolas') THEN + | CASE WHEN (name = 'Elf') THEN (name = 'Elrond') ELSE (name = 'Gandalf') END + | ELSE (name = 'Sauron') END + |""".stripMargin + ) + + // scalastyle:off + assert(getExternalEngineQuery(df.queryExecution.executedPlan) == + """SELECT "dept","name","salary","bonus" FROM "employee" WHERE (CASE WHEN ("name" = 'Legolas') THEN IIF((CASE WHEN ("name" = 'Elf') THEN IIF(("name" = 'Elrond'), 1, 0) ELSE IIF(("name" = 'Gandalf'), 1, 0) END = 1), 1, 0) ELSE IIF(("name" = 'Sauron'), 1, 0) END = 1) """ + ) + // scalastyle:on + df.collect() + } + + test("SPARK-50087: SqlServer handle non-booleans in nested CASE WHEN test") { + val df = sql( + s"""|SELECT * FROM $catalogName.employee + |WHERE CASE WHEN (name = 'Legolas') THEN + | CASE WHEN (name = 'Elf') THEN 'Elf' ELSE 'Wizard' END + | ELSE 'Sauron' END = name + |""".stripMargin + ) + + // scalastyle:off + assert(getExternalEngineQuery(df.queryExecution.executedPlan) == + """SELECT "dept","name","salary","bonus" FROM "employee" WHERE ("name" IS NOT NULL) AND ((CASE WHEN "name" = 'Legolas' THEN CASE WHEN "name" = 'Elf' THEN 'Elf' ELSE 'Wizard' END ELSE 'Sauron' END) = "name") """ + ) + // scalastyle:on + df.collect() + } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala index 7fef3ccd6b3f..b0edac3fcdd1 100644 --- 
a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -22,6 +22,7 @@ import java.sql.Connection import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +import org.apache.spark.sql.execution.FilterExec import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog import org.apache.spark.sql.jdbc.DatabaseOnDocker import org.apache.spark.sql.types._ @@ -123,4 +124,13 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCT ) } } + + test("SPARK-49695: Postgres fix xor push-down") { + val df = spark.sql(s"select dept, name from $catalogName.employee where dept ^ 6 = 0") + val rows = df.collect() + assert(!df.queryExecution.sparkPlan.exists(_.isInstanceOf[FilterExec])) + assert(rows.length == 1) + assert(rows(0).getInt(0) === 6) + assert(rows(0).getString(1) === "jen") + } } diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml index f85820ba1454..8b6d7d47b039 100644 --- a/connector/kafka-0-10-assembly/pom.xml +++ b/connector/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-sql/pom.xml b/connector/kafka-0-10-sql/pom.xml index 4a0d1af7968e..dce1990f1c9d 100644 --- a/connector/kafka-0-10-sql/pom.xml +++ b/connector/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-token-provider/pom.xml b/connector/kafka-0-10-token-provider/pom.xml index b7aff2d217bc..5973b9595db8 100644 --- a/connector/kafka-0-10-token-provider/pom.xml +++ b/connector/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10/pom.xml b/connector/kafka-0-10/pom.xml index a1ebf137e324..54ba2b22093d 100644 --- a/connector/kafka-0-10/pom.xml +++ b/connector/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml index 5aa937d48158..5e0c0fcafc12 100644 --- a/connector/kinesis-asl-assembly/pom.xml +++ b/connector/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl/pom.xml b/connector/kinesis-asl/pom.xml index ab48e0eeae2c..65b0fa33db29 100644 --- a/connector/kinesis-asl/pom.xml +++ b/connector/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala b/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala index 7d12af3256f1..d388b480e065 100644 --- a/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala +++ b/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala @@ -275,7 +275,7 @@ private[streaming] object StreamingExamples extends Logging { // We first log something to initialize Spark's default logging, then we override the // logging level. 
logInfo("Setting log level to [WARN] for streaming example." + - " To override add a custom log4j.properties to the classpath.") + " To override add a custom log4j2.properties to the classpath.") Configurator.setRootLevel(Level.WARN) } } diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml index 9fdaaa925a75..95be9ab74f10 100644 --- a/connector/protobuf/pom.xml +++ b/connector/protobuf/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/spark-ganglia-lgpl/pom.xml b/connector/spark-ganglia-lgpl/pom.xml index e52b828bef97..62f1c4ab2b12 100644 --- a/connector/spark-ganglia-lgpl/pom.xml +++ b/connector/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 17004f875869..e59066e19850 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../pom.xml diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.css b/core/src/main/resources/org/apache/spark/ui/static/webui.css index f952f86503e3..58c5add2d240 100755 --- a/core/src/main/resources/org/apache/spark/ui/static/webui.css +++ b/core/src/main/resources/org/apache/spark/ui/static/webui.css @@ -355,6 +355,10 @@ a.expandbutton { width: 170px; } +.shuffle-write-time-checkbox-div { + width: 155px; +} + .result-serialization-time-checkbox-div { width: 185px; } diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 0f0d8b6c07c0..43c95bed5c68 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -104,8 +104,13 @@ private[spark] class SparkSubmit extends Logging { */ private def kill(args: SparkSubmitArguments): Unit = { if (RestSubmissionClient.supportsRestClient(args.master)) { - new RestSubmissionClient(args.master) + val response = new RestSubmissionClient(args.master) .killSubmission(args.submissionToKill) + if (response.success) { + logInfo(s"${args.submissionToKill} is killed successfully.") + } else { + logError(response.message) + } } else { val sparkConf = args.toSparkConf() sparkConf.set("spark.master", args.master) diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala b/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala index c060ef9da8c1..12413698d283 100644 --- a/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala @@ -21,7 +21,7 @@ import java.io.File import javax.servlet.http.HttpServletResponse import org.apache.spark.{SPARK_VERSION => sparkVersion, SparkConf} -import org.apache.spark.deploy.{Command, DeployMessages, DriverDescription} +import org.apache.spark.deploy.{Command, DeployMessages, DriverDescription, SparkSubmit} import org.apache.spark.deploy.ClientArguments._ import org.apache.spark.internal.config import org.apache.spark.launcher.SparkLauncher @@ -168,9 +168,16 @@ private[rest] class StandaloneSubmitRequestServlet( val extraJavaOpts = driverExtraJavaOptions.map(Utils.splitCommandString).getOrElse(Seq.empty) val sparkJavaOpts = Utils.sparkJavaOpts(conf) val javaOpts = sparkJavaOpts ++ defaultJavaOpts ++ extraJavaOpts + val sparkSubmitOpts = if (mainClass.equals(classOf[SparkSubmit].getName)) { + sparkProperties.get("spark.app.name") + 
.map { v => Seq("-c", s"spark.app.name=$v") } + .getOrElse(Seq.empty[String]) + } else { + Seq.empty[String] + } val command = new Command( "org.apache.spark.deploy.worker.DriverWrapper", - Seq("{{WORKER_URL}}", "{{USER_JAR}}", mainClass) ++ appArgs, // args to the DriverWrapper + Seq("{{WORKER_URL}}", "{{USER_JAR}}", mainClass) ++ sparkSubmitOpts ++ appArgs, environmentVariables, extraClassPath, extraLibraryPath, javaOpts) val actualDriverMemory = driverMemory.map(Utils.memoryStringToMb).getOrElse(DEFAULT_MEMORY) val actualDriverCores = driverCores.map(_.toInt).getOrElse(DEFAULT_CORES) diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index 537522326fc7..fe90895cacb5 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -474,6 +474,27 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging { } driverConf.set(EXECUTOR_ID, arguments.executorId) + // Set executor memory related config here according to resource profile + if (cfg.resourceProfile.id != ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) { + cfg.resourceProfile + .executorResources + .foreach { + case (ResourceProfile.OFFHEAP_MEM, request) => + driverConf.set(MEMORY_OFFHEAP_SIZE.key, request.amount.toString + "m") + logInfo(s"Set executor off-heap memory to $request") + case (ResourceProfile.MEMORY, request) => + driverConf.set(EXECUTOR_MEMORY.key, request.amount.toString + "m") + logInfo(s"Set executor memory to $request") + case (ResourceProfile.OVERHEAD_MEM, request) => + // Maybe don't need to set this since it's nearly used by tasks. + driverConf.set(EXECUTOR_MEMORY_OVERHEAD.key, request.amount.toString + "m") + logInfo(s"Set executor memory_overhead to $request") + case (ResourceProfile.CORES, request) => + driverConf.set(EXECUTOR_CORES.key, request.amount.toString) + logInfo(s"Set executor cores to $request") + case _ => + } + } val env = SparkEnv.createExecutorEnv(driverConf, arguments.executorId, arguments.bindAddress, arguments.hostname, arguments.cores, cfg.ioEncryptionKey, isLocal = false) // Set the application attemptId in the BlockStoreClient if available. 
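
As context for the CoarseGrainedExecutorBackend change above: the new branch only takes effect for non-default resource profiles, copying each profile request into the executor's own configuration. Below is a minimal sketch (not part of this patch) of how such a profile is built with the stage-level scheduling API; it assumes a spark-shell style `sc` on a cluster manager where stage-level scheduling is supported (e.g. dynamic allocation on YARN/K8s), and the inline comments note the config key each request corresponds to in the patched code.

```scala
import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfileBuilder, TaskResourceRequests}

// Executor-side requests; the patched backend mirrors these into the executor's conf.
val execReqs = new ExecutorResourceRequests()
  .cores(4)              // -> spark.executor.cores
  .memory("6g")          // -> spark.executor.memory
  .memoryOverhead("1g")  // -> spark.executor.memoryOverhead
  .offHeapMemory("2g")   // -> spark.memory.offHeap.size
val taskReqs = new TaskResourceRequests().cpus(1)

val profile = new ResourceProfileBuilder().require(execReqs).require(taskReqs).build()

// Stages computing this RDD run on executors sized per the profile
// (assumes `sc` exists and stage-level scheduling is available).
val rdd = sc.parallelize(1 to 1000, 8).withResources(profile)
rdd.count()
```
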
diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index cad107256c58..8aa7d54fd61b 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -25,11 +25,13 @@ import scala.collection.immutable.Map import scala.reflect.ClassTag import org.apache.hadoop.conf.{Configurable, Configuration} +import org.apache.hadoop.hdfs.BlockMissingException import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.hadoop.mapred._ import org.apache.hadoop.mapred.lib.CombineFileSplit import org.apache.hadoop.mapreduce.TaskType import org.apache.hadoop.mapreduce.lib.input.FileInputFormat +import org.apache.hadoop.security.AccessControlException import org.apache.hadoop.util.ReflectionUtils import org.apache.spark._ @@ -293,6 +295,7 @@ class HadoopRDD[K, V]( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning(s"Skipped the rest content in the corrupted file: ${split.inputSplit}", e) finished = true @@ -318,6 +321,7 @@ class HadoopRDD[K, V]( finished = true // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning(s"Skipped the rest content in the corrupted file: ${split.inputSplit}", e) finished = true diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index 119fdae531f2..7fc93806998b 100644 --- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -25,12 +25,14 @@ import scala.collection.JavaConverters.asScalaBufferConverter import scala.reflect.ClassTag import org.apache.hadoop.conf.{Configurable, Configuration} +import org.apache.hadoop.hdfs.BlockMissingException import org.apache.hadoop.io.Writable import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, FileInputFormat, FileSplit, InvalidInputException} import org.apache.hadoop.mapreduce.task.{JobContextImpl, TaskAttemptContextImpl} +import org.apache.hadoop.security.AccessControlException import org.apache.spark._ import org.apache.spark.annotation.DeveloperApi @@ -227,6 +229,7 @@ class NewHadoopRDD[K, V]( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning( s"Skipped the rest content in the corrupted file: ${split.serializableHadoopSplit}", @@ -255,6 +258,7 @@ class NewHadoopRDD[K, V]( finished = true // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning( s"Skipped the rest content in the corrupted file: 
${split.serializableHadoopSplit}", diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index f695b1020275..b63e5999127d 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1812,8 +1812,9 @@ abstract class RDD[T: ClassTag]( * Please read the linked SPIP and design docs to understand the limitations and future plans. * @return an [[RDDBarrier]] instance that provides actions within a barrier stage * @see [[org.apache.spark.BarrierTaskContext]] - * @see SPIP: Barrier Execution Mode - * @see Design Doc + * @see + * SPIP: Barrier Execution Mode + * @see Design Doc */ @Experimental @Since("2.4.0") diff --git a/dev/.rat-excludes b/dev/.rat-excludes index d8fcfdf39ab8..311b30973577 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -138,9 +138,6 @@ ansible-for-test-node/* node_modules spark-events-broken/* SqlBaseLexer.tokens -# Spark Connect related files with custom licence -any.proto -empty.proto .*\.explain .*\.proto.bin LimitedInputStream.java diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 99841916cf29..7728e84bf906 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -505,7 +505,7 @@ if [[ "$1" == "publish-release" ]]; then file_short=$(echo $file | sed -e "s/\.\///") dest_url="$nexus_upload/org/apache/spark/$file_short" echo " Uploading $file_short" - curl -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url + curl --retry 3 -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url done echo "Closing nexus staging repository" diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index a9d63c1ad0f9..06e6aa199021 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -130,8 +130,8 @@ jersey-container-servlet/2.40//jersey-container-servlet-2.40.jar jersey-hk2/2.40//jersey-hk2-2.40.jar jersey-server/2.40//jersey-server-2.40.jar jettison/1.1//jettison-1.1.jar -jetty-util-ajax/9.4.54.v20240208//jetty-util-ajax-9.4.54.v20240208.jar -jetty-util/9.4.54.v20240208//jetty-util-9.4.54.v20240208.jar +jetty-util-ajax/9.4.56.v20240826//jetty-util-ajax-9.4.56.v20240826.jar +jetty-util/9.4.56.v20240826//jetty-util-9.4.56.v20240826.jar jline/2.14.6//jline-2.14.6.jar joda-time/2.12.5//joda-time-2.12.5.jar jodd-core/3.5.2//jodd-core-3.5.2.jar @@ -212,9 +212,9 @@ opencsv/2.3//opencsv-2.3.jar opentracing-api/0.33.0//opentracing-api-0.33.0.jar opentracing-noop/0.33.0//opentracing-noop-0.33.0.jar opentracing-util/0.33.0//opentracing-util-0.33.0.jar -orc-core/1.9.4/shaded-protobuf/orc-core-1.9.4-shaded-protobuf.jar -orc-mapreduce/1.9.4/shaded-protobuf/orc-mapreduce-1.9.4-shaded-protobuf.jar -orc-shims/1.9.4//orc-shims-1.9.4.jar +orc-core/1.9.5/shaded-protobuf/orc-core-1.9.5-shaded-protobuf.jar +orc-mapreduce/1.9.5/shaded-protobuf/orc-mapreduce-1.9.5-shaded-protobuf.jar +orc-shims/1.9.5//orc-shims-1.9.5.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/dev/requirements.txt b/dev/requirements.txt index 0749af75aa4b..e3c3cae59d05 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -67,5 +67,5 @@ torchvision torcheval # DeepspeedTorchDistributor dependencies -deepspeed +deepspeed; sys_platform != 'darwin' diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 
d29fc8726018..5df59476007a 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -113,6 +113,14 @@ def __hash__(self): ], ) +utils = Module( + name="utils", + dependencies=[tags], + source_file_regexes=[ + "common/utils/", + ], +) + kvstore = Module( name="kvstore", dependencies=[tags], @@ -126,7 +134,7 @@ def __hash__(self): network_common = Module( name="network-common", - dependencies=[tags], + dependencies=[tags, utils], source_file_regexes=[ "common/network-common/", ], @@ -148,7 +156,7 @@ def __hash__(self): unsafe = Module( name="unsafe", - dependencies=[tags], + dependencies=[tags, utils], source_file_regexes=[ "common/unsafe", ], @@ -179,7 +187,7 @@ def __hash__(self): core = Module( name="core", - dependencies=[kvstore, network_common, network_shuffle, unsafe, launcher], + dependencies=[kvstore, network_common, network_shuffle, unsafe, launcher, utils], source_file_regexes=[ "core/", ], @@ -188,9 +196,17 @@ def __hash__(self): ], ) +api = Module( + name="api", + dependencies=[utils, unsafe], + source_file_regexes=[ + "sql/api/", + ], +) + catalyst = Module( name="catalyst", - dependencies=[tags, sketch, core], + dependencies=[tags, sketch, core, api], source_file_regexes=[ "sql/catalyst/", ], diff --git a/docs/_config.yml b/docs/_config.yml index 2dfc5322b2ff..a207cc2d911b 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.5.3 -SPARK_VERSION_SHORT: 3.5.3 +SPARK_VERSION: 3.5.4 +SPARK_VERSION_SHORT: 3.5.4 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.18" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.5.3"] + 'facetFilters': ["version:3.5.4"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index ff93f09a83ce..99d7aded3f25 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -87,6 +87,8 @@ url: sql-ref-functions.html - text: Identifiers url: sql-ref-identifier.html + - text: IDENTIFIER clause + url: sql-ref-identifier-clause.html - text: Literals url: sql-ref-literals.html - text: Null Semantics diff --git a/docs/configuration.md b/docs/configuration.md index abd934572bd0..302348aa0f0e 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1959,7 +1959,7 @@ Apart from these, the following properties are also available, and may be useful spark.storage.replication.proactive - false + true Enables proactive block replication for RDD blocks. Cached RDD block replicas lost due to executor failures are replenished if there are any existing available replicas. This tries diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md index 36465cc3f4e8..9381b28c8b07 100644 --- a/docs/core-migration-guide.md +++ b/docs/core-migration-guide.md @@ -22,6 +22,10 @@ license: | * Table of contents {:toc} +## Upgrading from Core 3.5.3 to 3.5.4 + +- Since Spark 3.5.4, when reading files hits `org.apache.hadoop.security.AccessControlException` and `org.apache.hadoop.hdfs.BlockMissingException`, the exception will be thrown and fail the task, even if `spark.files.ignoreCorruptFiles` is set to `true`. + ## Upgrading from Core 3.4 to 3.5 - Since Spark 3.5, `spark.yarn.executor.failuresValidityInterval` is deprecated. Use `spark.executor.failuresValidityInterval` instead. 
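
To make the Core migration note above concrete, here is a small illustrative sketch (not code from the patch; the app name and path are placeholders) showing the two "ignore corrupt files" switches the note refers to:

```scala
import org.apache.spark.sql.SparkSession

// Both corrupt-file tolerance switches enabled; app name and path are hypothetical.
val spark = SparkSession.builder()
  .appName("ignore-corrupt-files-demo")
  .config("spark.files.ignoreCorruptFiles", "true")      // RDD/Hadoop-based file reads
  .config("spark.sql.files.ignoreCorruptFiles", "true")  // SQL/DataFrame file sources
  .getOrCreate()

// Corrupt or truncated files are still skipped with a warning, but since 3.5.4 an
// org.apache.hadoop.security.AccessControlException or
// org.apache.hadoop.hdfs.BlockMissingException propagates and fails the task.
spark.read.parquet("hdfs:///data/events").count()
```
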
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index ce7121b806cb..d0e725f6a98f 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -121,15 +121,15 @@ all environment variables used for launching each container. This process is useful for debugging classpath problems in particular. (Note that enabling this requires admin privileges on cluster settings and a restart of all node managers. Thus, this is not applicable to hosted clusters). -To use a custom log4j configuration for the application master or executors, here are the options: +To use a custom log4j2 configuration for the application master or executors, here are the options: -- upload a custom `log4j.properties` using `spark-submit`, by adding it to the `--files` list of files +- upload a custom `log4j2.properties` using `spark-submit`, by adding it to the `--files` list of files to be uploaded with the application. -- add `-Dlog4j.configuration=<location of configuration file>` to `spark.driver.extraJavaOptions` +- add `-Dlog4j.configurationFile=<location of configuration file>` to `spark.driver.extraJavaOptions` (for the driver) or `spark.executor.extraJavaOptions` (for executors). Note that if using a file, the `file:` protocol should be explicitly provided, and the file needs to exist locally on all the nodes. -- update the `$SPARK_CONF_DIR/log4j.properties` file and it will be automatically uploaded along +- update the `$SPARK_CONF_DIR/log4j2.properties` file and it will be automatically uploaded along with the other configurations. Note that other 2 options has higher priority than this option if multiple options are specified. diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 964f7de637e8..be4e92ec4df4 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -22,6 +22,10 @@ license: | * Table of contents {:toc} +## Upgrading from Spark SQL 3.5.3 to 3.5.4 + +- Since Spark 3.5.4, when reading SQL tables hits `org.apache.hadoop.security.AccessControlException` and `org.apache.hadoop.hdfs.BlockMissingException`, the exception will be thrown and fail the task, even if `spark.sql.files.ignoreCorruptFiles` is set to `true`. + ## Upgrading from Spark SQL 3.5.1 to 3.5.2 - Since 3.5.2, MySQL JDBC datasource will read TINYINT UNSIGNED as ShortType, while in 3.5.1, it was wrongly read as ByteType. @@ -595,142 +599,111 @@ license: | - Since Spark 2.3, the Join/Filter's deterministic predicates that are after the first non-deterministic predicates are also pushed down/through the child operators, if possible. In prior Spark versions, these filters are not eligible for predicate pushdown. - Partition column inference previously found incorrect common type for different inferred types, for example, previously it ended up with double type as the common type for double type and date type. Now it finds the correct common type for such conflicts. The conflict resolution follows the table below:
  [The diff reformats this HTML table from multi-line <table>/<tr>/<th>/<td> markup into the compact single-line rows below; the type-coercion matrix it encodes is unchanged.]
+ <tr><th>InputA \ InputB</th><th>NullType</th><th>IntegerType</th><th>LongType</th><th>DecimalType(38,0)*</th><th>DoubleType</th><th>DateType</th><th>TimestampType</th><th>StringType</th></tr>
+ <tr><th>NullType</th><td>NullType</td><td>IntegerType</td><td>LongType</td><td>DecimalType(38,0)</td><td>DoubleType</td><td>DateType</td><td>TimestampType</td><td>StringType</td></tr>
+ <tr><th>IntegerType</th><td>IntegerType</td><td>IntegerType</td><td>LongType</td><td>DecimalType(38,0)</td><td>DoubleType</td><td>StringType</td><td>StringType</td><td>StringType</td></tr>
+ <tr><th>LongType</th><td>LongType</td><td>LongType</td><td>LongType</td><td>DecimalType(38,0)</td><td>StringType</td><td>StringType</td><td>StringType</td><td>StringType</td></tr>
+ <tr><th>DecimalType(38,0)*</th><td>DecimalType(38,0)</td><td>DecimalType(38,0)</td><td>DecimalType(38,0)</td><td>DecimalType(38,0)</td><td>StringType</td><td>StringType</td><td>StringType</td><td>StringType</td></tr>
+ <tr><th>DoubleType</th><td>DoubleType</td><td>DoubleType</td><td>StringType</td><td>StringType</td><td>DoubleType</td><td>StringType</td><td>StringType</td><td>StringType</td></tr>
+ <tr><th>DateType</th><td>DateType</td><td>StringType</td><td>StringType</td><td>StringType</td><td>StringType</td><td>DateType</td><td>TimestampType</td><td>StringType</td></tr>
+ <tr><th>TimestampType</th><td>TimestampType</td><td>StringType</td><td>StringType</td><td>StringType</td><td>StringType</td><td>TimestampType</td><td>TimestampType</td><td>StringType</td></tr>
+ <tr><th>StringType</th><td>StringType</td><td>StringType</td><td>StringType</td><td>StringType</td><td>StringType</td><td>StringType</td><td>StringType</td><td>StringType</td></tr>
Note that, for DecimalType(38,0)*, the table above intentionally does not cover all other combinations of scales and precisions because currently we only infer decimal type like `BigInteger`/`BigInt`. For example, 1.1 is inferred as double type. diff --git a/examples/pom.xml b/examples/pom.xml index 74f7a8562cbb..5efc25521857 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../pom.xml diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala index 20c5eb170015..9289b005e3ba 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala @@ -31,7 +31,7 @@ object StreamingExamples extends Logging { // We first log something to initialize Spark's default logging, then we override the // logging level. logInfo("Setting log level to [WARN] for streaming example." + - " To override add a custom log4j.properties to the classpath.") + " To override add a custom log4j2.properties to the classpath.") Configurator.setRootLevel(Level.WARN) } } diff --git a/graphx/pom.xml b/graphx/pom.xml index 8a562f6ab0a1..74ad5a732f05 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 72fb8e9ae9be..cb89c27d5f0b 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index d548e91536d9..e36d57fe6a57 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 8733e01ac89b..26e9a1502aba 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 395f00c4c42f..079ce72d5d95 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../pom.xml diff --git a/pom.xml b/pom.xml index ec77fcc9455d..8dc47f391f96 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 pom Spark Project Parent POM https://spark.apache.org/ @@ -141,9 +141,9 @@ 10.14.2.0 1.13.1 - 1.9.4 + 1.9.5 shaded-protobuf - 9.4.54.v20240208 + 9.4.56.v20240826 4.0.3 0.10.0
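
Returning to the partition-column type inference table in the SQL migration guide section above, a brief self-contained sketch (paths and values are hypothetical, not from the patch) of how the conflict-resolution rules surface in practice:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .master("local[*]")
  .appName("partition-inference-demo")
  .getOrCreate()
import spark.implicits._

// Two partition directories whose raw values infer different types:
// "1" -> IntegerType, "1.1" -> DoubleType. Per the table, their common type is DoubleType.
Seq("a").toDF("v").write.parquet("/tmp/partition-demo/dept=1")
Seq("b").toDF("v").write.parquet("/tmp/partition-demo/dept=1.1")

// Partition discovery reads `dept` back as DoubleType rather than falling back to StringType.
spark.read.parquet("/tmp/partition-demo").printSchema()

spark.stop()
```
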