diff --git a/core/src/main/resources/configs/reports/coreRawMetricsReport.yaml b/core/src/main/resources/configs/reports/coreRawMetricsReport.yaml index d9e4b1b6a..404bc7ede 100644 --- a/core/src/main/resources/configs/reports/coreRawMetricsReport.yaml +++ b/core/src/main/resources/configs/reports/coreRawMetricsReport.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. +# Copyright (c) 2025-2026, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ # Note that all the output files in this list should be considered internal-use only. # / +# ├── app_level_recommendation_signals.csv # ├── application_information.csv # ├── application_log_path_mapping.csv # ├── cluster_information.json @@ -372,6 +373,10 @@ reportDefinitions: dataType: Long description: >- TBD + - name: input_bytesRead_max + dataType: Long + description: >- + Maximum per-task input bytes read within the aggregation unit - name: input_recordsRead_sum dataType: Long description: >- @@ -569,6 +574,10 @@ reportDefinitions: dataType: Long description: >- TBD + - name: input_bytesRead_max + dataType: Long + description: >- + Maximum per-task input bytes read within the aggregation unit - name: input_recordsRead_sum dataType: Long description: >- @@ -782,6 +791,10 @@ reportDefinitions: dataType: Long description: >- TBD + - name: input_bytesRead_max + dataType: Long + description: >- + Maximum per-task input bytes read within the aggregation unit - name: input_recordsRead_sum dataType: Long description: >- @@ -1426,3 +1439,27 @@ reportDefinitions: fileName: profile.log fileFormat: TXT scope: per-app + # AppLevelRecommendationSignalsProfileResult + - label: coreRawAppLevelRecommendationSignalsCSV + description: >- + Per-app derived signals that feed recommendation engines (AutoTuner, qualx, + etc.). Single row per app; wide layout, one column per signal. 
+ GPU-only signals are 0 for qualification (CPU event logs). + fileName: app_level_recommendation_signals.csv + scope: per-app + columns: + - name: appId + dataType: String + description: >- + Application ID. + - name: numScanStagesWithGpuOom + dataType: Int + description: >- + Number of scan stages where failed tasks had GPU OOM errors + (GpuRetryOOM / GpuSplitAndRetryOOM / jni.GpuOOM). Profiling only. + - name: numGpuShuffleStagesWithContainerOom + dataType: Int + description: >- + Number of GPU shuffle stages (GpuShuffleExchangeExec) where YARN killed + the container due to container-level OOM (ExecutorLostFailure + exit 137). + Profiling only. diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AggRawMetricsResult.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AggRawMetricsResult.scala index 2584d702f..c460f3d5c 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AggRawMetricsResult.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AggRawMetricsResult.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2026, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ package com.nvidia.spark.rapids.tool.analysis -import com.nvidia.spark.rapids.tool.profiling.{IOAnalysisProfileResult, JobAggTaskMetricsProfileResult, ShuffleSkewProfileResult, SQLDurationExecutorTimeProfileResult, SQLMaxTaskInputSizes, SQLTaskAggMetricsProfileResult, StageAggTaskMetricsProfileResult, StageDiagnosticResult} +import com.nvidia.spark.rapids.tool.profiling.{IOAnalysisProfileResult, JobAggTaskMetricsProfileResult, ShuffleSkewProfileResult, SQLDurationExecutorTimeProfileResult, SQLTaskAggMetricsProfileResult, StageAggTaskMetricsProfileResult, StageDiagnosticResult} /** * The result of the aggregation of the raw metrics. 
It contains the aggregated metrics for an @@ -31,7 +31,6 @@ import com.nvidia.spark.rapids.tool.profiling.{IOAnalysisProfileResult, JobAggTa * @param sqlAggs the aggregated Spark metrics for SQLs * @param ioAggs lists the SQLs along their IO metrics * @param sqlDurAggs the aggregated duration and CPU time for SQLs - * @param maxTaskInputSizes a sequence of SQLMaxTaskInputSizes that contains the maximum input size * @param stageDiagnostics the stage level Spark metrics for diagnostic purposes */ case class AggRawMetricsResult( @@ -41,5 +40,4 @@ case class AggRawMetricsResult( sqlAggs: Seq[SQLTaskAggMetricsProfileResult], ioAggs: Seq[IOAnalysisProfileResult], sqlDurAggs: Seq[SQLDurationExecutorTimeProfileResult], - maxTaskInputSizes: Seq[SQLMaxTaskInputSizes], stageDiagnostics: Seq[StageDiagnosticResult]) diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala index cec8cc9b8..67b899962 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024-2025, NVIDIA CORPORATION. + * Copyright (c) 2024-2026, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -45,7 +45,6 @@ trait AppSparkMetricsAggTrait extends AppIndexMapperTrait { sqlMetricsAgg, analysisObj.aggregateIOMetricsBySql(sqlMetricsAgg), analysisObj.aggregateDurationAndCPUTimeBySql(index), - Seq(analysisObj.maxTaskInputSizeBytesPerSQL(index)), analysisObj.aggregateDiagnosticMetricsByStage(index, sqlAnalyzer)) } @@ -67,7 +66,6 @@ trait AppSparkMetricsAggTrait extends AppIndexMapperTrait { agg1.sqlAggs ++ agg2.sqlAggs, agg1.ioAggs ++ agg2.ioAggs, agg1.sqlDurAggs ++ agg2.sqlDurAggs, - agg1.maxTaskInputSizes ++ agg2.maxTaskInputSizes, agg1.stageDiagnostics ++ agg2.stageDiagnostics) } } diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAnalyzer.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAnalyzer.scala index ee7332cb6..c1b13463a 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAnalyzer.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAnalyzer.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024-2025, NVIDIA CORPORATION. + * Copyright (c) 2024-2026, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -106,6 +106,7 @@ class AppSparkMetricsAnalyzer(app: AppBase) extends AppAnalysisBase(app) { perJobRec.executorDeserializeTimeSum, perJobRec.executorRunTimeSum, perJobRec.inputBytesReadSum, + perJobRec.inputBytesReadMax, perJobRec.inputRecordsReadSum, perJobRec.jvmGCTimeSum, perJobRec.memoryBytesSpilledSum, @@ -203,6 +204,7 @@ class AppSparkMetricsAnalyzer(app: AppBase) extends AppAnalysisBase(app) { preSqlRec.executorDeserializeTimeSum, preSqlRec.executorRunTimeSum, preSqlRec.inputBytesReadSum, + preSqlRec.inputBytesReadMax, preSqlRec.inputBytesReadAvg, preSqlRec.inputRecordsReadSum, preSqlRec.jvmGCTimeSum, @@ -252,32 +254,6 @@ class AppSparkMetricsAnalyzer(app: AppBase) extends AppAnalysisBase(app) { }.toSeq } - /** - * Find the maximum task input size - * @param index App index (used by the profiler tool) - * @return a single SQLMaxTaskInputSizes record that contains the maximum value. If none, it will - * be 0L - */ - def maxTaskInputSizeBytesPerSQL(index: Int): SQLMaxTaskInputSizes = { - // TODO: We should keep maxInputSize as a field in the stageAggregate to avoid doing an - // extra path on the tasks - val maxOfSqls = app.sqlIdToStages.map { case (_, stageIds) => - // TODO: Should we only consider successful tasks? 
- val tasksInSQL = app.taskManager.getTasksByStageIds(stageIds) - if (tasksInSQL.isEmpty) { - 0L - } else { - tasksInSQL.map(_.input_bytesRead).max - } - } - val maxVal = if (maxOfSqls.nonEmpty) { - maxOfSqls.max - } else { - 0L - } - SQLMaxTaskInputSizes(app.appId, maxVal) - } - /** * Aggregates the duration and CPU time (milliseconds) by SQL * @param index App index (used by the profiler tool) @@ -398,6 +374,7 @@ class AppSparkMetricsAnalyzer(app: AppBase) extends AppAnalysisBase(app) { perStageRec.executorDeserializeTimeSum, perStageRec.executorRunTimeSum, perStageRec.inputBytesReadSum, + perStageRec.inputBytesReadMax, perStageRec.inputRecordsReadSum, perStageRec.jvmGCTimeSum, perStageRec.memoryBytesSpilledSum, diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/TaskMetricsAccumRec.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/TaskMetricsAccumRec.scala index b5d98b9ac..ad92f98a4 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/TaskMetricsAccumRec.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/TaskMetricsAccumRec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2026, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -38,6 +38,7 @@ class TaskMetricsAccumRec { var executorDeserializeTimeSum: Long = 0 var executorRunTimeSum: Long = 0 var inputBytesReadSum: Long = 0 + var inputBytesReadMax: Long = Long.MinValue var inputRecordsReadSum: Long = 0 var jvmGCTimeSum: Long = 0 var memoryBytesSpilledSum: Long = 0 @@ -70,6 +71,7 @@ class TaskMetricsAccumRec { def resetFields(): Unit = { durationMax = 0 durationMin = 0 + inputBytesReadMax = 0 peakExecutionMemoryMax = 0 resultSizeMax = 0 } @@ -102,6 +104,7 @@ class TaskMetricsAccumRec { swWriteTimeSum += rec.sw_writeTime // Max fields durationMax = math.max(durationMax, rec.duration) + inputBytesReadMax = math.max(inputBytesReadMax, rec.input_bytesRead) peakExecutionMemoryMax = math.max(peakExecutionMemoryMax, rec.peakExecutionMemory) resultSizeMax = math.max(resultSizeMax, rec.resultSize) // Min Fields @@ -136,6 +139,7 @@ class TaskMetricsAccumRec { swWriteTimeSum += rec.swWriteTimeSum // Max durationMax = math.max(durationMax, rec.durationMax) + inputBytesReadMax = math.max(inputBytesReadMax, rec.inputBytesReadMax) peakExecutionMemoryMax = math.max(peakExecutionMemoryMax, rec.peakExecutionMemoryMax) resultSizeMax = math.max(resultSizeMax, rec.resultSizeMax) // Min diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ApplicationSummaryInfo.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ApplicationSummaryInfo.scala index 783e2fc50..9e1d2ebc8 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ApplicationSummaryInfo.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ApplicationSummaryInfo.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2025, NVIDIA CORPORATION. + * Copyright (c) 2021-2026, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -47,14 +47,14 @@ case class ApplicationSummaryInfo( sparkProps: Seq[RapidsPropertyProfileResult], sqlStageInfo: Seq[SQLStageInfoProfileResult], wholeStage: Seq[WholeStageCodeGenResults], - maxTaskInputBytesRead: Seq[SQLMaxTaskInputSizes], appLogPath: Seq[AppLogPathProfileResults], ioMetrics: Seq[IOAnalysisProfileResult], sysProps: Seq[RapidsPropertyProfileResult], sqlCleanedAlignedIds: Seq[SQLCleanAndAlignIdsProfileResult], sparkRapidsBuildInfo: Seq[SparkRapidsBuildInfoEvent], writeOpsInfo: Seq[WriteOpProfileResult], - sqlPlanInfo: Seq[SQLPlanInfoProfileResult]) + sqlPlanInfo: Seq[SQLPlanInfoProfileResult], + appLevelRecommendationSignals: Seq[AppLevelRecommendationSignalsProfileResult] = Seq.empty) trait AppInfoPropertyGetter { // returns all the properties (i.e., spark) @@ -90,8 +90,8 @@ trait AppInfoReadMetrics { } trait AppInfoGpuOomCheck { - def hasScanStagesWithGpuOom: Boolean = false - def hasShuffleStagesWithOom: Boolean = false + def scanStagesWithGpuOom: Set[Long] = Set.empty + def gpuShuffleStagesWithContainerOom: Set[Long] = Set.empty } trait AppInfoColumnarExchangeMetrics { @@ -186,8 +186,8 @@ class SingleAppSummaryInfoProvider( } override def getMaxInput: Double = { - if (app.maxTaskInputBytesRead.nonEmpty) { - app.maxTaskInputBytesRead.head.maxTaskInputBytesRead + if (app.sqlTaskAggMetrics.nonEmpty) { + app.sqlTaskAggMetrics.map(_.inputBytesReadMax).max.toDouble } else { 0.0 } @@ -229,66 +229,95 @@ class SingleAppSummaryInfoProvider( } } + override def scanStagesWithGpuOom: Set[Long] = { + SingleAppSummaryInfoProvider.computeScanStagesWithGpuOom( + app.appInfo.exists(_.pluginEnabled), + app.failedTasks, app.stageMetrics, appInfo) + } + + override def gpuShuffleStagesWithContainerOom: Set[Long] = { + SingleAppSummaryInfoProvider.computeShuffleStagesWithContainerOom( + app.appInfo.exists(_.pluginEnabled), + getSparkProperty("spark.master"), + app.failedStages, app.failedTasks) + } + + override def getMaxColumnarExchangeDataSizeBytes: Option[Long] = 
{ + SingleAppSummaryInfoProvider.computeMaxColumnarExchangeDataSizeBytes(app.sqlMetrics) + } + + override def getClassPathEntries: Map[String, String] = { + appInfo.classpathEntries + } +} + +object SingleAppSummaryInfoProvider { /** - * Check if there are any scan stages with failed tasks due to GPU OOM errors - * (GpuRetryOOM and GpuSplitAndRetryOOM). + * Computes the set of scan stage IDs that had GPU OOM failures. */ - override def hasScanStagesWithGpuOom: Boolean = { - // If the plugin is not enabled (i.e. non-GPU app), return false - if (!app.appInfo.exists(_.pluginEnabled)) { - return false + def computeScanStagesWithGpuOom( + pluginEnabled: Boolean, + failedTasks: Seq[FailedTaskProfileResults], + stageMetrics: Seq[AccumProfileResults], + appInfo: ApplicationInfo): Set[Long] = { + if (!pluginEnabled) { + return Set.empty } // Find stages with failed tasks due to GPU OOM errors - val failedStagesWithGpuOom = app.failedTasks.collect { - case task if SparkRapidsOomExceptions.gpuExceptionClassNames - .exists(task.endReason.contains) => task.stageId + val failedStagesWithGpuOom = failedTasks.collect { + case task if SparkRapidsOomExceptions.isGpuOom(task.endReason) => task.stageId.toLong } if (failedStagesWithGpuOom.isEmpty) { - return false + return Set.empty } // Calculate stageIds of scan stages (i.e. stages with 'scan time' metrics) - val scanStages = app.stageMetrics.collect { + val scanStages = stageMetrics.collect { case metric if IoMetrics.getIoMetricsHelper(appInfo).isScanTimeMetric(metric) => - metric.stageId + metric.stageId.toLong }.toSet if (scanStages.isEmpty) { - return false + return Set.empty } - // Check if any failed GPU OOM stage is also a scan stage - failedStagesWithGpuOom.exists(scanStages.contains) + // Return scan stages that also had GPU OOM failures + failedStagesWithGpuOom.filter(scanStages.contains).toSet } /** - * This method checks for failed shuffle stages with OOM errors in the task's end reason. 
- * Note: This check is enabled only if the plugin is enabled (i.e. GPU app) and running on YARN. - * """ + * Computes the set of shuffle stage IDs that had container OOM failures (YARN only). + * Detects ExecutorLostFailure with exit code 137 (SIGKILL from container memory enforcement). + * See: https://github.com/NVIDIA/spark-rapids-tools/issues/1566 */ - override def hasShuffleStagesWithOom: Boolean = { - // If the plugin is not enabled (i.e. non-GPU app) or not running on YARN, return false - val sparkMaster = SparkMaster(getSparkProperty("spark.master")) - if (!app.appInfo.exists(_.pluginEnabled) || !sparkMaster.contains(Yarn)) { - return false + def computeShuffleStagesWithContainerOom( + pluginEnabled: Boolean, + sparkMasterStr: Option[String], + failedStages: Seq[FailedStagesProfileResults], + failedTasks: Seq[FailedTaskProfileResults]): Set[Long] = { + if (!pluginEnabled || !SparkMaster(sparkMasterStr).contains(Yarn)) { + return Set.empty } // Get stage IDs of failed shuffle stages // Sample stage name: "submitShuffleJob$ at GpuShuffleExchangeExec.scala:53" - val failedStagesWithShuffle = app.failedStages.collect { + val failedStagesWithShuffle = failedStages.collect { case stage if stage.name.contains(SparkRapidsOomExceptions.gpuShuffleClassName) => - stage.stageId + stage.stageId.toLong }.toSet if (failedStagesWithShuffle.isEmpty) { - return false + return Set.empty } // scalastyle:off line.size.limit // Check if the failed task's end reason contains OOM errors - // Sample end reason for failed tasks on YARN: "ExecutorLostFailure (executor 2 exited caused by one of the running tasks) Reason: Container from a bad node: container_e02_17xxx on host: test-cluster-w-0. Exit status: 137" + // Sample end reason for failed tasks on YARN: + // "ExecutorLostFailure (executor 2 exited caused by one of the running tasks) + // Reason: Container from a bad node: container_e02_17xxx on host: test-cluster-w-0. 
+ // Exit status: 137" // Reference: https://github.com/apache/spark/blob/master/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala // scalastyle:on line.size.limit // Regular expressions to identify OOM failures in task's end reason @@ -299,30 +328,22 @@ class SingleAppSummaryInfoProvider( s"Exit status: ${UnixExitCode.FORCE_KILLED}" ).map(_.r) - // Check if any failed task in shuffle stages have OOM failures - app.failedTasks.exists { task => - if (failedStagesWithShuffle.contains(task.stageId)) { - // Check if the task failed due to OOM - oomFailurePatterns.forall(p => p.findFirstIn(task.endReason).isDefined) - } else { - // Ignore if the failed task is not in a shuffle stage - false - } - } + // Return shuffle stages that had tasks with OOM failures + failedTasks.collect { + case task if failedStagesWithShuffle.contains(task.stageId.toLong) && + oomFailurePatterns.forall(p => p.findFirstIn(task.endReason).isDefined) => + task.stageId.toLong + }.toSet } /** - * Get the maximum data size from ColumnarExchange metrics. - * This method searches through SQLPlan metrics to find all ColumnarExchange nodes - * with "data size" metrics and returns the maximum total value. - * - * @return Option[Long] containing the maximum data size in bytes, or None if no - * ColumnarExchange "data size" metrics are found + * Computes the maximum data size from ColumnarExchange metrics. 
*/ - override def getMaxColumnarExchangeDataSizeBytes: Option[Long] = { - val columnarExchangeDataSizesBytes = app.sqlMetrics.collect { + def computeMaxColumnarExchangeDataSizeBytes( + sqlMetrics: Seq[SQLAccumProfileResults]): Option[Long] = { + val columnarExchangeDataSizesBytes = sqlMetrics.collect { case metric if metric.nodeName.contains("ColumnarExchange") && - metric.name == "data size" => + metric.name == "data size" => metric.total } if (columnarExchangeDataSizesBytes.nonEmpty) { @@ -331,8 +352,4 @@ class SingleAppSummaryInfoProvider( None } } - - override def getClassPathEntries: Map[String, String] = { - appInfo.classpathEntries - } } diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/CollectInformation.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/CollectInformation.scala index 1addbbb48..e04a74ea0 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/CollectInformation.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/CollectInformation.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2025, NVIDIA CORPORATION. + * Copyright (c) 2021-2026, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ProfileClassWarehouse.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ProfileClassWarehouse.scala index 7bde04197..b467db8c6 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ProfileClassWarehouse.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/ProfileClassWarehouse.scala @@ -516,6 +516,38 @@ case class AppInfoProfileResults( } } +case class AppLevelRecommendationSignalsProfileResult( + appId: String, + numScanStagesWithGpuOom: Int, + numGpuShuffleStagesWithContainerOom: Int) extends ProfileResult { + override def outputHeaders: Array[String] = { + OutHeaderRegistry.outputHeaders("AppLevelRecommendationSignalsProfileResult") + } + + override def convertToSeq(): Array[String] = Array( + appId, + numScanStagesWithGpuOom.toString, + numGpuShuffleStagesWithContainerOom.toString) + + override def convertToCSVSeq(): Array[String] = Array( + StringUtils.reformatCSVString(appId), + numScanStagesWithGpuOom.toString, + numGpuShuffleStagesWithContainerOom.toString) +} + +object AppLevelRecommendationSignalsProfileResult { + def build( + appId: String, + scanStagesWithGpuOom: Set[Long], + gpuShuffleStagesWithContainerOom: Set[Long]) + : Seq[AppLevelRecommendationSignalsProfileResult] = { + Seq(AppLevelRecommendationSignalsProfileResult( + appId, + scanStagesWithGpuOom.size, + gpuShuffleStagesWithContainerOom.size)) + } +} + case class AppLogPathProfileResults( appName: String, appId: Option[String], eventLogPath: String) extends ProfileResult { override def outputHeaders: Array[String] = { @@ -751,6 +783,7 @@ trait BaseJobStageAggTaskMetricsProfileResult extends ProfileResult { def executorDeserializeTimeSum: Long def executorRunTimeSum: Long def inputBytesReadSum: Long + def inputBytesReadMax: Long def inputRecordsReadSum: Long def jvmGCTimeSum: Long def memoryBytesSpilledSum: Long @@ -791,6 +824,7 @@ trait 
BaseJobStageAggTaskMetricsProfileResult extends ProfileResult { executorDeserializeTimeSum.toString, executorRunTimeSum.toString, inputBytesReadSum.toString, + inputBytesReadMax.toString, inputRecordsReadSum.toString, jvmGCTimeSum.toString, memoryBytesSpilledSum.toString, @@ -828,6 +862,7 @@ case class JobAggTaskMetricsProfileResult( executorDeserializeTimeSum: Long, executorRunTimeSum: Long, inputBytesReadSum: Long, + inputBytesReadMax: Long, inputRecordsReadSum: Long, jvmGCTimeSum: Long, memoryBytesSpilledSum: Long, @@ -868,6 +903,7 @@ case class StageAggTaskMetricsProfileResult( executorDeserializeTimeSum: Long, executorRunTimeSum: Long, inputBytesReadSum: Long, + inputBytesReadMax: Long, inputRecordsReadSum: Long, jvmGCTimeSum: Long, memoryBytesSpilledSum: Long, @@ -922,6 +958,7 @@ case class StageAggTaskMetricsProfileResult( other.executorDeserializeTimeSum, executorRunTimeSum = this.executorRunTimeSum + other.executorRunTimeSum, inputBytesReadSum = this.inputBytesReadSum + other.inputBytesReadSum, + inputBytesReadMax = Math.max(this.inputBytesReadMax, other.inputBytesReadMax), inputRecordsReadSum = this.inputRecordsReadSum + other.inputRecordsReadSum, jvmGCTimeSum = this.jvmGCTimeSum + other.jvmGCTimeSum, memoryBytesSpilledSum = this.memoryBytesSpilledSum + other.memoryBytesSpilledSum, @@ -1084,12 +1121,6 @@ case class StageDiagnosticResult( } } -case class SQLMaxTaskInputSizes( - appId: String, - // Not added to the output since it is used only by the AutoTuner - maxTaskInputBytesRead: Double -) - case class SQLTaskAggMetricsProfileResult( appId: String, sqlId: Long, @@ -1107,6 +1138,7 @@ case class SQLTaskAggMetricsProfileResult( executorDeserializeTimeSum: Long, executorRunTimeSum: Long, inputBytesReadSum: Long, + inputBytesReadMax: Long, // Not added to the output since it is used only by the AutoTuner inputBytesReadAvg: Double, inputRecordsReadSum: Long, @@ -1155,6 +1187,7 @@ case class SQLTaskAggMetricsProfileResult( executorDeserializeTimeSum.toString, 
executorRunTimeSum.toString, inputBytesReadSum.toString, + inputBytesReadMax.toString, inputRecordsReadSum.toString, jvmGCTimeSum.toString, memoryBytesSpilledSum.toString, @@ -1191,6 +1224,7 @@ case class SQLTaskAggMetricsProfileResult( executorDeserializeTimeSum.toString, executorRunTimeSum.toString, inputBytesReadSum.toString, + inputBytesReadMax.toString, inputRecordsReadSum.toString, jvmGCTimeSum.toString, memoryBytesSpilledSum.toString, @@ -1499,15 +1533,33 @@ case class RecommendedCommentResult(comment: String) { override def toString: String = "- %s".format(comment) } +// scalastyle:off line.size.limit /** - * Helper object to store the list of SparkRapids OOM exceptions. + * Helper object to detect OOM exceptions from SparkRapids event logs. + * + * GPU OOM class names from spark-rapids-jni: + * - GpuOOM -> GpuRetryOOM, GpuSplitAndRetryOOM + * See: https://github.com/NVIDIA/spark-rapids-jni/blob/725cd64be2115cd072bf51d7d6c5281d6d08bf4f/src/main/cpp/src/SparkResourceAdaptorJni.cpp#L1313 + * See: https://github.com/NVIDIA/spark-rapids/blob/79922d62a1c5759963e969018322ad8e544629ff/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RmmRapidsRetryIterator.scala */ +// scalastyle:on line.size.limit object SparkRapidsOomExceptions { - val gpuExceptionClassNames: Set[String] = { - Set("GpuSplitAndRetryOOM", "GpuRetryOOM") - } + // Current JNI: GpuOOM -> GpuRetryOOM, GpuSplitAndRetryOOM + // Pre-24.02 JNI: jni.SplitAndRetryOOM, jni.RetryOOM (no Gpu prefix) + // Using "jni." prefix to avoid matching CpuSplitAndRetryOOM / CpuRetryOOM + // Using "jni.GpuOOM" (anchored) for the base class to avoid partial matches + val gpuExceptionClassNames: Set[String] = + Set("GpuSplitAndRetryOOM", "GpuRetryOOM", "jni.GpuOOM", + "jni.SplitAndRetryOOM", "jni.RetryOOM") val gpuShuffleClassName: String = "GpuShuffleExchangeExec" + + /** + * Check if a failure reason indicates a GPU OOM error. 
+ */ + def isGpuOom(failureReason: String): Boolean = { + gpuExceptionClassNames.exists(failureReason.contains) + } } /** diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala index 1534fbf2b..2b689397b 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala @@ -293,20 +293,33 @@ class Profiler(hadoopConf: Configuration, appArgs: ProfileArgs, enablePB: Boolea } } val analysis = RawMetricProfilerView.getAggMetrics(analyzedApps) - val maxTaskInputInfo = if (useAutoTuner) { - analysis.maxTaskInputSizes - } else { - Seq.empty - } val sqlIdAlign = if (outputAlignedSQLIds) { collect.getSQLCleanAndAligned } else { Seq.empty } val endTime = System.currentTimeMillis() - val appInfo = collect.getAppInfo val sqlMetrics = collect.getSQLPlanMetrics - logDebug(s"Time to collect Profiling Info [${appInfo.head.appId}]: ${endTime - startTime}.") + val stageMetrics = collect.getStageLevelMetrics + val failedTasks = healthCheck.getFailedTasks + val failedStages = healthCheck.getFailedStages + + // Compute GPU OOM signals for app_level_recommendation_signals.csv + val singleApp = analyzedApps.head + val pluginEnabled = singleApp.gpuMode + val scanOomStages = SingleAppSummaryInfoProvider.computeScanStagesWithGpuOom( + pluginEnabled, failedTasks, stageMetrics, singleApp) + val gpuShuffleContainerOomStages = + SingleAppSummaryInfoProvider.computeShuffleStagesWithContainerOom( + pluginEnabled, singleApp.sparkProperties.get("spark.master"), + failedStages, failedTasks) + + val appInfo = collect.getAppInfo + val appId = appInfo.headOption.flatMap(_.appId).getOrElse("") + val appLevelRecommendationSignals = AppLevelRecommendationSignalsProfileResult.build( + appId, scanOomStages, gpuShuffleContainerOomStages) + + logDebug(s"Time to collect Profiling Info [$appId]: ${endTime - startTime}.") val 
appInfoSummary = ApplicationSummaryInfo( appInfo = appInfo, dsInfo = collect.getDataSourceInfo(sqlMetrics), @@ -315,14 +328,14 @@ class Profiler(hadoopConf: Configuration, appArgs: ProfileArgs, enablePB: Boolea rapidsProps = collect.getRapidsProperties, rapidsJar = collect.getRapidsJARInfo, sqlMetrics = sqlMetrics, - stageMetrics = collect.getStageLevelMetrics, + stageMetrics = stageMetrics, jobAggMetrics = analysis.jobAggs, stageAggMetrics = analysis.stageAggs, sqlTaskAggMetrics = analysis.sqlAggs, durAndCpuMet = analysis.sqlDurAggs, skewInfo = analysis.taskShuffleSkew, - failedTasks = healthCheck.getFailedTasks, - failedStages = healthCheck.getFailedStages, + failedTasks = failedTasks, + failedStages = failedStages, failedJobs = healthCheck.getFailedJobs, removedBMs = healthCheck.getRemovedBlockManager, removedExecutors = healthCheck.getRemovedExecutors, @@ -330,14 +343,14 @@ class Profiler(hadoopConf: Configuration, appArgs: ProfileArgs, enablePB: Boolea sparkProps = collect.getSparkProperties, sqlStageInfo = collect.getSQLToStage, wholeStage = collect.getWholeStageCodeGenMapping, - maxTaskInputBytesRead = maxTaskInputInfo, appLogPath = collect.getAppLogPath, ioMetrics = analysis.ioAggs, sysProps = collect.getSystemProperties, sqlCleanedAlignedIds = sqlIdAlign, sparkRapidsBuildInfo = collect.getSparkRapidsInfo, writeOpsInfo = collect.getWriteOperationInfo, - sqlPlanInfo = collect.getSQLPlanInfoTruncated) + sqlPlanInfo = collect.getSQLPlanInfoTruncated, + appLevelRecommendationSignals = appLevelRecommendationSignals) (appInfoSummary, DiagnosticSummaryInfo(analysis.stageDiagnostics, collect.getIODiagnosticMetrics)) } @@ -410,6 +423,8 @@ class Profiler(hadoopConf: Configuration, appArgs: ProfileArgs, enablePB: Boolea // writeOps are generated in only CSV format profileOutputWriter.writeCSVTable(ProfWriteOpsView.getLabel, app.writeOpsInfo) profileOutputWriter.writeCSVTable(TASK_SHUFFLE_SKEW, app.skewInfo) + 
profileOutputWriter.writeCSVTable(APP_LEVEL_RECOMMENDATION_SIGNALS, + app.appLevelRecommendationSignals) profileOutputWriter.writeText("\n### C. Health Check###\n") profileOutputWriter.writeCSVTable(ProfFailedTaskView.getLabel, app.failedTasks) profileOutputWriter.writeTable(ProfFailedStageView.getLabel, app.failedStages) diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala index 18c026c1d..9cf961620 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/AutoTuner.scala @@ -2182,7 +2182,7 @@ class ProfilingAutoTuner( // First, calculate the recommendation based on input sizes val calculatedValueFromInputSize = super.calculateMaxPartitionBytesInMB(maxPartitionBytes) getPropertyValue("spark.sql.files.maxPartitionBytes") match { - case Some(currentValue) if appInfoProvider.hasScanStagesWithGpuOom => + case Some(currentValue) if appInfoProvider.scanStagesWithGpuOom.nonEmpty => // GPU OOM detected. We may want to reduce max partition size. val halvedValue = StringUtils.convertToMB(currentValue, Some(ByteUnit.BYTE)) / 2 // Choose the minimum between the calculated value and half of the current value. @@ -2203,7 +2203,7 @@ class ProfilingAutoTuner( */ override def recommendShufflePartitionsInternal(): Int = { val calculatedValue = super.recommendShufflePartitionsInternal() - if (appInfoProvider.hasShuffleStagesWithOom) { + if (appInfoProvider.gpuShuffleStagesWithContainerOom.nonEmpty) { // Shuffle Stages with Task OOM detected. We may want to increase shuffle partitions. 
val recShufflePartitions = shufflePartitionValue * configProvider.getEntry("SHUFFLE_PARTITION_MULTIPLIER").getDefault.toInt diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/QualAppSummaryInfoProvider.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/QualAppSummaryInfoProvider.scala index 8e681e70e..f69ffd298 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/QualAppSummaryInfoProvider.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/tuning/QualAppSummaryInfoProvider.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024-2025, NVIDIA CORPORATION. + * Copyright (c) 2024-2026, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -98,8 +98,8 @@ class QualAppSummaryInfoProvider( } override def getMaxInput: Double = { - if (rawAggMetrics.maxTaskInputSizes.nonEmpty) { - rawAggMetrics.maxTaskInputSizes.head.maxTaskInputBytesRead + if (rawAggMetrics.sqlAggs.nonEmpty) { + rawAggMetrics.sqlAggs.map(_.inputBytesReadMax).max.toDouble } else { 0.0 } diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/views/OutHeaderRegistry.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/views/OutHeaderRegistry.scala index 795de4c6a..ac61a5d3b 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/views/OutHeaderRegistry.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/views/OutHeaderRegistry.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. + * Copyright (c) 2025-2026, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -70,6 +70,8 @@ object OutHeaderRegistry { "AppInfoProfileResults" -> Array("appName", "appId", "attemptId", "sparkUser", "startTime", "endTime", "duration", "durationStr", "sparkRuntime", "sparkVersion", "pluginEnabled", "totalCoreSeconds"), + "AppLevelRecommendationSignalsProfileResult" -> + Array("appId", "numScanStagesWithGpuOom", "numGpuShuffleStagesWithContainerOom"), "AppLogPathProfileResults" -> Array("appName", "appId", "eventLogPath"), "FailedTaskProfileResults" -> @@ -124,6 +126,7 @@ object OutHeaderRegistry { "executorDeserializeTime_sum", "executorRunTime_sum", "input_bytesRead_sum", + "input_bytesRead_max", "input_recordsRead_sum", "jvmGCTime_sum", "memoryBytesSpilled_sum", @@ -156,6 +159,7 @@ object OutHeaderRegistry { "executorDeserializeTime_sum", "executorRunTime_sum", "input_bytesRead_sum", + "input_bytesRead_max", "input_recordsRead_sum", "jvmGCTime_sum", "memoryBytesSpilled_sum", @@ -231,6 +235,7 @@ object OutHeaderRegistry { "executorDeserializeTime_sum", "executorRunTime_sum", "input_bytesRead_sum", + "input_bytesRead_max", "input_recordsRead_sum", "jvmGCTime_sum", "memoryBytesSpilled_sum", diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/views/QualRawReportGenerator.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/views/QualRawReportGenerator.scala index 604fbfa7b..c26a9b789 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/views/QualRawReportGenerator.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/views/QualRawReportGenerator.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024-2025, NVIDIA CORPORATION. + * Copyright (c) 2024-2026, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,7 +17,7 @@ package com.nvidia.spark.rapids.tool.views import com.nvidia.spark.rapids.tool.analysis.{AggRawMetricsResult, AppSQLPlanAnalyzer, QualSparkMetricsAggregator} -import com.nvidia.spark.rapids.tool.profiling.{DataSourceProfileResult, ProfileOutputWriter, ProfileResult, SQLAccumProfileResults} +import com.nvidia.spark.rapids.tool.profiling.{AppLevelRecommendationSignalsProfileResult, DataSourceProfileResult, ProfileOutputWriter, ProfileResult, SQLAccumProfileResults} import org.apache.spark.internal.Logging import org.apache.spark.sql.rapids.tool.qualification.QualificationAppInfo @@ -37,7 +37,6 @@ object QualRawReportGenerator extends Logging { AggMetricsResultSorter.sortSqlAgg(aggRawResult.sqlAggs), AggMetricsResultSorter.sortIO(aggRawResult.ioAggs), AggMetricsResultSorter.sortSqlDurationAgg(aggRawResult.sqlDurAggs), - aggRawResult.maxTaskInputSizes, AggMetricsResultSorter.sortStageDiagnostics(aggRawResult.stageDiagnostics)) Map( STAGE_AGG_LABEL -> sortedRes.stageAggs, @@ -71,6 +70,9 @@ object QualRawReportGenerator extends Logging { val pWriter = new ProfileOutputWriter(metricsDirectory, "profile", 10000000, outputCSV = true) try { + val aggRawMetrics = QualSparkMetricsAggregator + .getAggRawMetrics(app, sqlAnalyzer = Some(sqlPlanAnalyzer)) + pWriter.writeText("### A. Information Collected ###") pWriter.writeTable( QualInformationView.getLabel, QualInformationView.getRawView(Seq(app))) @@ -96,11 +98,13 @@ object QualRawReportGenerator extends Logging { SystemQualPropertiesView.getRawView(Seq(app)), Some(SystemQualPropertiesView.getDescription)) pWriter.writeText("\n### B. 
Analysis ###\n") - constructLabelsMaps(QualSparkMetricsAggregator - .getAggRawMetrics( - app, sqlAnalyzer = Some(sqlPlanAnalyzer))).foreach { case (label, metrics) => + constructLabelsMaps(aggRawMetrics).foreach { case (label, metrics) => pWriter.writeCSVTable(label, metrics) } + // GPU-only signals default to 0 for qualification (CPU event logs) + val appLevelRecommendationSignals = AppLevelRecommendationSignalsProfileResult.build( + app.appId, Set.empty[Long], Set.empty[Long]) + pWriter.writeCSVTable(APP_LEVEL_RECOMMENDATION_SIGNALS, appLevelRecommendationSignals) pWriter.writeText("\n### C. Health Check###\n") pWriter.writeCSVTable(QualFailedTaskView.getLabel, QualFailedTaskView.getRawView(Seq(app))) pWriter.writeTable( diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/views/RawMetricProfView.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/views/RawMetricProfView.scala index 2d10c8f36..3ba76db7b 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/views/RawMetricProfView.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/views/RawMetricProfView.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024-2025, NVIDIA CORPORATION. + * Copyright (c) 2024-2026, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,7 +17,7 @@ package com.nvidia.spark.rapids.tool.views import com.nvidia.spark.rapids.tool.analysis.ProfSparkMetricsAggregator -import com.nvidia.spark.rapids.tool.profiling.{IOAnalysisProfileResult, JobAggTaskMetricsProfileResult, ShuffleSkewProfileResult, SQLDurationExecutorTimeProfileResult, SQLMaxTaskInputSizes, SQLTaskAggMetricsProfileResult, StageAggTaskMetricsProfileResult, StageDiagnosticResult} +import com.nvidia.spark.rapids.tool.profiling.{IOAnalysisProfileResult, JobAggTaskMetricsProfileResult, ShuffleSkewProfileResult, SQLDurationExecutorTimeProfileResult, SQLTaskAggMetricsProfileResult, StageAggTaskMetricsProfileResult, StageDiagnosticResult} import org.apache.spark.sql.rapids.tool.profiling.ApplicationInfo @@ -30,7 +30,6 @@ case class ProfilerAggregatedView( sqlAggs: Seq[SQLTaskAggMetricsProfileResult], ioAggs: Seq[IOAnalysisProfileResult], sqlDurAggs: Seq[SQLDurationExecutorTimeProfileResult], - maxTaskInputSizes: Seq[SQLMaxTaskInputSizes], stageDiagnostics: Seq[StageDiagnosticResult]) object RawMetricProfilerView { @@ -43,7 +42,6 @@ object RawMetricProfilerView { AggMetricsResultSorter.sortSqlAgg(aggMetricsResults.sqlAggs), AggMetricsResultSorter.sortIO(aggMetricsResults.ioAggs), AggMetricsResultSorter.sortSqlDurationAgg(aggMetricsResults.sqlDurAggs), - aggMetricsResults.maxTaskInputSizes, AggMetricsResultSorter.sortStageDiagnostics(aggMetricsResults.stageDiagnostics)) } } diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/views/package.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/views/package.scala index bab4b8a2b..7251a4090 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/views/package.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/views/package.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024-2025, NVIDIA CORPORATION. + * Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ package object views { val SQL_DUR_LABEL = "SQL Duration and Executor CPU Time Percent" val SQL_MAX_INPUT_SIZE = "SQL Max Task Input Size" val STAGE_DIAGNOSTICS_LABEL = "Stage Level Diagnostic Metrics" + val APP_LEVEL_RECOMMENDATION_SIGNALS = "App Level Recommendation Signals" val CLUSTER_INFORMATION_LABEL = "Cluster Information" val AGG_DESCRIPTION = Map( diff --git a/core/src/test/resources/ProfilingExpectations/nds_q88_photon_db_13_3_job_metrics_agg_expectation.csv b/core/src/test/resources/ProfilingExpectations/nds_q88_photon_db_13_3_job_metrics_agg_expectation.csv index 26d62a885..a400bd298 100644 --- a/core/src/test/resources/ProfilingExpectations/nds_q88_photon_db_13_3_job_metrics_agg_expectation.csv +++ b/core/src/test/resources/ProfilingExpectations/nds_q88_photon_db_13_3_job_metrics_agg_expectation.csv @@ -1,58 +1,58 @@ -jobId,numTasks,Duration,diskBytesSpilled_sum,duration_sum,duration_max,duration_min,duration_avg,executorCPUTime_sum,executorDeserializeCPUTime_sum,executorDeserializeTime_sum,executorRunTime_sum,input_bytesRead_sum,input_recordsRead_sum,jvmGCTime_sum,memoryBytesSpilled_sum,output_bytesWritten_sum,output_recordsWritten_sum,peakExecutionMemory_max,resultSerializationTime_sum,resultSize_max,sr_fetchWaitTime_sum,sr_localBlocksFetched_sum,sr_localBytesRead_sum,sr_remoteBlocksFetched_sum,sr_remoteBytesRead_sum,sr_remoteBytesReadToDisk_sum,sr_totalBytesRead_sum,sw_bytesWritten_sum,sw_recordsWritten_sum,sw_writeTime_sum -48,431,237976,0,371230,1032,333,861.3,343507,1365,1286,367785,190131859,8639936081,160,0,0,0,169667947,21,15767,8,3,15657,3,15657,0,31314,18964,431,16 -47,432,214657,0,376777,1133,499,872.2,346447,1388,1310,373271,14007280,8639936081,144,0,0,0,159530057,11,15577,4,3,15657,3,15657,0,31314,19008,432,20 
-46,433,191440,0,457364,3323,391,1056.3,352977,1639,2509,451358,5242628040,8639936081,1912,0,0,0,250840493,9,16203,551,3,15657,3,15657,0,31314,19052,433,16 -49,1,186241,0,266,266,266,266.0,86,1,1,261,0,0,0,0,0,0,138414192,0,5344,10,209,9196,222,9768,0,18964,44,1,0 -45,433,166081,0,415849,1448,339,960.4,350015,1415,1375,412139,2276478144,8639936081,568,0,0,0,195992906,2,15780,7,3,15657,3,15657,0,31314,19052,433,34 -44,431,139667,0,398973,1403,365,925.7,354332,1420,1327,395265,1075691986,8639936081,328,0,0,0,188587155,0,15767,10,3,15657,3,15657,0,31314,18964,431,17 -50,1,122711,0,267,267,267,267.0,71,1,1,262,0,0,0,0,0,0,138414192,0,5343,58,219,9636,213,9372,0,19008,44,1,0 -43,432,114755,0,403652,1369,329,934.4,353529,1424,1326,399766,1395949742,8639936081,624,0,0,0,201771890,13,15767,14,3,15657,3,15657,0,31314,19008,432,16 -51,1,97958,0,386,386,386,386.0,60,1,1,381,0,0,0,0,0,0,138414192,0,5343,154,221,9724,210,9240,0,18964,44,1,0 -42,431,89634,0,616500,1899,589,1430.4,378287,1521,1515,612098,16461920726,8639936081,4132,0,0,0,216740322,23,15805,10,3,15657,3,15657,0,31314,18964,431,16 -52,1,71718,0,384,384,384,384.0,54,1,1,379,0,0,0,0,0,0,138414192,0,5343,170,223,9812,210,9240,0,19052,44,1,0 -41,431,51085,0,759623,2321,918,1762.5,395214,1706,2027,754015,26337468742,8639936081,7772,0,0,0,250648581,87,16157,170,3,15657,3,15657,0,31314,18964,431,19 -53,1,46297,0,136,136,136,136.0,57,1,1,131,0,0,0,0,0,0,138414192,0,5344,0,214,9416,219,9636,0,19052,44,1,0 -54,1,23051,0,340,340,340,340.0,36,1,1,334,0,0,0,0,0,0,138414192,0,5343,223,215,9460,217,9548,0,19008,44,1,0 -31,1,6979,0,6738,6738,6738,6738.0,5104,128,688,6035,349526,86400,53,0,0,0,155563380,1,10759,0,0,0,0,0,0,0,7239,1800,0 -34,1,6953,0,6725,6725,6725,6725.0,479,185,677,6036,349526,86400,53,0,0,0,155563380,0,9814,0,0,0,0,0,0,0,7239,1800,0 -33,1,6940,0,6729,6729,6729,6729.0,206,216,679,6035,349526,86400,53,0,0,0,155563380,1,9896,0,0,0,0,0,0,0,7239,1800,0 
-35,1,6925,0,6729,6729,6729,6729.0,157,136,681,6035,12261,1350,53,0,0,0,155199546,1,9839,0,0,0,0,0,0,0,699,165,0 -38,1,6855,0,6743,6743,6743,6743.0,187,256,688,6035,349526,86400,53,0,0,0,155563380,1,9927,0,0,0,0,0,0,0,7239,1800,0 -0,1,6033,0,5699,5699,5699,5699.0,422,948,1114,4382,0,0,37,0,0,0,0,8,2794,0,0,0,0,0,0,0,0,0,0 -13,200,5707,0,87661,966,349,438.3,9924,528,951,84265,0,0,144,0,0,0,0,9,6258,0,0,0,0,0,0,0,0,0,0 -23,200,5479,0,84240,490,355,421.2,5394,292,214,82784,0,0,136,0,0,0,0,0,6214,0,0,0,0,0,0,0,0,0,0 -21,200,5271,0,80904,485,353,404.5,6100,304,220,79384,0,0,136,0,0,0,0,1,6302,0,0,0,0,0,0,0,0,0,0 -27,200,4728,0,70760,442,309,353.8,4145,287,209,69494,0,0,152,0,0,0,0,10,5788,0,0,0,0,0,0,0,0,0,0 -3,1,4708,0,4693,4693,4693,4693.0,280,701,804,3796,0,0,26,0,0,0,0,7,2834,0,0,0,0,0,0,0,0,0,0 -25,200,4603,0,70379,569,314,351.9,4200,294,216,69040,0,0,168,0,0,0,0,14,5708,0,0,0,0,0,0,0,0,0,0 -36,1,4556,0,4332,4332,4332,4332.0,3359,95,401,3907,30328,7200,39,0,0,0,155245068,1,10552,0,0,0,0,0,0,0,7719,1920,0 -29,200,4555,0,69682,423,310,348.4,3830,272,218,68521,0,0,168,0,0,0,0,9,5748,0,0,0,0,0,0,0,0,0,0 -32,1,4515,0,4334,4334,4334,4334.0,260,130,404,3907,349526,86400,39,0,0,0,155563380,1,9851,0,0,0,0,0,0,0,7239,1800,0 -39,1,4488,0,4322,4322,4322,4322.0,112,124,392,3907,349526,86400,39,0,0,0,155563380,1,9926,0,0,0,0,0,0,0,7239,1800,0 -37,1,4481,0,4334,4334,4334,4334.0,136,144,405,3907,349526,86400,39,0,0,0,155563380,1,9851,0,0,0,0,0,0,0,7239,1800,0 -40,1,4476,0,4327,4327,4327,4327.0,98,147,394,3907,349526,86400,39,0,0,0,155563380,1,9895,0,0,0,0,0,0,0,7239,1800,0 -56,1,1055,0,1022,1022,1022,1022.0,758,77,95,901,0,0,0,0,0,0,134218344,6,10091,5,218,9592,220,9680,0,19272,0,0,0 -19,200,803,0,11895,145,38,59.5,1050,321,252,10017,0,0,56,0,0,0,0,22,2739,0,0,0,0,0,0,0,0,0,0 -26,1,316,0,312,312,312,312.0,2,1,1,306,0,0,0,0,0,0,0,0,3777,0,0,0,0,0,0,0,0,0,0 -2,1,280,0,267,267,267,267.0,6,4,4,124,0,0,0,0,0,0,0,1,3342,0,0,0,0,0,0,0,0,0,0 
-11,1,264,0,254,254,254,254.0,5,3,3,241,0,0,0,0,0,0,0,0,2913,0,0,0,0,0,0,0,0,0,0 -7,1,240,0,227,227,227,227.0,5,4,4,213,0,0,114,0,0,0,0,0,2206,0,0,0,0,0,0,0,0,0,0 -1,1,209,0,173,173,173,173.0,28,5,6,152,0,0,0,0,0,0,0,0,2506,0,0,0,0,0,0,0,0,0,0 -5,1,179,0,165,165,165,165.0,4,4,4,151,0,0,0,0,0,0,0,0,2475,0,0,0,0,0,0,0,0,0,0 -14,1,151,0,143,143,143,143.0,3,1,1,132,0,0,0,0,0,0,0,1,3120,0,0,0,0,0,0,0,0,0,0 -4,1,147,0,139,139,139,139.0,22,5,6,121,0,0,0,0,0,0,0,0,2334,0,0,0,0,0,0,0,0,0,0 -20,1,141,0,137,137,137,137.0,1,1,1,130,0,0,0,0,0,0,0,0,2170,0,0,0,0,0,0,0,0,0,0 -28,1,140,0,136,136,136,136.0,2,1,1,130,0,0,0,0,0,0,0,0,3784,0,0,0,0,0,0,0,0,0,0 -18,1,129,0,124,124,124,124.0,2,1,1,116,0,0,0,0,0,0,0,0,2501,0,0,0,0,0,0,0,0,0,0 -16,1,125,0,117,117,117,117.0,2,1,1,108,0,0,0,0,0,0,0,0,2758,0,0,0,0,0,0,0,0,0,0 -6,1,123,0,113,113,113,113.0,4,3,3,100,0,0,0,0,0,0,0,0,2208,0,0,0,0,0,0,0,0,0,0 -10,1,120,0,110,110,110,110.0,6,3,3,98,0,0,0,0,0,0,0,0,3565,0,0,0,0,0,0,0,0,0,0 -9,1,114,0,104,104,104,104.0,5,3,3,90,0,0,0,0,0,0,0,1,3514,0,0,0,0,0,0,0,0,0,0 -12,1,105,0,85,85,85,85.0,4,3,3,72,0,0,0,0,0,0,0,0,3369,0,0,0,0,0,0,0,0,0,0 -17,1,103,0,97,97,97,97.0,2,2,2,89,0,0,0,0,0,0,0,0,3003,0,0,0,0,0,0,0,0,0,0 -8,1,102,0,95,95,95,95.0,4,3,3,82,0,0,0,0,0,0,0,0,3142,0,0,0,0,0,0,0,0,0,0 -30,1,73,0,67,67,67,67.0,2,1,1,62,0,0,0,0,0,0,0,0,3199,0,0,0,0,0,0,0,0,0,0 -24,1,72,0,59,59,59,59.0,2,1,1,51,0,0,0,0,0,0,0,0,3288,0,0,0,0,0,0,0,0,0,0 -22,1,70,0,65,65,65,65.0,2,1,1,59,0,0,0,0,0,0,0,0,3436,0,0,0,0,0,0,0,0,0,0 -55,1,65,0,54,54,54,54.0,27,1,1,49,0,0,0,0,0,0,138414192,0,5343,0,216,9504,215,9460,0,18964,44,1,0 -15,1,64,0,58,58,58,58.0,2,1,1,50,0,0,0,0,0,0,0,0,2306,0,0,0,0,0,0,0,0,0,0 
+id,numTasks,duration,diskBytesSpilledSum,durationSum,durationMax,durationMin,durationAvg,executorCPUTimeSum,executorDeserializeCpuTimeSum,executorDeserializeTimeSum,executorRunTimeSum,inputBytesReadSum,inputBytesReadMax,inputRecordsReadSum,jvmGCTimeSum,memoryBytesSpilledSum,outputBytesWrittenSum,outputRecordsWrittenSum,peakExecutionMemoryMax,resultSerializationTimeSum,resultSizeMax,srFetchWaitTimeSum,srLocalBlocksFetchedSum,srcLocalBytesReadSum,srRemoteBlocksFetchSum,srRemoteBytesReadSum,srRemoteBytesReadToDiskSum,srTotalBytesReadSum,swBytesWrittenSum,swRecordsWrittenSum,swWriteTimeSum +48,431,237976,0,371230,1032,333,861.3,343507,1365,1286,367785,190131859,18373883,8639936081,160,0,0,0,169667947,21,15767,8,3,15657,3,15657,0,31314,18964,431,16 +47,432,214657,0,376777,1133,499,872.2,346447,1388,1310,373271,14007280,8235993,8639936081,144,0,0,0,159530057,11,15577,4,3,15657,3,15657,0,31314,19008,432,20 +46,433,191440,0,457364,3323,391,1056.3,352977,1639,2509,451358,5242628040,101021117,8639936081,1912,0,0,0,250840493,9,16203,551,3,15657,3,15657,0,31314,19052,433,16 +49,1,186241,0,266,266,266,266.0,86,1,1,261,0,0,0,0,0,0,0,138414192,0,5344,10,209,9196,222,9768,0,18964,44,1,0 +45,433,166081,0,415849,1448,339,960.4,350015,1415,1375,412139,2276478144,44711594,8639936081,568,0,0,0,195992906,2,15780,7,3,15657,3,15657,0,31314,19052,433,34 +44,431,139667,0,398973,1403,365,925.7,354332,1420,1327,395265,1075691986,37555235,8639936081,328,0,0,0,188587155,0,15767,10,3,15657,3,15657,0,31314,18964,431,17 +50,1,122711,0,267,267,267,267.0,71,1,1,262,0,0,0,0,0,0,0,138414192,0,5343,58,219,9636,213,9372,0,19008,44,1,0 +43,432,114755,0,403652,1369,329,934.4,353529,1424,1326,399766,1395949742,50513106,8639936081,624,0,0,0,201771890,13,15767,14,3,15657,3,15657,0,31314,19008,432,16 +51,1,97958,0,386,386,386,386.0,60,1,1,381,0,0,0,0,0,0,0,138414192,0,5343,154,221,9724,210,9240,0,18964,44,1,0 
+42,431,89634,0,616500,1899,589,1430.4,378287,1521,1515,612098,16461920726,65476865,8639936081,4132,0,0,0,216740322,23,15805,10,3,15657,3,15657,0,31314,18964,431,16 +52,1,71718,0,384,384,384,384.0,54,1,1,379,0,0,0,0,0,0,0,138414192,0,5343,170,223,9812,210,9240,0,19052,44,1,0 +41,431,51085,0,759623,2321,918,1762.5,395214,1706,2027,754015,26337468742,99616661,8639936081,7772,0,0,0,250648581,87,16157,170,3,15657,3,15657,0,31314,18964,431,19 +53,1,46297,0,136,136,136,136.0,57,1,1,131,0,0,0,0,0,0,0,138414192,0,5344,0,214,9416,219,9636,0,19052,44,1,0 +54,1,23051,0,340,340,340,340.0,36,1,1,334,0,0,0,0,0,0,0,138414192,0,5343,223,215,9460,217,9548,0,19008,44,1,0 +31,1,6979,0,6738,6738,6738,6738.0,5104,128,688,6035,349526,349526,86400,53,0,0,0,155563380,1,10759,0,0,0,0,0,0,0,7239,1800,0 +34,1,6953,0,6725,6725,6725,6725.0,479,185,677,6036,349526,349526,86400,53,0,0,0,155563380,0,9814,0,0,0,0,0,0,0,7239,1800,0 +33,1,6940,0,6729,6729,6729,6729.0,206,216,679,6035,349526,349526,86400,53,0,0,0,155563380,1,9896,0,0,0,0,0,0,0,7239,1800,0 +35,1,6925,0,6729,6729,6729,6729.0,157,136,681,6035,12261,12261,1350,53,0,0,0,155199546,1,9839,0,0,0,0,0,0,0,699,165,0 +38,1,6855,0,6743,6743,6743,6743.0,187,256,688,6035,349526,349526,86400,53,0,0,0,155563380,1,9927,0,0,0,0,0,0,0,7239,1800,0 +0,1,6033,0,5699,5699,5699,5699.0,422,948,1114,4382,0,0,0,37,0,0,0,0,8,2794,0,0,0,0,0,0,0,0,0,0 +13,200,5707,0,87661,966,349,438.3,9924,528,951,84265,0,0,0,144,0,0,0,0,9,6258,0,0,0,0,0,0,0,0,0,0 +23,200,5479,0,84240,490,355,421.2,5394,292,214,82784,0,0,0,136,0,0,0,0,0,6214,0,0,0,0,0,0,0,0,0,0 +21,200,5271,0,80904,485,353,404.5,6100,304,220,79384,0,0,0,136,0,0,0,0,1,6302,0,0,0,0,0,0,0,0,0,0 +27,200,4728,0,70760,442,309,353.8,4145,287,209,69494,0,0,0,152,0,0,0,0,10,5788,0,0,0,0,0,0,0,0,0,0 +3,1,4708,0,4693,4693,4693,4693.0,280,701,804,3796,0,0,0,26,0,0,0,0,7,2834,0,0,0,0,0,0,0,0,0,0 +25,200,4603,0,70379,569,314,351.9,4200,294,216,69040,0,0,0,168,0,0,0,0,14,5708,0,0,0,0,0,0,0,0,0,0 
+36,1,4556,0,4332,4332,4332,4332.0,3359,95,401,3907,30328,30328,7200,39,0,0,0,155245068,1,10552,0,0,0,0,0,0,0,7719,1920,0 +29,200,4555,0,69682,423,310,348.4,3830,272,218,68521,0,0,0,168,0,0,0,0,9,5748,0,0,0,0,0,0,0,0,0,0 +32,1,4515,0,4334,4334,4334,4334.0,260,130,404,3907,349526,349526,86400,39,0,0,0,155563380,1,9851,0,0,0,0,0,0,0,7239,1800,0 +39,1,4488,0,4322,4322,4322,4322.0,112,124,392,3907,349526,349526,86400,39,0,0,0,155563380,1,9926,0,0,0,0,0,0,0,7239,1800,0 +37,1,4481,0,4334,4334,4334,4334.0,136,144,405,3907,349526,349526,86400,39,0,0,0,155563380,1,9851,0,0,0,0,0,0,0,7239,1800,0 +40,1,4476,0,4327,4327,4327,4327.0,98,147,394,3907,349526,349526,86400,39,0,0,0,155563380,1,9895,0,0,0,0,0,0,0,7239,1800,0 +56,1,1055,0,1022,1022,1022,1022.0,758,77,95,901,0,0,0,0,0,0,0,134218344,6,10091,5,218,9592,220,9680,0,19272,0,0,0 +19,200,803,0,11895,145,38,59.5,1050,321,252,10017,0,0,0,56,0,0,0,0,22,2739,0,0,0,0,0,0,0,0,0,0 +26,1,316,0,312,312,312,312.0,2,1,1,306,0,0,0,0,0,0,0,0,0,3777,0,0,0,0,0,0,0,0,0,0 +2,1,280,0,267,267,267,267.0,6,4,4,124,0,0,0,0,0,0,0,0,1,3342,0,0,0,0,0,0,0,0,0,0 +11,1,264,0,254,254,254,254.0,5,3,3,241,0,0,0,0,0,0,0,0,0,2913,0,0,0,0,0,0,0,0,0,0 +7,1,240,0,227,227,227,227.0,5,4,4,213,0,0,0,114,0,0,0,0,0,2206,0,0,0,0,0,0,0,0,0,0 +1,1,209,0,173,173,173,173.0,28,5,6,152,0,0,0,0,0,0,0,0,0,2506,0,0,0,0,0,0,0,0,0,0 +5,1,179,0,165,165,165,165.0,4,4,4,151,0,0,0,0,0,0,0,0,0,2475,0,0,0,0,0,0,0,0,0,0 +14,1,151,0,143,143,143,143.0,3,1,1,132,0,0,0,0,0,0,0,0,1,3120,0,0,0,0,0,0,0,0,0,0 +4,1,147,0,139,139,139,139.0,22,5,6,121,0,0,0,0,0,0,0,0,0,2334,0,0,0,0,0,0,0,0,0,0 +20,1,141,0,137,137,137,137.0,1,1,1,130,0,0,0,0,0,0,0,0,0,2170,0,0,0,0,0,0,0,0,0,0 +28,1,140,0,136,136,136,136.0,2,1,1,130,0,0,0,0,0,0,0,0,0,3784,0,0,0,0,0,0,0,0,0,0 +18,1,129,0,124,124,124,124.0,2,1,1,116,0,0,0,0,0,0,0,0,0,2501,0,0,0,0,0,0,0,0,0,0 +16,1,125,0,117,117,117,117.0,2,1,1,108,0,0,0,0,0,0,0,0,0,2758,0,0,0,0,0,0,0,0,0,0 
+6,1,123,0,113,113,113,113.0,4,3,3,100,0,0,0,0,0,0,0,0,0,2208,0,0,0,0,0,0,0,0,0,0 +10,1,120,0,110,110,110,110.0,6,3,3,98,0,0,0,0,0,0,0,0,0,3565,0,0,0,0,0,0,0,0,0,0 +9,1,114,0,104,104,104,104.0,5,3,3,90,0,0,0,0,0,0,0,0,1,3514,0,0,0,0,0,0,0,0,0,0 +12,1,105,0,85,85,85,85.0,4,3,3,72,0,0,0,0,0,0,0,0,0,3369,0,0,0,0,0,0,0,0,0,0 +17,1,103,0,97,97,97,97.0,2,2,2,89,0,0,0,0,0,0,0,0,0,3003,0,0,0,0,0,0,0,0,0,0 +8,1,102,0,95,95,95,95.0,4,3,3,82,0,0,0,0,0,0,0,0,0,3142,0,0,0,0,0,0,0,0,0,0 +30,1,73,0,67,67,67,67.0,2,1,1,62,0,0,0,0,0,0,0,0,0,3199,0,0,0,0,0,0,0,0,0,0 +24,1,72,0,59,59,59,59.0,2,1,1,51,0,0,0,0,0,0,0,0,0,3288,0,0,0,0,0,0,0,0,0,0 +22,1,70,0,65,65,65,65.0,2,1,1,59,0,0,0,0,0,0,0,0,0,3436,0,0,0,0,0,0,0,0,0,0 +55,1,65,0,54,54,54,54.0,27,1,1,49,0,0,0,0,0,0,0,138414192,0,5343,0,216,9504,215,9460,0,18964,44,1,0 +15,1,64,0,58,58,58,58.0,2,1,1,50,0,0,0,0,0,0,0,0,0,2306,0,0,0,0,0,0,0,0,0,0 diff --git a/core/src/test/resources/ProfilingExpectations/nds_q88_photon_db_13_3_sql_metrics_agg_expectation.csv b/core/src/test/resources/ProfilingExpectations/nds_q88_photon_db_13_3_sql_metrics_agg_expectation.csv index 36f5fe11d..26517710f 100644 --- a/core/src/test/resources/ProfilingExpectations/nds_q88_photon_db_13_3_sql_metrics_agg_expectation.csv +++ b/core/src/test/resources/ProfilingExpectations/nds_q88_photon_db_13_3_sql_metrics_agg_expectation.csv @@ -1,2 +1,2 @@ 
-appID,sqlID,description,numTasks,Duration,executorCPURatio,diskBytesSpilled_sum,duration_sum,duration_max,duration_min,duration_avg,executorCPUTime_sum,executorDeserializeCPUTime_sum,executorDeserializeTime_sum,executorRunTime_sum,input_bytesRead_sum,input_recordsRead_sum,jvmGCTime_sum,memoryBytesSpilled_sum,output_bytesWritten_sum,output_recordsWritten_sum,peakExecutionMemory_max,resultSerializationTime_sum,resultSize_max,sr_fetchWaitTime_sum,sr_localBlocksFetched_sum,sr_localBytesRead_sum,sr_remoteBlocksFetched_sum,sr_remoteBytesRead_sum,sr_remoteBytesReadToDisk_sum,sr_totalBytesRead_sum,sw_bytesWritten_sum,sw_recordsWritten_sum,sw_writeTime_sum -"app-20240919162642-0000",26,"query88",3472,250542,75.58,0,3858136,6743,54,1111.2,2885555,13523,18186,3818106,52997115316,69120188398,16100,0,0,0,250840493,181,16203,1394,1759,201596,1750,201200,0,402796,218614,19946,154 +appId,sqlId,description,numTasks,duration,executorCpuRatio,diskBytesSpilledSum,durationSum,durationMax,durationMin,durationAvg,executorCPUTimeSum,executorDeserializeCpuTimeSum,executorDeserializeTimeSum,executorRunTimeSum,inputBytesReadSum,inputBytesReadMax,inputRecordsReadSum,jvmGCTimeSum,memoryBytesSpilledSum,outputBytesWrittenSum,outputRecordsWrittenSum,peakExecutionMemoryMax,resultSerializationTimeSum,resultSizeMax,srFetchWaitTimeSum,srLocalBlocksFetchedSum,srcLocalBytesReadSum,srRemoteBlocksFetchSum,srRemoteBytesReadSum,srRemoteBytesReadToDiskSum,srTotalBytesReadSum,swBytesWrittenSum,swRecordsWrittenSum,swWriteTimeSum +app-20240919162642-0000,26,query88,3472,250542,75.58,0,3858136,6743,54,1111.2,2885555,13523,18186,3818106,52997115316,101021117,69120188398,16100,0,0,0,250840493,181,16203,1394,1759,201596,1750,201200,0,402796,218614,19946,154 diff --git a/core/src/test/resources/ProfilingExpectations/nds_q88_photon_db_13_3_stage_metrics_agg_expectation.csv b/core/src/test/resources/ProfilingExpectations/nds_q88_photon_db_13_3_stage_metrics_agg_expectation.csv index 0b7fd2182..7ff8071d8 100644 --- 
a/core/src/test/resources/ProfilingExpectations/nds_q88_photon_db_13_3_stage_metrics_agg_expectation.csv +++ b/core/src/test/resources/ProfilingExpectations/nds_q88_photon_db_13_3_stage_metrics_agg_expectation.csv @@ -1,58 +1,58 @@ -stageId,numTasks,Duration,diskBytesSpilled_sum,duration_sum,duration_max,duration_min,duration_avg,executorCPUTime_sum,executorDeserializeCPUTime_sum,executorDeserializeTime_sum,executorRunTime_sum,input_bytesRead_sum,input_recordsRead_sum,jvmGCTime_sum,memoryBytesSpilled_sum,output_bytesWritten_sum,output_recordsWritten_sum,peakExecutionMemory_max,resultSerializationTime_sum,resultSize_max,sr_fetchWaitTime_sum,sr_localBlocksFetched_sum,sr_localBytesRead_sum,sr_remoteBlocksFetched_sum,sr_remoteBytesRead_sum,sr_remoteBytesReadToDisk_sum,sr_totalBytesRead_sum,sw_bytesWritten_sum,sw_recordsWritten_sum,sw_writeTime_sum -58,431,237799,0,371230,1032,333,861.3,343507,1365,1286,367785,190131859,8639936081,160,0,0,0,169667947,21,15767,8,3,15657,3,15657,0,31314,18964,431,16 -54,432,214633,0,376777,1133,499,872.2,346447,1388,1310,373271,14007280,8639936081,144,0,0,0,159530057,11,15577,4,3,15657,3,15657,0,31314,19008,432,20 -44,433,191384,0,457364,3323,391,1056.3,352977,1639,2509,451358,5242628040,8639936081,1912,0,0,0,250840493,9,16203,551,3,15657,3,15657,0,31314,19052,433,16 -61,1,186240,0,266,266,266,266.0,86,1,1,261,0,0,0,0,0,0,138414192,0,5344,10,209,9196,222,9768,0,18964,44,1,0 -46,433,166015,0,415849,1448,339,960.4,350015,1415,1375,412139,2276478144,8639936081,568,0,0,0,195992906,2,15780,7,3,15657,3,15657,0,31314,19052,433,34 -50,431,139628,0,398973,1403,365,925.7,354332,1420,1327,395265,1075691986,8639936081,328,0,0,0,188587155,0,15767,10,3,15657,3,15657,0,31314,18964,431,17 -64,1,122708,0,267,267,267,267.0,71,1,1,262,0,0,0,0,0,0,138414192,0,5343,58,219,9636,213,9372,0,19008,44,1,0 -48,432,114722,0,403652,1369,329,934.4,353529,1424,1326,399766,1395949742,8639936081,624,0,0,0,201771890,13,15767,14,3,15657,3,15657,0,31314,19008,432,16 
-67,1,97957,0,386,386,386,386.0,60,1,1,381,0,0,0,0,0,0,138414192,0,5343,154,221,9724,210,9240,0,18964,44,1,0 -56,431,89600,0,616500,1899,589,1430.4,378287,1521,1515,612098,16461920726,8639936081,4132,0,0,0,216740322,23,15805,10,3,15657,3,15657,0,31314,18964,431,16 -70,1,71716,0,384,384,384,384.0,54,1,1,379,0,0,0,0,0,0,138414192,0,5343,170,223,9812,210,9240,0,19052,44,1,0 -52,431,51060,0,759623,2321,918,1762.5,395214,1706,2027,754015,26337468742,8639936081,7772,0,0,0,250648581,87,16157,170,3,15657,3,15657,0,31314,18964,431,19 -73,1,46297,0,136,136,136,136.0,57,1,1,131,0,0,0,0,0,0,138414192,0,5344,0,214,9416,219,9636,0,19052,44,1,0 -76,1,23048,0,340,340,340,340.0,36,1,1,334,0,0,0,0,0,0,138414192,0,5343,223,215,9460,217,9548,0,19008,44,1,0 -31,1,6956,0,6738,6738,6738,6738.0,5104,128,688,6035,349526,86400,53,0,0,0,155563380,1,10759,0,0,0,0,0,0,0,7239,1800,0 -32,1,6945,0,6725,6725,6725,6725.0,479,185,677,6036,349526,86400,53,0,0,0,155563380,0,9814,0,0,0,0,0,0,0,7239,1800,0 -33,1,6930,0,6729,6729,6729,6729.0,206,216,679,6035,349526,86400,53,0,0,0,155563380,1,9896,0,0,0,0,0,0,0,7239,1800,0 -34,1,6907,0,6729,6729,6729,6729.0,157,136,681,6035,12261,1350,53,0,0,0,155199546,1,9839,0,0,0,0,0,0,0,699,165,0 -38,1,6842,0,6743,6743,6743,6743.0,187,256,688,6035,349526,86400,53,0,0,0,155563380,1,9927,0,0,0,0,0,0,0,7239,1800,0 -0,1,5904,0,5699,5699,5699,5699.0,422,948,1114,4382,0,0,37,0,0,0,0,8,2794,0,0,0,0,0,0,0,0,0,0 -13,200,5697,0,87661,966,349,438.3,9924,528,951,84265,0,0,144,0,0,0,0,9,6258,0,0,0,0,0,0,0,0,0,0 -23,200,5476,0,84240,490,355,421.2,5394,292,214,82784,0,0,136,0,0,0,0,0,6214,0,0,0,0,0,0,0,0,0,0 -21,200,5265,0,80904,485,353,404.5,6100,304,220,79384,0,0,136,0,0,0,0,1,6302,0,0,0,0,0,0,0,0,0,0 -27,200,4719,0,70760,442,309,353.8,4145,287,209,69494,0,0,152,0,0,0,0,10,5788,0,0,0,0,0,0,0,0,0,0 -3,1,4701,0,4693,4693,4693,4693.0,280,701,804,3796,0,0,26,0,0,0,0,7,2834,0,0,0,0,0,0,0,0,0,0 
-25,200,4599,0,70379,569,314,351.9,4200,294,216,69040,0,0,168,0,0,0,0,14,5708,0,0,0,0,0,0,0,0,0,0 -29,200,4552,0,69682,423,310,348.4,3830,272,218,68521,0,0,168,0,0,0,0,9,5748,0,0,0,0,0,0,0,0,0,0 -35,1,4525,0,4332,4332,4332,4332.0,3359,95,401,3907,30328,7200,39,0,0,0,155245068,1,10552,0,0,0,0,0,0,0,7719,1920,0 -36,1,4509,0,4334,4334,4334,4334.0,260,130,404,3907,349526,86400,39,0,0,0,155563380,1,9851,0,0,0,0,0,0,0,7239,1800,0 -39,1,4474,0,4322,4322,4322,4322.0,112,124,392,3907,349526,86400,39,0,0,0,155563380,1,9926,0,0,0,0,0,0,0,7239,1800,0 -40,1,4469,0,4327,4327,4327,4327.0,98,147,394,3907,349526,86400,39,0,0,0,155563380,1,9895,0,0,0,0,0,0,0,7239,1800,0 -37,1,4464,0,4334,4334,4334,4334.0,136,144,405,3907,349526,86400,39,0,0,0,155563380,1,9851,0,0,0,0,0,0,0,7239,1800,0 -107,1,1052,0,1022,1022,1022,1022.0,758,77,95,901,0,0,0,0,0,0,134218344,6,10091,5,218,9592,220,9680,0,19272,0,0,0 -19,200,794,0,11895,145,38,59.5,1050,321,252,10017,0,0,56,0,0,0,0,22,2739,0,0,0,0,0,0,0,0,0,0 -26,1,315,0,312,312,312,312.0,2,1,1,306,0,0,0,0,0,0,0,0,3777,0,0,0,0,0,0,0,0,0,0 -2,1,276,0,267,267,267,267.0,6,4,4,124,0,0,0,0,0,0,0,1,3342,0,0,0,0,0,0,0,0,0,0 -11,1,260,0,254,254,254,254.0,5,3,3,241,0,0,0,0,0,0,0,0,2913,0,0,0,0,0,0,0,0,0,0 -7,1,238,0,227,227,227,227.0,5,4,4,213,0,0,114,0,0,0,0,0,2206,0,0,0,0,0,0,0,0,0,0 -1,1,202,0,173,173,173,173.0,28,5,6,152,0,0,0,0,0,0,0,0,2506,0,0,0,0,0,0,0,0,0,0 -5,1,177,0,165,165,165,165.0,4,4,4,151,0,0,0,0,0,0,0,0,2475,0,0,0,0,0,0,0,0,0,0 -14,1,148,0,143,143,143,143.0,3,1,1,132,0,0,0,0,0,0,0,1,3120,0,0,0,0,0,0,0,0,0,0 -4,1,145,0,139,139,139,139.0,22,5,6,121,0,0,0,0,0,0,0,0,2334,0,0,0,0,0,0,0,0,0,0 -20,1,140,0,137,137,137,137.0,1,1,1,130,0,0,0,0,0,0,0,0,2170,0,0,0,0,0,0,0,0,0,0 -28,1,139,0,136,136,136,136.0,2,1,1,130,0,0,0,0,0,0,0,0,3784,0,0,0,0,0,0,0,0,0,0 -18,1,127,0,124,124,124,124.0,2,1,1,116,0,0,0,0,0,0,0,0,2501,0,0,0,0,0,0,0,0,0,0 -16,1,122,0,117,117,117,117.0,2,1,1,108,0,0,0,0,0,0,0,0,2758,0,0,0,0,0,0,0,0,0,0 
-6,1,120,0,113,113,113,113.0,4,3,3,100,0,0,0,0,0,0,0,0,2208,0,0,0,0,0,0,0,0,0,0 -10,1,118,0,110,110,110,110.0,6,3,3,98,0,0,0,0,0,0,0,0,3565,0,0,0,0,0,0,0,0,0,0 -9,1,110,0,104,104,104,104.0,5,3,3,90,0,0,0,0,0,0,0,1,3514,0,0,0,0,0,0,0,0,0,0 -17,1,102,0,97,97,97,97.0,2,2,2,89,0,0,0,0,0,0,0,0,3003,0,0,0,0,0,0,0,0,0,0 -8,1,100,0,95,95,95,95.0,4,3,3,82,0,0,0,0,0,0,0,0,3142,0,0,0,0,0,0,0,0,0,0 -12,1,98,0,85,85,85,85.0,4,3,3,72,0,0,0,0,0,0,0,0,3369,0,0,0,0,0,0,0,0,0,0 -30,1,71,0,67,67,67,67.0,2,1,1,62,0,0,0,0,0,0,0,0,3199,0,0,0,0,0,0,0,0,0,0 -22,1,68,0,65,65,65,65.0,2,1,1,59,0,0,0,0,0,0,0,0,3436,0,0,0,0,0,0,0,0,0,0 -24,1,68,0,59,59,59,59.0,2,1,1,51,0,0,0,0,0,0,0,0,3288,0,0,0,0,0,0,0,0,0,0 -81,1,63,0,54,54,54,54.0,27,1,1,49,0,0,0,0,0,0,138414192,0,5343,0,216,9504,215,9460,0,18964,44,1,0 -15,1,62,0,58,58,58,58.0,2,1,1,50,0,0,0,0,0,0,0,0,2306,0,0,0,0,0,0,0,0,0,0 +id,numTasks,duration,diskBytesSpilledSum,durationSum,durationMax,durationMin,durationAvg,executorCPUTimeSum,executorDeserializeCpuTimeSum,executorDeserializeTimeSum,executorRunTimeSum,inputBytesReadSum,inputBytesReadMax,inputRecordsReadSum,jvmGCTimeSum,memoryBytesSpilledSum,outputBytesWrittenSum,outputRecordsWrittenSum,peakExecutionMemoryMax,resultSerializationTimeSum,resultSizeMax,srFetchWaitTimeSum,srLocalBlocksFetchedSum,srcLocalBytesReadSum,srRemoteBlocksFetchSum,srRemoteBytesReadSum,srRemoteBytesReadToDiskSum,srTotalBytesReadSum,swBytesWrittenSum,swRecordsWrittenSum,swWriteTimeSum +58,431,237799,0,371230,1032,333,861.3,343507,1365,1286,367785,190131859,18373883,8639936081,160,0,0,0,169667947,21,15767,8,3,15657,3,15657,0,31314,18964,431,16 +54,432,214633,0,376777,1133,499,872.2,346447,1388,1310,373271,14007280,8235993,8639936081,144,0,0,0,159530057,11,15577,4,3,15657,3,15657,0,31314,19008,432,20 +44,433,191384,0,457364,3323,391,1056.3,352977,1639,2509,451358,5242628040,101021117,8639936081,1912,0,0,0,250840493,9,16203,551,3,15657,3,15657,0,31314,19052,433,16 
+61,1,186240,0,266,266,266,266.0,86,1,1,261,0,0,0,0,0,0,0,138414192,0,5344,10,209,9196,222,9768,0,18964,44,1,0 +46,433,166015,0,415849,1448,339,960.4,350015,1415,1375,412139,2276478144,44711594,8639936081,568,0,0,0,195992906,2,15780,7,3,15657,3,15657,0,31314,19052,433,34 +50,431,139628,0,398973,1403,365,925.7,354332,1420,1327,395265,1075691986,37555235,8639936081,328,0,0,0,188587155,0,15767,10,3,15657,3,15657,0,31314,18964,431,17 +64,1,122708,0,267,267,267,267.0,71,1,1,262,0,0,0,0,0,0,0,138414192,0,5343,58,219,9636,213,9372,0,19008,44,1,0 +48,432,114722,0,403652,1369,329,934.4,353529,1424,1326,399766,1395949742,50513106,8639936081,624,0,0,0,201771890,13,15767,14,3,15657,3,15657,0,31314,19008,432,16 +67,1,97957,0,386,386,386,386.0,60,1,1,381,0,0,0,0,0,0,0,138414192,0,5343,154,221,9724,210,9240,0,18964,44,1,0 +56,431,89600,0,616500,1899,589,1430.4,378287,1521,1515,612098,16461920726,65476865,8639936081,4132,0,0,0,216740322,23,15805,10,3,15657,3,15657,0,31314,18964,431,16 +70,1,71716,0,384,384,384,384.0,54,1,1,379,0,0,0,0,0,0,0,138414192,0,5343,170,223,9812,210,9240,0,19052,44,1,0 +52,431,51060,0,759623,2321,918,1762.5,395214,1706,2027,754015,26337468742,99616661,8639936081,7772,0,0,0,250648581,87,16157,170,3,15657,3,15657,0,31314,18964,431,19 +73,1,46297,0,136,136,136,136.0,57,1,1,131,0,0,0,0,0,0,0,138414192,0,5344,0,214,9416,219,9636,0,19052,44,1,0 +76,1,23048,0,340,340,340,340.0,36,1,1,334,0,0,0,0,0,0,0,138414192,0,5343,223,215,9460,217,9548,0,19008,44,1,0 +31,1,6956,0,6738,6738,6738,6738.0,5104,128,688,6035,349526,349526,86400,53,0,0,0,155563380,1,10759,0,0,0,0,0,0,0,7239,1800,0 +32,1,6945,0,6725,6725,6725,6725.0,479,185,677,6036,349526,349526,86400,53,0,0,0,155563380,0,9814,0,0,0,0,0,0,0,7239,1800,0 +33,1,6930,0,6729,6729,6729,6729.0,206,216,679,6035,349526,349526,86400,53,0,0,0,155563380,1,9896,0,0,0,0,0,0,0,7239,1800,0 +34,1,6907,0,6729,6729,6729,6729.0,157,136,681,6035,12261,12261,1350,53,0,0,0,155199546,1,9839,0,0,0,0,0,0,0,699,165,0 
+38,1,6842,0,6743,6743,6743,6743.0,187,256,688,6035,349526,349526,86400,53,0,0,0,155563380,1,9927,0,0,0,0,0,0,0,7239,1800,0 +0,1,5904,0,5699,5699,5699,5699.0,422,948,1114,4382,0,0,0,37,0,0,0,0,8,2794,0,0,0,0,0,0,0,0,0,0 +13,200,5697,0,87661,966,349,438.3,9924,528,951,84265,0,0,0,144,0,0,0,0,9,6258,0,0,0,0,0,0,0,0,0,0 +23,200,5476,0,84240,490,355,421.2,5394,292,214,82784,0,0,0,136,0,0,0,0,0,6214,0,0,0,0,0,0,0,0,0,0 +21,200,5265,0,80904,485,353,404.5,6100,304,220,79384,0,0,0,136,0,0,0,0,1,6302,0,0,0,0,0,0,0,0,0,0 +27,200,4719,0,70760,442,309,353.8,4145,287,209,69494,0,0,0,152,0,0,0,0,10,5788,0,0,0,0,0,0,0,0,0,0 +3,1,4701,0,4693,4693,4693,4693.0,280,701,804,3796,0,0,0,26,0,0,0,0,7,2834,0,0,0,0,0,0,0,0,0,0 +25,200,4599,0,70379,569,314,351.9,4200,294,216,69040,0,0,0,168,0,0,0,0,14,5708,0,0,0,0,0,0,0,0,0,0 +29,200,4552,0,69682,423,310,348.4,3830,272,218,68521,0,0,0,168,0,0,0,0,9,5748,0,0,0,0,0,0,0,0,0,0 +35,1,4525,0,4332,4332,4332,4332.0,3359,95,401,3907,30328,30328,7200,39,0,0,0,155245068,1,10552,0,0,0,0,0,0,0,7719,1920,0 +36,1,4509,0,4334,4334,4334,4334.0,260,130,404,3907,349526,349526,86400,39,0,0,0,155563380,1,9851,0,0,0,0,0,0,0,7239,1800,0 +39,1,4474,0,4322,4322,4322,4322.0,112,124,392,3907,349526,349526,86400,39,0,0,0,155563380,1,9926,0,0,0,0,0,0,0,7239,1800,0 +40,1,4469,0,4327,4327,4327,4327.0,98,147,394,3907,349526,349526,86400,39,0,0,0,155563380,1,9895,0,0,0,0,0,0,0,7239,1800,0 +37,1,4464,0,4334,4334,4334,4334.0,136,144,405,3907,349526,349526,86400,39,0,0,0,155563380,1,9851,0,0,0,0,0,0,0,7239,1800,0 +107,1,1052,0,1022,1022,1022,1022.0,758,77,95,901,0,0,0,0,0,0,0,134218344,6,10091,5,218,9592,220,9680,0,19272,0,0,0 +19,200,794,0,11895,145,38,59.5,1050,321,252,10017,0,0,0,56,0,0,0,0,22,2739,0,0,0,0,0,0,0,0,0,0 +26,1,315,0,312,312,312,312.0,2,1,1,306,0,0,0,0,0,0,0,0,0,3777,0,0,0,0,0,0,0,0,0,0 +2,1,276,0,267,267,267,267.0,6,4,4,124,0,0,0,0,0,0,0,0,1,3342,0,0,0,0,0,0,0,0,0,0 +11,1,260,0,254,254,254,254.0,5,3,3,241,0,0,0,0,0,0,0,0,0,2913,0,0,0,0,0,0,0,0,0,0 
+7,1,238,0,227,227,227,227.0,5,4,4,213,0,0,0,114,0,0,0,0,0,2206,0,0,0,0,0,0,0,0,0,0 +1,1,202,0,173,173,173,173.0,28,5,6,152,0,0,0,0,0,0,0,0,0,2506,0,0,0,0,0,0,0,0,0,0 +5,1,177,0,165,165,165,165.0,4,4,4,151,0,0,0,0,0,0,0,0,0,2475,0,0,0,0,0,0,0,0,0,0 +14,1,148,0,143,143,143,143.0,3,1,1,132,0,0,0,0,0,0,0,0,1,3120,0,0,0,0,0,0,0,0,0,0 +4,1,145,0,139,139,139,139.0,22,5,6,121,0,0,0,0,0,0,0,0,0,2334,0,0,0,0,0,0,0,0,0,0 +20,1,140,0,137,137,137,137.0,1,1,1,130,0,0,0,0,0,0,0,0,0,2170,0,0,0,0,0,0,0,0,0,0 +28,1,139,0,136,136,136,136.0,2,1,1,130,0,0,0,0,0,0,0,0,0,3784,0,0,0,0,0,0,0,0,0,0 +18,1,127,0,124,124,124,124.0,2,1,1,116,0,0,0,0,0,0,0,0,0,2501,0,0,0,0,0,0,0,0,0,0 +16,1,122,0,117,117,117,117.0,2,1,1,108,0,0,0,0,0,0,0,0,0,2758,0,0,0,0,0,0,0,0,0,0 +6,1,120,0,113,113,113,113.0,4,3,3,100,0,0,0,0,0,0,0,0,0,2208,0,0,0,0,0,0,0,0,0,0 +10,1,118,0,110,110,110,110.0,6,3,3,98,0,0,0,0,0,0,0,0,0,3565,0,0,0,0,0,0,0,0,0,0 +9,1,110,0,104,104,104,104.0,5,3,3,90,0,0,0,0,0,0,0,0,1,3514,0,0,0,0,0,0,0,0,0,0 +17,1,102,0,97,97,97,97.0,2,2,2,89,0,0,0,0,0,0,0,0,0,3003,0,0,0,0,0,0,0,0,0,0 +8,1,100,0,95,95,95,95.0,4,3,3,82,0,0,0,0,0,0,0,0,0,3142,0,0,0,0,0,0,0,0,0,0 +12,1,98,0,85,85,85,85.0,4,3,3,72,0,0,0,0,0,0,0,0,0,3369,0,0,0,0,0,0,0,0,0,0 +30,1,71,0,67,67,67,67.0,2,1,1,62,0,0,0,0,0,0,0,0,0,3199,0,0,0,0,0,0,0,0,0,0 +22,1,68,0,65,65,65,65.0,2,1,1,59,0,0,0,0,0,0,0,0,0,3436,0,0,0,0,0,0,0,0,0,0 +24,1,68,0,59,59,59,59.0,2,1,1,51,0,0,0,0,0,0,0,0,0,3288,0,0,0,0,0,0,0,0,0,0 +81,1,63,0,54,54,54,54.0,27,1,1,49,0,0,0,0,0,0,0,138414192,0,5343,0,216,9504,215,9460,0,18964,44,1,0 +15,1,62,0,58,58,58,58.0,2,1,1,50,0,0,0,0,0,0,0,0,0,2306,0,0,0,0,0,0,0,0,0,0 diff --git a/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_jobmetricsagg2_expectation.csv b/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_jobmetricsagg2_expectation.csv index 83eaa2b17..4c53a33c8 100644 --- a/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_jobmetricsagg2_expectation.csv +++ 
b/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_jobmetricsagg2_expectation.csv @@ -1,2 +1,2 @@ -jobId,numTasks,Duration,diskBytesSpilled_sum,duration_sum,duration_max,duration_min,duration_avg,executorCPUTime_sum,executorDeserializeCPUTime_sum,executorDeserializeTime_sum,executorRunTime_sum,input_bytesRead_sum,input_recordsRead_sum,jvmGCTime_sum,memoryBytesSpilled_sum,output_bytesWritten_sum,output_recordsWritten_sum,peakExecutionMemory_max,resultSerializationTime_sum,resultSize_max,sr_fetchWaitTime_sum,sr_localBlocksFetched_sum,sr_localBytesRead_sum,sr_remoteBlocksFetched_sum,sr_remoteBytesRead_sum,sr_remoteBytesReadToDisk_sum,sr_totalBytesRead_sum,sw_bytesWritten_sum,sw_recordsWritten_sum,sw_writeTime_sum -0,213,2515,0,25761,1624,9,120.9,7151,3134,11178,13522,0,0,424,0,0,0,0,10,8075,0,2600,80279920,0,0,0,80279920,80279920,2600,901 +id,numTasks,duration,diskBytesSpilledSum,durationSum,durationMax,durationMin,durationAvg,executorCPUTimeSum,executorDeserializeCpuTimeSum,executorDeserializeTimeSum,executorRunTimeSum,inputBytesReadSum,inputBytesReadMax,inputRecordsReadSum,jvmGCTimeSum,memoryBytesSpilledSum,outputBytesWrittenSum,outputRecordsWrittenSum,peakExecutionMemoryMax,resultSerializationTimeSum,resultSizeMax,srFetchWaitTimeSum,srLocalBlocksFetchedSum,srcLocalBytesReadSum,srRemoteBlocksFetchSum,srRemoteBytesReadSum,srRemoteBytesReadToDiskSum,srTotalBytesReadSum,swBytesWrittenSum,swRecordsWrittenSum,swWriteTimeSum +0,213,2515,0,25761,1624,9,120.9,7151,3134,11178,13522,0,0,0,424,0,0,0,0,10,8075,0,2600,80279920,0,0,0,80279920,80279920,2600,901 diff --git a/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_jobmetricsagg_expectation.csv b/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_jobmetricsagg_expectation.csv index 956c547a8..15772ddfb 100644 --- a/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_jobmetricsagg_expectation.csv +++ 
b/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_jobmetricsagg_expectation.csv @@ -1,2 +1,2 @@ -jobId,numTasks,Duration,diskBytesSpilled_sum,duration_sum,duration_max,duration_min,duration_avg,executorCPUTime_sum,executorDeserializeCPUTime_sum,executorDeserializeTime_sum,executorRunTime_sum,input_bytesRead_sum,input_recordsRead_sum,jvmGCTime_sum,memoryBytesSpilled_sum,output_bytesWritten_sum,output_recordsWritten_sum,peakExecutionMemory_max,resultSerializationTime_sum,resultSize_max,sr_fetchWaitTime_sum,sr_localBlocksFetched_sum,sr_localBytesRead_sum,sr_remoteBlocksFetched_sum,sr_remoteBytesRead_sum,sr_remoteBytesReadToDisk_sum,sr_totalBytesRead_sum,sw_bytesWritten_sum,sw_recordsWritten_sum,sw_writeTime_sum -0,213,2569,0,26735,1598,10,125.5,6608,3531,12095,13414,0,0,336,0,0,0,0,8,8075,0,2600,80279908,0,0,0,80279908,80279908,2600,1001 +id,numTasks,duration,diskBytesSpilledSum,durationSum,durationMax,durationMin,durationAvg,executorCPUTimeSum,executorDeserializeCpuTimeSum,executorDeserializeTimeSum,executorRunTimeSum,inputBytesReadSum,inputBytesReadMax,inputRecordsReadSum,jvmGCTimeSum,memoryBytesSpilledSum,outputBytesWrittenSum,outputRecordsWrittenSum,peakExecutionMemoryMax,resultSerializationTimeSum,resultSizeMax,srFetchWaitTimeSum,srLocalBlocksFetchedSum,srcLocalBytesReadSum,srRemoteBlocksFetchSum,srRemoteBytesReadSum,srRemoteBytesReadToDiskSum,srTotalBytesReadSum,swBytesWrittenSum,swRecordsWrittenSum,swWriteTimeSum +0,213,2569,0,26735,1598,10,125.5,6608,3531,12095,13414,0,0,0,336,0,0,0,0,8,8075,0,2600,80279908,0,0,0,80279908,80279908,2600,1001 diff --git a/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_sqlmetricsagg2_expectation.csv b/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_sqlmetricsagg2_expectation.csv index 0efee3efd..6085ba94c 100644 --- a/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_sqlmetricsagg2_expectation.csv +++ 
b/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_sqlmetricsagg2_expectation.csv @@ -1,2 +1,2 @@ -appID,sqlID,description,numTasks,Duration,executorCPURatio,diskBytesSpilled_sum,duration_sum,duration_max,duration_min,duration_avg,executorCPUTime_sum,executorDeserializeCPUTime_sum,executorDeserializeTime_sum,executorRunTime_sum,input_bytesRead_sum,input_recordsRead_sum,jvmGCTime_sum,memoryBytesSpilled_sum,output_bytesWritten_sum,output_recordsWritten_sum,peakExecutionMemory_max,resultSerializationTime_sum,resultSize_max,sr_fetchWaitTime_sum,sr_localBlocksFetched_sum,sr_localBytesRead_sum,sr_remoteBlocksFetched_sum,sr_remoteBytesRead_sum,sr_remoteBytesReadToDisk_sum,sr_totalBytesRead_sum,sw_bytesWritten_sum,sw_recordsWritten_sum,sw_writeTime_sum -"local-1622821994212",0,"count at :28",213,3041,52.88,0,25761,1624,9,120.9,7151,3134,11178,13522,0,0,424,0,0,0,0,10,8075,0,2600,80279920,0,0,0,80279920,80279920,2600,901 +appId,sqlId,description,numTasks,duration,executorCpuRatio,diskBytesSpilledSum,durationSum,durationMax,durationMin,durationAvg,executorCPUTimeSum,executorDeserializeCpuTimeSum,executorDeserializeTimeSum,executorRunTimeSum,inputBytesReadSum,inputBytesReadMax,inputRecordsReadSum,jvmGCTimeSum,memoryBytesSpilledSum,outputBytesWrittenSum,outputRecordsWrittenSum,peakExecutionMemoryMax,resultSerializationTimeSum,resultSizeMax,srFetchWaitTimeSum,srLocalBlocksFetchedSum,srcLocalBytesReadSum,srRemoteBlocksFetchSum,srRemoteBytesReadSum,srRemoteBytesReadToDiskSum,srTotalBytesReadSum,swBytesWrittenSum,swRecordsWrittenSum,swWriteTimeSum +local-1622821994212,0,count at :28,213,3041,52.88,0,25761,1624,9,120.9,7151,3134,11178,13522,0,0,0,424,0,0,0,0,10,8075,0,2600,80279920,0,0,0,80279920,80279920,2600,901 diff --git a/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_sqlmetricsagg_expectation.csv b/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_sqlmetricsagg_expectation.csv index 8b52ac3cb..066b4c184 100644 --- 
a/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_sqlmetricsagg_expectation.csv +++ b/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_sqlmetricsagg_expectation.csv @@ -1,2 +1,2 @@ -appID,sqlID,description,numTasks,Duration,executorCPURatio,diskBytesSpilled_sum,duration_sum,duration_max,duration_min,duration_avg,executorCPUTime_sum,executorDeserializeCPUTime_sum,executorDeserializeTime_sum,executorRunTime_sum,input_bytesRead_sum,input_recordsRead_sum,jvmGCTime_sum,memoryBytesSpilled_sum,output_bytesWritten_sum,output_recordsWritten_sum,peakExecutionMemory_max,resultSerializationTime_sum,resultSize_max,sr_fetchWaitTime_sum,sr_localBlocksFetched_sum,sr_localBytesRead_sum,sr_remoteBlocksFetched_sum,sr_remoteBytesRead_sum,sr_remoteBytesReadToDisk_sum,sr_totalBytesRead_sum,sw_bytesWritten_sum,sw_recordsWritten_sum,sw_writeTime_sum -"local-1622814619968",0,"count at :28",213,3087,49.26,0,26735,1598,10,125.5,6608,3531,12095,13414,0,0,336,0,0,0,0,8,8075,0,2600,80279908,0,0,0,80279908,80279908,2600,1001 +appId,sqlId,description,numTasks,duration,executorCpuRatio,diskBytesSpilledSum,durationSum,durationMax,durationMin,durationAvg,executorCPUTimeSum,executorDeserializeCpuTimeSum,executorDeserializeTimeSum,executorRunTimeSum,inputBytesReadSum,inputBytesReadMax,inputRecordsReadSum,jvmGCTimeSum,memoryBytesSpilledSum,outputBytesWrittenSum,outputRecordsWrittenSum,peakExecutionMemoryMax,resultSerializationTimeSum,resultSizeMax,srFetchWaitTimeSum,srLocalBlocksFetchedSum,srcLocalBytesReadSum,srRemoteBlocksFetchSum,srRemoteBytesReadSum,srRemoteBytesReadToDiskSum,srTotalBytesReadSum,swBytesWrittenSum,swRecordsWrittenSum,swWriteTimeSum +local-1622814619968,0,count at :28,213,3087,49.26,0,26735,1598,10,125.5,6608,3531,12095,13414,0,0,0,336,0,0,0,0,8,8075,0,2600,80279908,0,0,0,80279908,80279908,2600,1001 diff --git a/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_stagemetricsagg2_expectation.csv 
b/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_stagemetricsagg2_expectation.csv index 1dc0462ac..36e2b92cb 100644 --- a/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_stagemetricsagg2_expectation.csv +++ b/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_stagemetricsagg2_expectation.csv @@ -1,5 +1,5 @@ -stageId,numTasks,Duration,diskBytesSpilled_sum,duration_sum,duration_max,duration_min,duration_avg,executorCPUTime_sum,executorDeserializeCPUTime_sum,executorDeserializeTime_sum,executorRunTime_sum,input_bytesRead_sum,input_recordsRead_sum,jvmGCTime_sum,memoryBytesSpilled_sum,output_bytesWritten_sum,output_recordsWritten_sum,peakExecutionMemory_max,resultSerializationTime_sum,resultSize_max,sr_fetchWaitTime_sum,sr_localBlocksFetched_sum,sr_localBytesRead_sum,sr_remoteBlocksFetched_sum,sr_remoteBytesRead_sum,sr_remoteBytesReadToDisk_sum,sr_totalBytesRead_sum,sw_bytesWritten_sum,sw_recordsWritten_sum,sw_writeTime_sum -0,6,1761,0,9455,1624,1540,1575.8,2917,1287,5056,4248,0,0,228,0,0,0,0,3,2951,0,0,0,0,0,0,0,40132263,1200,376 -1,6,1666,0,9274,1621,1528,1545.7,2570,1007,5016,4099,0,0,196,0,0,0,0,4,2951,0,0,0,0,0,0,0,40132257,1200,475 -2,200,592,0,6937,221,9,34.7,1619,802,1065,5125,0,0,0,0,0,0,0,3,7402,0,2400,80264520,0,0,0,80264520,15400,200,50 -3,1,101,0,95,95,95,95.0,45,38,41,50,0,0,0,0,0,0,0,0,8075,0,200,15400,0,0,0,15400,0,0,0 
+id,numTasks,duration,diskBytesSpilledSum,durationSum,durationMax,durationMin,durationAvg,executorCPUTimeSum,executorDeserializeCpuTimeSum,executorDeserializeTimeSum,executorRunTimeSum,inputBytesReadSum,inputBytesReadMax,inputRecordsReadSum,jvmGCTimeSum,memoryBytesSpilledSum,outputBytesWrittenSum,outputRecordsWrittenSum,peakExecutionMemoryMax,resultSerializationTimeSum,resultSizeMax,srFetchWaitTimeSum,srLocalBlocksFetchedSum,srcLocalBytesReadSum,srRemoteBlocksFetchSum,srRemoteBytesReadSum,srRemoteBytesReadToDiskSum,srTotalBytesReadSum,swBytesWrittenSum,swRecordsWrittenSum,swWriteTimeSum +0,6,1761,0,9455,1624,1540,1575.8,2917,1287,5056,4248,0,0,0,228,0,0,0,0,3,2951,0,0,0,0,0,0,0,40132263,1200,376 +1,6,1666,0,9274,1621,1528,1545.7,2570,1007,5016,4099,0,0,0,196,0,0,0,0,4,2951,0,0,0,0,0,0,0,40132257,1200,475 +2,200,592,0,6937,221,9,34.7,1619,802,1065,5125,0,0,0,0,0,0,0,0,3,7402,0,2400,80264520,0,0,0,80264520,15400,200,50 +3,1,101,0,95,95,95,95.0,45,38,41,50,0,0,0,0,0,0,0,0,0,8075,0,200,15400,0,0,0,15400,0,0,0 diff --git a/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_stagemetricsagg_expectation.csv b/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_stagemetricsagg_expectation.csv index 1a937cf9e..93a4ed36f 100644 --- a/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_stagemetricsagg_expectation.csv +++ b/core/src/test/resources/ProfilingExpectations/rapids_join_eventlog_stagemetricsagg_expectation.csv @@ -1,5 +1,5 @@ 
-stageId,numTasks,Duration,diskBytesSpilled_sum,duration_sum,duration_max,duration_min,duration_avg,executorCPUTime_sum,executorDeserializeCPUTime_sum,executorDeserializeTime_sum,executorRunTime_sum,input_bytesRead_sum,input_recordsRead_sum,jvmGCTime_sum,memoryBytesSpilled_sum,output_bytesWritten_sum,output_recordsWritten_sum,peakExecutionMemory_max,resultSerializationTime_sum,resultSize_max,sr_fetchWaitTime_sum,sr_localBlocksFetched_sum,sr_localBytesRead_sum,sr_remoteBlocksFetched_sum,sr_remoteBytesRead_sum,sr_remoteBytesReadToDisk_sum,sr_totalBytesRead_sum,sw_bytesWritten_sum,sw_recordsWritten_sum,sw_writeTime_sum -0,6,1743,0,9518,1598,1580,1586.3,2512,1393,5309,4043,0,0,168,0,0,0,0,3,2951,0,0,0,0,0,0,0,40132250,1200,400 -1,6,1631,0,9434,1582,1568,1572.3,2406,1067,5273,3998,0,0,168,0,0,0,0,5,2951,0,0,0,0,0,0,0,40132258,1200,508 -2,200,688,0,7705,237,10,38.5,1660,1034,1474,5337,0,0,0,0,0,0,0,0,7359,0,2400,80264508,0,0,0,80264508,15400,200,93 -3,1,83,0,78,78,78,78.0,30,37,39,36,0,0,0,0,0,0,0,0,8075,0,200,15400,0,0,0,15400,0,0,0 +id,numTasks,duration,diskBytesSpilledSum,durationSum,durationMax,durationMin,durationAvg,executorCPUTimeSum,executorDeserializeCpuTimeSum,executorDeserializeTimeSum,executorRunTimeSum,inputBytesReadSum,inputBytesReadMax,inputRecordsReadSum,jvmGCTimeSum,memoryBytesSpilledSum,outputBytesWrittenSum,outputRecordsWrittenSum,peakExecutionMemoryMax,resultSerializationTimeSum,resultSizeMax,srFetchWaitTimeSum,srLocalBlocksFetchedSum,srcLocalBytesReadSum,srRemoteBlocksFetchSum,srRemoteBytesReadSum,srRemoteBytesReadToDiskSum,srTotalBytesReadSum,swBytesWrittenSum,swRecordsWrittenSum,swWriteTimeSum +0,6,1743,0,9518,1598,1580,1586.3,2512,1393,5309,4043,0,0,0,168,0,0,0,0,3,2951,0,0,0,0,0,0,0,40132250,1200,400 +1,6,1631,0,9434,1582,1568,1572.3,2406,1067,5273,3998,0,0,0,168,0,0,0,0,5,2951,0,0,0,0,0,0,0,40132258,1200,508 +2,200,688,0,7705,237,10,38.5,1660,1034,1474,5337,0,0,0,0,0,0,0,0,0,7359,0,2400,80264508,0,0,0,80264508,15400,200,93 
+3,1,83,0,78,78,78,78.0,30,37,39,36,0,0,0,0,0,0,0,0,0,8075,0,200,15400,0,0,0,15400,0,0,0 diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/profiling/ApplicationInfoSuite.scala b/core/src/test/scala/com/nvidia/spark/rapids/tool/profiling/ApplicationInfoSuite.scala index 8af32d73c..60e2572cb 100644 --- a/core/src/test/scala/com/nvidia/spark/rapids/tool/profiling/ApplicationInfoSuite.scala +++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/profiling/ApplicationInfoSuite.scala @@ -901,22 +901,22 @@ class ApplicationInfoSuite extends AnyFunSuite with Logging { ("failures without diagnostic views", s"$logDir/tasks_executors_fail_compressed_eventlog.zstd", "application_1603128018386_7846", - 20, + 21, false), ("failures with diagnostic views", s"$logDir/tasks_executors_fail_compressed_eventlog.zstd", "application_1603128018386_7846", - 22, + 23, true), ("gpu without diagnostic views", s"$qualLogDir/udf_dataset_eventlog", "local-1651188809790", - 16, + 17, false), ("gpu with diagnostic views", s"$qualLogDir/udf_dataset_eventlog", "local-1651188809790", - 18, + 19, true) ) diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/profiling/OomDetectionSuite.scala b/core/src/test/scala/com/nvidia/spark/rapids/tool/profiling/OomDetectionSuite.scala new file mode 100644 index 000000000..c5f01f22a --- /dev/null +++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/profiling/OomDetectionSuite.scala @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2026, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.tool.profiling + +import org.scalatest.funsuite.AnyFunSuite + +class OomDetectionSuite extends AnyFunSuite { + + // (description, failureReason, expectedIsGpuOom) + private val gpuOomTestCases = Seq( + ("GpuSplitAndRetryOOM", + "com.nvidia.spark.rapids.jni.GpuSplitAndRetryOOM: " + + "GPU OutOfMemory: a batch of 1 cannot be split!", + true), + ("GpuRetryOOM", + "com.nvidia.spark.rapids.jni.GpuRetryOOM: GPU OutOfMemory", + true), + ("GpuOOM base class", + "com.nvidia.spark.rapids.jni.GpuOOM: GPU OutOfMemory", + true), + ("pre-24.02 jni.SplitAndRetryOOM (no Gpu prefix)", + "com.nvidia.spark.rapids.jni.SplitAndRetryOOM: " + + "GPU OutOfMemory: a batch of 1 cannot be split!", + true), + ("pre-24.02 jni.RetryOOM (no Gpu prefix)", + "com.nvidia.spark.rapids.jni.RetryOOM: GPU OutOfMemory", + true), + ("CpuSplitAndRetryOOM should not match", + "com.nvidia.spark.rapids.jni.CpuSplitAndRetryOOM: " + + "CPU OutOfMemory", + false), + ("CpuRetryOOM should not match", + "com.nvidia.spark.rapids.jni.CpuRetryOOM: CPU OutOfMemory", + false), + ("NullPointerException should not match", + "java.lang.NullPointerException: some error", + false), + ("ExecutorLostFailure should not match", + "ExecutorLostFailure (executor 5 exited) Exit status: 137", + false) + ) + + gpuOomTestCases.foreach { case (desc, reason, expected) => + test(s"isGpuOom: $desc") { + assert(SparkRapidsOomExceptions.isGpuOom(reason) === expected) + } + } +} diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/BaseAutoTunerSuite.scala b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/BaseAutoTunerSuite.scala index 9279625f4..d76ca0b38 100644 --- a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/BaseAutoTunerSuite.scala +++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/BaseAutoTunerSuite.scala @@ -49,8 +49,8 @@ class 
AppInfoProviderMockTest(val maxInput: Double, val meanShuffleRead: Double, val shuffleStagesWithPosSpilling: Set[Long], val shuffleSkewStages: Set[Long], - val scanStagesWithGpuOom: Boolean, - val shuffleStagesWithOom: Boolean, + val scanStagesWithGpuOomSet: Set[Long], + val gpuShuffleStagesWithContainerOomSet: Set[Long], val maxColumnarExchangeDataSizeBytes: Option[Long] = None) extends BaseProfilingAppSummaryInfoProvider { override def isAppInfoAvailable = true @@ -69,8 +69,8 @@ class AppInfoProviderMockTest(val maxInput: Double, override def getRedundantReadSize: Long = redundantReadSize override def getShuffleStagesWithPosSpilling: Set[Long] = shuffleStagesWithPosSpilling override def getShuffleSkewStages: Set[Long] = shuffleSkewStages - override def hasScanStagesWithGpuOom: Boolean = scanStagesWithGpuOom - override def hasShuffleStagesWithOom: Boolean = shuffleStagesWithOom + override def scanStagesWithGpuOom: Set[Long] = scanStagesWithGpuOomSet + override def gpuShuffleStagesWithContainerOom: Set[Long] = gpuShuffleStagesWithContainerOomSet override def getMaxColumnarExchangeDataSizeBytes: Option[Long] = maxColumnarExchangeDataSizeBytes /** @@ -135,13 +135,13 @@ abstract class BaseAutoTunerSuite extends AnyFunSuite with BeforeAndAfterEach meanShuffleRead: Double = 0.0, shuffleStagesWithPosSpilling: Set[Long] = Set(), shuffleSkewStages: Set[Long] = Set(), - scanStagesWithGpuOom: Boolean = false, - shuffleStagesWithOom: Boolean = false, + scanStagesWithGpuOom: Set[Long] = Set(), + gpuShuffleStagesWithContainerOom: Set[Long] = Set(), maxColumnarExchangeDataSizeBytes: Option[Long] = None): AppInfoProviderMockTest = { new AppInfoProviderMockTest(maxInput, spilledMetrics, jvmGCFractions, propsFromLog, sparkVersion, rapidsJars, distinctLocationPct, redundantReadSize, meanInput, meanShuffleRead, shuffleStagesWithPosSpilling, shuffleSkewStages, scanStagesWithGpuOom, - shuffleStagesWithOom, maxColumnarExchangeDataSizeBytes) + gpuShuffleStagesWithContainerOom, 
maxColumnarExchangeDataSizeBytes) } /** diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala index c6431ce5f..24583ea60 100644 --- a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala +++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuite.scala @@ -3139,7 +3139,8 @@ class ProfilingAutoTunerSuite extends ProfilingAutoTunerSuiteBase { "spark.plugins" -> "com.nvidia.spark.SQLPlugin", "spark.rapids.sql.concurrentGpuTasks" -> "4") val infoProvider = getMockInfoProvider(0, Seq(0), Seq(0.0), - logEventsProps, Some(testSparkVersion), scanStagesWithGpuOom = hasGpuOOm) + logEventsProps, Some(testSparkVersion), + scanStagesWithGpuOom = if (hasGpuOOm) Set(1L) else Set.empty) val platform = PlatformFactory.createInstance(PlatformNames.DATAPROC) // Configure cluster info using Platform's existing method @@ -3291,7 +3292,7 @@ class ProfilingAutoTunerSuite extends ProfilingAutoTunerSuiteBase { "spark.plugins" -> "com.nvidia.spark.SQLPlugin", "spark.rapids.sql.concurrentGpuTasks" -> "4") val infoProvider = getMockInfoProvider(0, Seq(0), Seq(0.0), - logEventsProps, Some(testSparkVersion), shuffleStagesWithOom = true) + logEventsProps, Some(testSparkVersion), gpuShuffleStagesWithContainerOom = Set(1L)) val platform = PlatformFactory.createInstance(PlatformNames.DATAPROC) // Configure cluster info using Platform's existing method @@ -3379,7 +3380,7 @@ class ProfilingAutoTunerSuite extends ProfilingAutoTunerSuiteBase { "spark.plugins" -> "com.nvidia.spark.SQLPlugin", "spark.rapids.sql.concurrentGpuTasks" -> "4") val infoProvider = getMockInfoProvider(0, Seq(0), Seq(0.0), - logEventsProps, Some(testSparkVersion), shuffleStagesWithOom = true, + logEventsProps, Some(testSparkVersion), gpuShuffleStagesWithContainerOom = Set(1L), meanInput = 50000, meanShuffleRead = 80000) val platform = 
PlatformFactory.createInstance(PlatformNames.DATAPROC) diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuiteV2.scala b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuiteV2.scala index c0459e257..612ad4a62 100644 --- a/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuiteV2.scala +++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/tuning/ProfilingAutoTunerSuiteV2.scala @@ -1692,8 +1692,8 @@ class ProfilingAutoTunerSuiteV2 extends ProfilingAutoTunerSuiteBase { redundantReadSize = 0L, shuffleStagesWithPosSpilling = Set.empty, shuffleSkewStages = Set.empty, - scanStagesWithGpuOom = false, - shuffleStagesWithOom = false, + scanStagesWithGpuOom = Set.empty, + gpuShuffleStagesWithContainerOom = Set.empty, maxColumnarExchangeDataSizeBytes = Some(1000L * 1024 * 1024 * 1024) // 1000GB ) val platform = PlatformFactory.createInstance(PlatformNames.DATAPROC, Some(targetClusterInfo))