From 696b193d5aac2577867ae9116937e2cb992582a3 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Thu, 9 Oct 2025 17:04:59 -0400 Subject: [PATCH 01/28] `Machine` class for GCP --- .../api/GcpBatchRequestFactoryImpl.scala | 31 +++++++++++++++---- .../models/GcpBatchRuntimeAttributes.scala | 3 ++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala index e55142ac918..4d47e3e9479 100644 --- a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala @@ -256,14 +256,33 @@ class GcpBatchRequestFactoryImpl()(implicit gcsTransferConfiguration: GcsTransfe isBackground = _.getBackground ) + /** + * The "compute resource" concept is a suggestion to Batch regarding how many jobs can fit on a single VM. + * The Cromwell backend currently creates VMs at a 1:1 ratio with jobs, so the compute resource is effectively ignored. + * + * That said, it has a cosmetic effect in the Batch web UI, where it drives the "Cores" and "Memory" readouts. + * The machine type is the "real" VM shape; one can set bogus cores/memory in the compute resource, + * and it will have no effect other than the display. 
+ */ val computeResource = createComputeResource(cpuCores, memory, gcpBootDiskSizeMb) - val taskSpec = createTaskSpec(sortedRunnables, computeResource, durationInSeconds, allVolumes) + val taskSpec = createTaskSpec(sortedRunnables, computeResource , durationInSeconds, allVolumes) val taskGroup: TaskGroup = createTaskGroup(taskCount, taskSpec) - val machineType = GcpBatchMachineConstraints.machineType(runtimeAttributes.memory, - runtimeAttributes.cpu, - cpuPlatformOption = runtimeAttributes.cpuPlatform, - jobLogger = jobLogger - ) + + val machineType = runtimeAttributes.machine match { + case Some(m) => + // Allow users to select predefined machine types, such as `n2-standard-4`. + // Overrides CPU count and memory attributes. + // Compatible with CPU platform, it is the user's responsibility to select a valid type/platform combination + m.machineType + case None => + // CPU platform drives selection of machine type, but is not encoded in the `machineType` return value itself + GcpBatchMachineConstraints.machineType(runtimeAttributes.memory, + runtimeAttributes.cpu, + cpuPlatformOption = runtimeAttributes.cpuPlatform, + jobLogger = jobLogger + ) + } + val instancePolicy = createInstancePolicy(cpuPlatform = cpuPlatform, spotModel, accelerators, allDisks, machineType = machineType) val locationPolicy = LocationPolicy.newBuilder.addAllAllowedLocations(zones.asJava).build diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/GcpBatchRuntimeAttributes.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/GcpBatchRuntimeAttributes.scala index 247b7568fb4..2d9787e75e9 100644 --- a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/GcpBatchRuntimeAttributes.scala +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/GcpBatchRuntimeAttributes.scala @@ -34,6 +34,8 @@ object GpuResource { final case class GpuResource(gpuType: 
GpuType, gpuCount: Int Refined Positive) +final case class Machine(machineType: String) + final case class GcpBatchRuntimeAttributes(cpu: Int Refined Positive, cpuPlatform: Option[String], gpuResource: Option[GpuResource], @@ -41,6 +43,7 @@ final case class GcpBatchRuntimeAttributes(cpu: Int Refined Positive, preemptible: Int, bootDiskSize: Int, memory: MemorySize, + machine: Option[Machine] = None, disks: Seq[GcpBatchAttachedDisk], dockerImage: String, failOnStderr: Boolean, From e2f5bbbf88cd763a91f648817286109cfda45203 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Thu, 9 Oct 2025 17:33:46 -0400 Subject: [PATCH 02/28] Test cases --- .../gcp_machine_type/gcp_machine_type.test | 10 ++++ .../gcp_machine_type/gcp_machine_type.wdl | 49 +++++++++++++++++++ .../gcp_machine_type_preemptible.test | 11 +++++ .../gcp_machine_type/preemptible_inputs.json | 3 ++ 4 files changed, 73 insertions(+) create mode 100644 centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test create mode 100644 centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.wdl create mode 100644 centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test create mode 100644 centaur/src/main/resources/standardTestCases/gcp_machine_type/preemptible_inputs.json diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test new file mode 100644 index 00000000000..ca4c9c6c7f2 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test @@ -0,0 +1,10 @@ +name: gcp_machine_type +testFormat: workflowsuccess +backends: [GCPBATCH] + +files { + workflow: gcp_machine_type.wdl +} + +metadata { +} diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.wdl 
b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.wdl new file mode 100644 index 00000000000..c60efcbe916 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.wdl @@ -0,0 +1,49 @@ +version 1.0 + +workflow minimal_hello_world { + input { + String image = "rockylinux/rockylinux:10" + String machine_type = "n2-standard-2" + Int preemptible = 0 + } + + call hello_world { + input: + image = image, + machine_type = machine_type, + preemptible = preemptible + } + + output { + String stdout = hello_world.stdout + } +} + +task hello_world { + + input { + String image + String machine_type + Int preemptible + } + + command <<< + cat /etc/os-release + uname -a + cat /proc/cpuinfo + >>> + + runtime { + docker: image + gcp_machine_type: machine_type + preemptible: preemptible + } + + meta { + volatile: true + } + + output { + String stdout = read_string(stdout()) + } +} diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test new file mode 100644 index 00000000000..23a6b2efe41 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test @@ -0,0 +1,11 @@ +name: gcp_machine_type_preemptible +testFormat: workflowsuccess +backends: [GCPBATCH] + +files { + workflow: gcp_machine_type.wdl + inputs: preemptible_inputs.json +} + +metadata { +} diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/preemptible_inputs.json b/centaur/src/main/resources/standardTestCases/gcp_machine_type/preemptible_inputs.json new file mode 100644 index 00000000000..36b502f8687 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/preemptible_inputs.json @@ -0,0 +1,3 @@ +{ + "minimal_hello_world.preemptible": 5 +} From 2b6b19997cd869dd9275aa2946d1ed96668959a9 Mon Sep 17 00:00:00 
2001 From: Adam Nichols Date: Tue, 14 Oct 2025 18:15:40 -0400 Subject: [PATCH 03/28] New validation --- .../api/GcpBatchRequestFactoryImpl.scala | 21 +++------------- .../models/GcpBatchRuntimeAttributes.scala | 16 ++++++++++--- .../batch/util/MachineTypeValidation.scala | 24 +++++++++++++++++++ 3 files changed, 40 insertions(+), 21 deletions(-) create mode 100644 supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/util/MachineTypeValidation.scala diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala index 4d47e3e9479..7948c53adaa 100644 --- a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala @@ -6,26 +6,11 @@ import com.google.api.services.oauth2.Oauth2Scopes import com.google.api.services.storage.StorageScopes import com.google.cloud.batch.v1.AllocationPolicy._ import com.google.cloud.batch.v1.LogsPolicy.Destination -import com.google.cloud.batch.v1.{ - AllocationPolicy, - CancelJobRequest, - ComputeResource, - CreateJobRequest, - GCS, - GetJobRequest, - Job, - JobName, - LogsPolicy, - Runnable, - ServiceAccount, - TaskGroup, - TaskSpec, - Volume -} +import com.google.cloud.batch.v1.{AllocationPolicy, CancelJobRequest, ComputeResource, CreateJobRequest, GCS, GetJobRequest, Job, JobName, LogsPolicy, Runnable, ServiceAccount, TaskGroup, TaskSpec, Volume} import com.google.protobuf.Duration import cromwell.backend.google.batch.io.GcpBatchAttachedDisk import cromwell.backend.google.batch.models.GcpBatchConfigurationAttributes.GcsTransferConfiguration -import cromwell.backend.google.batch.models.{GcpBatchRequest, VpcAndSubnetworkProjectLabelValues} +import 
cromwell.backend.google.batch.models.{GcpBatchRequest, MachineType, VpcAndSubnetworkProjectLabelValues} import cromwell.backend.google.batch.runnable._ import cromwell.backend.google.batch.util.{BatchUtilityConversions, GcpBatchMachineConstraints} import cromwell.core.labels.{Label, Labels} @@ -269,7 +254,7 @@ class GcpBatchRequestFactoryImpl()(implicit gcsTransferConfiguration: GcsTransfe val taskGroup: TaskGroup = createTaskGroup(taskCount, taskSpec) val machineType = runtimeAttributes.machine match { - case Some(m) => + case Some(m: MachineType) => // Allow users to select predefined machine types, such as `n2-standard-4`. // Overrides CPU count and memory attributes. // Compatible with CPU platform, it is the user's responsibility to select a valid type/platform combination diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/GcpBatchRuntimeAttributes.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/GcpBatchRuntimeAttributes.scala index 610628322d1..1a9628dabf3 100644 --- a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/GcpBatchRuntimeAttributes.scala +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/GcpBatchRuntimeAttributes.scala @@ -6,7 +6,7 @@ import common.validation.ErrorOr.ErrorOr import cromwell.backend.google.batch.io.{GcpBatchAttachedDisk, GcpBatchWorkingDisk} import cromwell.backend.google.batch.models.GcpBatchRuntimeAttributes.BootDiskSizeKey import cromwell.backend.google.batch.models.GpuResource.GpuType -import cromwell.backend.google.batch.util.{GpuTypeValidation, GpuValidation} +import cromwell.backend.google.batch.util.{GpuTypeValidation, GpuValidation, MachineTypeValidation} import cromwell.backend.standard.StandardValidatedRuntimeAttributesBuilder import cromwell.backend.validation._ import eu.timepit.refined._ @@ -34,7 +34,7 @@ object GpuResource { final case class 
GpuResource(gpuType: GpuType, gpuCount: Int Refined Positive) -final case class Machine(machineType: String) +final case class MachineType(machineType: String) final case class GcpBatchRuntimeAttributes(cpu: Int Refined Positive, cpuPlatform: Option[String], @@ -43,7 +43,7 @@ final case class GcpBatchRuntimeAttributes(cpu: Int Refined Positive, preemptible: Int, bootDiskSize: Int, memory: MemorySize, - machine: Option[Machine] = None, + machine: Option[MachineType] = None, disks: Seq[GcpBatchAttachedDisk], dockerImage: String, failOnStderr: Boolean, @@ -79,6 +79,8 @@ object GcpBatchRuntimeAttributes { val CpuPlatformIntelIceLakeValue = "Intel Ice Lake" val CpuPlatformAMDRomeValue = "AMD Rome" + val MachineTypeKey = "gcp_machine_type" + val CheckpointFileKey = "checkpointFile" private val checkpointFileValidationInstance = new StringRuntimeAttributesValidation(CheckpointFileKey).optional @@ -92,6 +94,8 @@ object GcpBatchRuntimeAttributes { ) private def cpuPlatformValidation(runtimeConfig: Option[Config]): OptionalRuntimeAttributesValidation[String] = cpuPlatformValidationInstance + private def machineTypeValidation(runtimeConfig: Option[Config]): OptionalRuntimeAttributesValidation[MachineType] = + MachineTypeValidation.optional private def gpuTypeValidation(runtimeConfig: Option[Config]): OptionalRuntimeAttributesValidation[GpuType] = GpuTypeValidation.optional @@ -148,6 +152,7 @@ object GcpBatchRuntimeAttributes { gpuTypeValidation(runtimeConfig), cpuValidation(runtimeConfig), cpuPlatformValidation(runtimeConfig), + machineTypeValidation(runtimeConfig), disksValidation(runtimeConfig), noAddressValidation(runtimeConfig), zonesValidation(runtimeConfig), @@ -169,6 +174,10 @@ object GcpBatchRuntimeAttributes { cpuPlatformValidation(runtimeAttrsConfig).key, validatedRuntimeAttributes ) + val machineType: Option[MachineType] = RuntimeAttributesValidation.extractOption( + machineTypeValidation(runtimeAttrsConfig).key, + validatedRuntimeAttributes + ) val 
checkpointFileName: Option[String] = RuntimeAttributesValidation.extractOption(checkpointFileValidationInstance.key, validatedRuntimeAttributes) @@ -216,6 +225,7 @@ object GcpBatchRuntimeAttributes { preemptible = preemptible, bootDiskSize = bootDiskSize, memory = memory, + machine = machineType, disks = disks, dockerImage = docker, failOnStderr = failOnStderr, diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/util/MachineTypeValidation.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/util/MachineTypeValidation.scala new file mode 100644 index 00000000000..1c500bee311 --- /dev/null +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/util/MachineTypeValidation.scala @@ -0,0 +1,24 @@ +package cromwell.backend.google.batch.util + +import cats.implicits.catsSyntaxValidatedId +import common.validation.ErrorOr.ErrorOr +import cromwell.backend.google.batch.models.{GcpBatchRuntimeAttributes, MachineType} +import cromwell.backend.validation.{OptionalRuntimeAttributesValidation, RuntimeAttributesValidation} +import wom.types.{WomStringType, WomType} +import wom.values.{WomString, WomValue} + +object MachineTypeValidation { + lazy val instance: RuntimeAttributesValidation[MachineType] = new MachineTypeValidation + lazy val optional: OptionalRuntimeAttributesValidation[MachineType] = instance.optional +} + +class MachineTypeValidation extends RuntimeAttributesValidation[MachineType] { + override def key = GcpBatchRuntimeAttributes.MachineTypeKey + + override def coercion: Iterable[WomType] = Set(WomStringType) + override def validateValue: PartialFunction[WomValue, ErrorOr[MachineType]] = { + case WomString(s) => MachineType(s).validNel + case other => + s"Invalid '$key': String value required but got ${other.womType.friendlyName}.".invalidNel + } +} From 775b1ed0c7cb95e365a6d5c5893cca450ec6627f Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Tue, 14 Oct 2025 18:24:59 
-0400 Subject: [PATCH 04/28] `scalafmtAll` --- .../api/GcpBatchRequestFactoryImpl.scala | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala index 7948c53adaa..8dfabc7b82c 100644 --- a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala @@ -6,7 +6,22 @@ import com.google.api.services.oauth2.Oauth2Scopes import com.google.api.services.storage.StorageScopes import com.google.cloud.batch.v1.AllocationPolicy._ import com.google.cloud.batch.v1.LogsPolicy.Destination -import com.google.cloud.batch.v1.{AllocationPolicy, CancelJobRequest, ComputeResource, CreateJobRequest, GCS, GetJobRequest, Job, JobName, LogsPolicy, Runnable, ServiceAccount, TaskGroup, TaskSpec, Volume} +import com.google.cloud.batch.v1.{ + AllocationPolicy, + CancelJobRequest, + ComputeResource, + CreateJobRequest, + GCS, + GetJobRequest, + Job, + JobName, + LogsPolicy, + Runnable, + ServiceAccount, + TaskGroup, + TaskSpec, + Volume +} import com.google.protobuf.Duration import cromwell.backend.google.batch.io.GcpBatchAttachedDisk import cromwell.backend.google.batch.models.GcpBatchConfigurationAttributes.GcsTransferConfiguration @@ -250,7 +265,7 @@ class GcpBatchRequestFactoryImpl()(implicit gcsTransferConfiguration: GcsTransfe * and it will have no effect other than the display. 
*/ val computeResource = createComputeResource(cpuCores, memory, gcpBootDiskSizeMb) - val taskSpec = createTaskSpec(sortedRunnables, computeResource , durationInSeconds, allVolumes) + val taskSpec = createTaskSpec(sortedRunnables, computeResource, durationInSeconds, allVolumes) val taskGroup: TaskGroup = createTaskGroup(taskCount, taskSpec) val machineType = runtimeAttributes.machine match { @@ -262,9 +277,9 @@ class GcpBatchRequestFactoryImpl()(implicit gcsTransferConfiguration: GcsTransfe case None => // CPU platform drives selection of machine type, but is not encoded in the `machineType` return value itself GcpBatchMachineConstraints.machineType(runtimeAttributes.memory, - runtimeAttributes.cpu, - cpuPlatformOption = runtimeAttributes.cpuPlatform, - jobLogger = jobLogger + runtimeAttributes.cpu, + cpuPlatformOption = runtimeAttributes.cpuPlatform, + jobLogger = jobLogger ) } From 9153f0002d3218363477fffa27564de04c6f6174 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 15 Oct 2025 09:51:53 -0400 Subject: [PATCH 05/28] Enhanced `toString` --- .../google/batch/models/GcpBatchRuntimeAttributes.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/GcpBatchRuntimeAttributes.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/GcpBatchRuntimeAttributes.scala index 1a9628dabf3..3c492ba88cc 100644 --- a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/GcpBatchRuntimeAttributes.scala +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/GcpBatchRuntimeAttributes.scala @@ -34,7 +34,9 @@ object GpuResource { final case class GpuResource(gpuType: GpuType, gpuCount: Int Refined Positive) -final case class MachineType(machineType: String) +final case class MachineType(machineType: String) { + override def toString: String = machineType +} final case class 
GcpBatchRuntimeAttributes(cpu: Int Refined Positive, cpuPlatform: Option[String], From 2eca2f1ab07fdda226943b8a5e2277c7d4d952c7 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 15 Oct 2025 10:07:40 -0400 Subject: [PATCH 06/28] Enhance tests --- .../standardTestCases/gcp_machine_type/gcp_machine_type.test | 2 ++ .../gcp_machine_type/gcp_machine_type_preemptible.test | 2 ++ 2 files changed, 4 insertions(+) diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test index ca4c9c6c7f2..6a79ee4df2c 100644 --- a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test @@ -7,4 +7,6 @@ files { } metadata { + "calls.minimal_hello_world.hello_world.runtimeAttributes.gcp_machine_type": "n2-standard-2" + "calls.minimal_hello_world.hello_world.runtimeAttributes.preemptible": "0" } diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test index 23a6b2efe41..c9c673d60e1 100644 --- a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test @@ -8,4 +8,6 @@ files { } metadata { + "calls.minimal_hello_world.hello_world.runtimeAttributes.gcp_machine_type": "n2-standard-2" + "calls.minimal_hello_world.hello_world.runtimeAttributes.preemptible": "5" } From 192172a7172ea3f4b01e97d63f4d6895ec802812 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 15 Oct 2025 13:38:02 -0400 Subject: [PATCH 07/28] Disable no-op Scaladoc generation We write the HTML docs to disk on the CI instance and then throw them away. 
--- src/ci/bin/testCheckPublish.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ci/bin/testCheckPublish.sh b/src/ci/bin/testCheckPublish.sh index 46ca9033d9a..a51ad7e73ca 100755 --- a/src/ci/bin/testCheckPublish.sh +++ b/src/ci/bin/testCheckPublish.sh @@ -10,6 +10,6 @@ cromwell::build::setup_common_environment cromwell::build::pip_install mkdocs mkdocs build -s -sbt -Dsbt.supershell=false --warn +package assembly dockerPushCheck +doc +sbt -Dsbt.supershell=false --warn +package assembly dockerPushCheck git secrets --scan-history From 68ce96ed2ac9a1d31040cc1fd5ce9f68025d5403 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 15 Oct 2025 15:18:02 -0400 Subject: [PATCH 08/28] Enhance tests to check instance metadata --- .../gcp_machine_type/gcp_machine_type.test | 3 +++ .../standardTestCases/gcp_machine_type/gcp_machine_type.wdl | 6 ++++++ .../gcp_machine_type/gcp_machine_type_preemptible.test | 3 +++ 3 files changed, 12 insertions(+) diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test index 6a79ee4df2c..0a32bfac491 100644 --- a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test @@ -6,7 +6,10 @@ files { workflow: gcp_machine_type.wdl } +# Compute engine metadata docs: https://cloud.google.com/compute/docs/metadata/predefined-metadata-keys#instance-metadata metadata { "calls.minimal_hello_world.hello_world.runtimeAttributes.gcp_machine_type": "n2-standard-2" "calls.minimal_hello_world.hello_world.runtimeAttributes.preemptible": "0" + "outputs.minimal_hello_world.actual_machine_type": ~~"machineTypes/n2-standard-2" + "outputs.minimal_hello_world.is_preemptible": "FALSE" } diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.wdl 
b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.wdl index c60efcbe916..0ddb8b1d632 100644 --- a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.wdl +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.wdl @@ -16,6 +16,8 @@ workflow minimal_hello_world { output { String stdout = hello_world.stdout + String actual_machine_type = hello_world.actual_machine_type + String is_preemptible = hello_world.is_preemptible } } @@ -31,6 +33,8 @@ task hello_world { cat /etc/os-release uname -a cat /proc/cpuinfo + curl --header "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/instance/machine-type > actual_machine_type.txt + curl --header "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/instance/scheduling/preemptible > is_preemptible.txt >>> runtime { @@ -45,5 +49,7 @@ task hello_world { output { String stdout = read_string(stdout()) + String actual_machine_type = read_string("actual_machine_type.txt") + String is_preemptible = read_string("is_preemptible.txt") } } diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test index c9c673d60e1..064beae37d9 100644 --- a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test @@ -7,7 +7,10 @@ files { inputs: preemptible_inputs.json } +# Compute engine metadata docs: https://cloud.google.com/compute/docs/metadata/predefined-metadata-keys#instance-metadata metadata { "calls.minimal_hello_world.hello_world.runtimeAttributes.gcp_machine_type": "n2-standard-2" "calls.minimal_hello_world.hello_world.runtimeAttributes.preemptible": "5" + "outputs.minimal_hello_world.actual_machine_type": 
~~"machineTypes/n2-standard-2" + "outputs.minimal_hello_world.is_preemptible": "TRUE" } From 678e29d1ceabd64b86207281045359efac8a7553 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 15 Oct 2025 16:01:30 -0400 Subject: [PATCH 09/28] Add GPU test --- .../gcp_machine_type/gcp_machine_type.test | 1 - .../gcp_machine_type/gcp_machine_type.wdl | 8 +++++++- .../gcp_machine_type_gpu.test | 19 +++++++++++++++++++ .../gcp_machine_type_preemptible.test | 1 - .../gcp_machine_type/gpu_inputs.json | 4 ++++ 5 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_gpu.test create mode 100644 centaur/src/main/resources/standardTestCases/gcp_machine_type/gpu_inputs.json diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test index 0a32bfac491..157607527eb 100644 --- a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test @@ -6,7 +6,6 @@ files { workflow: gcp_machine_type.wdl } -# Compute engine metadata docs: https://cloud.google.com/compute/docs/metadata/predefined-metadata-keys#instance-metadata metadata { "calls.minimal_hello_world.hello_world.runtimeAttributes.gcp_machine_type": "n2-standard-2" "calls.minimal_hello_world.hello_world.runtimeAttributes.preemptible": "0" diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.wdl b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.wdl index 0ddb8b1d632..022f71d385f 100644 --- a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.wdl +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.wdl @@ -5,13 +5,15 @@ workflow minimal_hello_world { String image = 
"rockylinux/rockylinux:10" String machine_type = "n2-standard-2" Int preemptible = 0 + String zones = "northamerica-northeast1-a northamerica-northeast1-b northamerica-northeast1-c" } call hello_world { input: image = image, machine_type = machine_type, - preemptible = preemptible + preemptible = preemptible, + zones = zones } output { @@ -27,8 +29,11 @@ task hello_world { String image String machine_type Int preemptible + String zones } + # Check machine specs by querying instance metadata + # https://cloud.google.com/compute/docs/metadata/predefined-metadata-keys#instance-metadata command <<< cat /etc/os-release uname -a @@ -41,6 +46,7 @@ task hello_world { docker: image gcp_machine_type: machine_type preemptible: preemptible + zones: zones } meta { diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_gpu.test b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_gpu.test new file mode 100644 index 00000000000..7602ecaf8f8 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_gpu.test @@ -0,0 +1,19 @@ +name: gcp_machine_type_gpu +testFormat: workflowsuccess +backends: [GCPBATCH] + +# Creates a `g2-standard-4` VM: 1 NVIDIA L4 GPU, 4 vCPUs, 16GB RAM +# This is the cheapest machine type under the new type-based GPU model, replacing the older machine type + gpu type scheme. 
+# For more information, see https://broadworkbench.atlassian.net/browse/AN-758 + +files { + workflow: gcp_machine_type.wdl + inputs: gpu_inputs.json +} + +metadata { + "calls.minimal_hello_world.hello_world.runtimeAttributes.gcp_machine_type": "g2-standard-4" + "calls.minimal_hello_world.hello_world.runtimeAttributes.preemptible": "0" + "outputs.minimal_hello_world.actual_machine_type": ~~"machineTypes/g2-standard-4" + "outputs.minimal_hello_world.is_preemptible": "FALSE" +} diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test index 064beae37d9..c8279652abe 100644 --- a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test @@ -7,7 +7,6 @@ files { inputs: preemptible_inputs.json } -# Compute engine metadata docs: https://cloud.google.com/compute/docs/metadata/predefined-metadata-keys#instance-metadata metadata { "calls.minimal_hello_world.hello_world.runtimeAttributes.gcp_machine_type": "n2-standard-2" "calls.minimal_hello_world.hello_world.runtimeAttributes.preemptible": "5" diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gpu_inputs.json b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gpu_inputs.json new file mode 100644 index 00000000000..64589b6f9d4 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gpu_inputs.json @@ -0,0 +1,4 @@ +{ + "minimal_hello_world.machine_type": "g2-standard-4", + "minimal_hello_world.zones": "us-east4-a us-east4-c" +} From c95c551c739d868d0223cbb72471a904bd4edcef Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 15 Oct 2025 16:26:16 -0400 Subject: [PATCH 10/28] Docs --- docs/RuntimeAttributes.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff 
--git a/docs/RuntimeAttributes.md b/docs/RuntimeAttributes.md index c3cbbfdccec..cf5f32fc6dc 100644 --- a/docs/RuntimeAttributes.md +++ b/docs/RuntimeAttributes.md @@ -60,6 +60,7 @@ There are a number of additional runtime attributes that apply to the Google Clo - [zones](#zones) - [preemptible](#preemptible) +- [gcp_machine_type](#gcp_machine_type-alpha) - [bootDiskSizeGb](#bootdisksizegb) - [noAddress](#noaddress) - [gpuCount, gpuType, and nvidiaDriverVersion](#gpucount-gputype-and-nvidiadriverversion) @@ -315,6 +316,33 @@ runtime { Defaults to the configuration setting `genomics.default-zones` in the Google Cloud configuration block, which in turn defaults to using `us-central1-b`. +### `gcp_machine_type` **(alpha)** + +*Default: none* + +**This attribute is in testing with alpha-level support. Please see limitations for details.** + +Select a specific GCP machine type, such as `n2-standard-2` or `a2-highgpu-1g`. + +Setting `gcp_machine_type` overrides `cpu`, `memory`, `gpuCount`, and `gpuType`. 
+ +``` +runtime { + gcp_machine_type: "n2-standard-2" +} +``` + +Possible benefits: +* Access to [GPU types](https://cloud.google.com/compute/docs/gpus#gpu-models) such as Ampere, Lovelace, and other newer models +* Avoid [5% surcharge](https://cloud.google.com/compute/docs/instances/creating-instance-with-custom-machine-type#custom_machine_type_pricing) on custom machine types (Cromwell default) +* Reduce preemption by using predefined types with [better availability](https://cloud.google.com/compute/docs/instances/create-use-preemptible#best_practices) + +Limitations: +* Cost estimation not yet supported +* GPU availability may be limited due to resource or quota exhaustion +* GCP types are non-portable and proprietary to Google Cloud Platform +* GCP Batch job details display incorrect "Cores", "Memory" values (cosmetic) + ### `preemptible` *Default: _0_* From a5af024e15cc44354efad95a717560b209c96c37 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 15 Oct 2025 16:27:48 -0400 Subject: [PATCH 11/28] Remove Life Sciences references --- docs/RuntimeAttributes.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/RuntimeAttributes.md b/docs/RuntimeAttributes.md index cf5f32fc6dc..be5ff91ff57 100644 --- a/docs/RuntimeAttributes.md +++ b/docs/RuntimeAttributes.md @@ -435,15 +435,12 @@ The types of compute GPU supported are: * `nvidia-tesla-p4` * `nvidia-tesla-t4` -On Life Sciences API, the default driver is `418.87.00`. You may specify your own via the `nvidiaDriverVersion` key. Make sure that driver exists in the `nvidia-drivers-us-public` beforehand, per the [Google Pipelines API documentation](https://cloud.google.com/genomics/reference/rest/Shared.Types/Metadata#VirtualMachine). - -On GCP Batch, `nvidiaDriverVersion` is currently ignored; Batch selects the correct driver version automatically. +`nvidiaDriverVersion` is deprecated and ignored; GCP Batch selects the correct driver version automatically. 
``` runtime { gpuType: "nvidia-tesla-t4" gpuCount: 2 - nvidiaDriverVersion: "418.87.00" zones: ["us-central1-c"] } ``` From 93598329de7b88a1e06468f286909c3e6483260c Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 15 Oct 2025 16:38:40 -0400 Subject: [PATCH 12/28] Fix markdown syntax --- docs/RuntimeAttributes.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/RuntimeAttributes.md b/docs/RuntimeAttributes.md index be5ff91ff57..512672061da 100644 --- a/docs/RuntimeAttributes.md +++ b/docs/RuntimeAttributes.md @@ -333,9 +333,9 @@ runtime { ``` Possible benefits: -* Access to [GPU types](https://cloud.google.com/compute/docs/gpus#gpu-models) such as Ampere, Lovelace, and other newer models -* Avoid [5% surcharge](https://cloud.google.com/compute/docs/instances/creating-instance-with-custom-machine-type#custom_machine_type_pricing) on custom machine types (Cromwell default) -* Reduce preemption by using predefined types with [better availability](https://cloud.google.com/compute/docs/instances/create-use-preemptible#best_practices) +- Access to [GPU types](https://cloud.google.com/compute/docs/gpus#gpu-models) such as Ampere, Lovelace, and other newer models +- Avoid [5% surcharge](https://cloud.google.com/compute/docs/instances/creating-instance-with-custom-machine-type#custom_machine_type_pricing) on custom machine types (Cromwell default) +- Reduce preemption by using predefined types with [better availability](https://cloud.google.com/compute/docs/instances/create-use-preemptible#best_practices) Limitations: * Cost estimation not yet supported From 11791cef7e9eb01465dcebb3dbc9f9619ccca866 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 15 Oct 2025 16:50:30 -0400 Subject: [PATCH 13/28] Maybe this fixes syntax? 
--- docs/RuntimeAttributes.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/docs/RuntimeAttributes.md b/docs/RuntimeAttributes.md index 512672061da..9f4d4ee17d9 100644 --- a/docs/RuntimeAttributes.md +++ b/docs/RuntimeAttributes.md @@ -333,11 +333,13 @@ runtime { ``` Possible benefits: -- Access to [GPU types](https://cloud.google.com/compute/docs/gpus#gpu-models) such as Ampere, Lovelace, and other newer models -- Avoid [5% surcharge](https://cloud.google.com/compute/docs/instances/creating-instance-with-custom-machine-type#custom_machine_type_pricing) on custom machine types (Cromwell default) -- Reduce preemption by using predefined types with [better availability](https://cloud.google.com/compute/docs/instances/create-use-preemptible#best_practices) + +* Access to [GPU types](https://cloud.google.com/compute/docs/gpus#gpu-models) such as Ampere, Lovelace, and other newer models +* Avoid [5% surcharge](https://cloud.google.com/compute/docs/instances/creating-instance-with-custom-machine-type#custom_machine_type_pricing) on custom machine types (Cromwell default) +* Reduce preemption by using predefined types with [better availability](https://cloud.google.com/compute/docs/instances/create-use-preemptible#best_practices) Limitations: + * Cost estimation not yet supported * GPU availability may be limited due to resource or quota exhaustion * GCP types are non-portable and proprietary to Google Cloud Platform @@ -425,8 +427,8 @@ That's it! You can now run with `noAddress` runtime attribute and it will work ### `gpuCount`, `gpuType`, and `nvidiaDriverVersion` -Attach GPUs to the instance when running on the Pipelines API([GPU documentation](https://cloud.google.com/compute/docs/gpus/)). -Make sure to choose a zone for which the type of GPU you want to attach is available. +Attach [GPUs](https://cloud.google.com/compute/docs/gpus/) to the [GCP Batch instance](https://cloud.google.com/batch/docs/create-run-job-gpus). 
+Make sure to choose a zone in which the type of GPU you want is available. The types of compute GPU supported are: @@ -435,8 +437,6 @@ The types of compute GPU supported are: * `nvidia-tesla-p4` * `nvidia-tesla-t4` -`nvidiaDriverVersion` is deprecated and ignored; GCP Batch selects the correct driver version automatically. - ``` runtime { gpuType: "nvidia-tesla-t4" @@ -445,6 +445,8 @@ runtime { } ``` +`nvidiaDriverVersion` is deprecated and ignored; GCP Batch selects the correct driver version automatically. + ### `cpuPlatform` This option is specific to the Google Cloud backend, specifically [this](https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform) feature when a certain minimum CPU platform is desired. From a21c8606f673d87cca6fd1ab15d1265ade4ecc59 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 15 Oct 2025 17:00:03 -0400 Subject: [PATCH 14/28] Changelog --- CHANGELOG.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f71b6319419..d7050d1459c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,16 @@ * WDL 1.1 support is in progress. Users that would like to try out the current partial support can do so by using WDL version `development-1.1`. In Cromwell 92, `development-1.1` has been enhanced to include: * Support for passthrough syntax for call inputs, e.g. `{ input: foo }` rather than `{ input: foo = foo }`. +### GPU changes on Google Cloud backend + +#### Removed `nvidiaDriverVersion` + +In GCP Batch, the `nvidiaDriverVersion` attribute is ignored. Now that Life Sciences has retired, the attribute is now fully deprecated and can be removed from workflows. + +#### Added `gcp_machine_type` (alpha) + +The new `gcp_machine_type` attribute is introduced in alpha. See [the attribute's docs](https://cromwell.readthedocs.io/en/develop/RuntimeAttributes/#gcp_machine_type-alpha) for details. 
+ ## 91 Release Notes #### Removal of Google LifeSciences backend code From dd6fa13e4fc26fcc68ee056fd3ac823b20f8d33e Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 15 Oct 2025 17:00:33 -0400 Subject: [PATCH 15/28] Further clean up `nvidiaDriverVersion` --- docs/RuntimeAttributes.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/RuntimeAttributes.md b/docs/RuntimeAttributes.md index 9f4d4ee17d9..dfafc7c0289 100644 --- a/docs/RuntimeAttributes.md +++ b/docs/RuntimeAttributes.md @@ -63,7 +63,7 @@ There are a number of additional runtime attributes that apply to the Google Clo - [gcp_machine_type](#gcp_machine_type-alpha) - [bootDiskSizeGb](#bootdisksizegb) - [noAddress](#noaddress) -- [gpuCount, gpuType, and nvidiaDriverVersion](#gpucount-gputype-and-nvidiadriverversion) +- [gpuCount and gpuType](#gpucount-and-gputype) - [cpuPlatform](#cpuplatform) @@ -425,7 +425,7 @@ Configure your Google network to use "Private Google Access". This will allow yo That's it! You can now run with `noAddress` runtime attribute and it will work as expected. -### `gpuCount`, `gpuType`, and `nvidiaDriverVersion` +### `gpuCount` and `gpuType` Attach [GPUs](https://cloud.google.com/compute/docs/gpus/) to the [GCP Batch instance](https://cloud.google.com/batch/docs/create-run-job-gpus). Make sure to choose a zone in which the type of GPU you want is available. From 19966c6b3b54a788e3e7e16be52f157e8b2ff4f8 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 15 Oct 2025 17:13:13 -0400 Subject: [PATCH 16/28] Extra explain `cpuPlatform` --- docs/RuntimeAttributes.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/RuntimeAttributes.md b/docs/RuntimeAttributes.md index dfafc7c0289..6e1a9c1a7cd 100644 --- a/docs/RuntimeAttributes.md +++ b/docs/RuntimeAttributes.md @@ -326,6 +326,8 @@ Select a specific GCP machine type, such as `n2-standard-2` or `a2-highgpu-1g`. Setting `gcp_machine_type` overrides `cpu`, `memory`, `gpuCount`, and `gpuType`. 
+`gcp_machine_type` _is_ compatible with `cpuPlatform` so long as the platform is a valid option for the specified type. + ``` runtime { gcp_machine_type: "n2-standard-2" From 721bd0c9b13f8985eb8b7088a2451e335a2bbc41 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 15 Oct 2025 17:15:11 -0400 Subject: [PATCH 17/28] Clarify comment --- .../backend/google/batch/api/GcpBatchRequestFactoryImpl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala index 8dfabc7b82c..899a28daec1 100644 --- a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala @@ -272,7 +272,7 @@ class GcpBatchRequestFactoryImpl()(implicit gcsTransferConfiguration: GcsTransfe case Some(m: MachineType) => // Allow users to select predefined machine types, such as `n2-standard-4`. // Overrides CPU count and memory attributes. 
- // Compatible with CPU platform, it is the user's responsibility to select a valid type/platform combination + // We still pass platform when machine is specified; it is the user's responsibility to select a valid type/platform combination m.machineType case None => // CPU platform drives selection of machine type, but is not encoded in the `machineType` return value itself From 67dc13d2037db659ea138f308ab82e8e17d487bb Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 15 Oct 2025 17:51:18 -0400 Subject: [PATCH 18/28] Rename: camelCase to match other attrs --- CHANGELOG.md | 4 ++-- .../gcp_machine_type/gcp_machine_type.test | 2 +- .../gcp_machine_type/gcp_machine_type.wdl | 2 +- .../gcp_machine_type/gcp_machine_type_gpu.test | 2 +- .../gcp_machine_type/gcp_machine_type_preemptible.test | 2 +- docs/RuntimeAttributes.md | 10 +++++----- .../batch/models/GcpBatchRuntimeAttributes.scala | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7050d1459c..873feb6888f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,9 +12,9 @@ In GCP Batch, the `nvidiaDriverVersion` attribute is ignored. Now that Life Sciences has retired, the attribute is now fully deprecated and can be removed from workflows. -#### Added `gcp_machine_type` (alpha) +#### Added `predefinedMachineType` (alpha) -The new `gcp_machine_type` attribute is introduced in alpha. See [the attribute's docs](https://cromwell.readthedocs.io/en/develop/RuntimeAttributes/#gcp_machine_type-alpha) for details. +The new `predefinedMachineType` attribute is introduced in alpha. See [the attribute's docs](https://cromwell.readthedocs.io/en/develop/RuntimeAttributes/#predefinedMachineType-alpha) for details. 
## 91 Release Notes diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test index 157607527eb..413dcd8c239 100644 --- a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.test @@ -7,7 +7,7 @@ files { } metadata { - "calls.minimal_hello_world.hello_world.runtimeAttributes.gcp_machine_type": "n2-standard-2" + "calls.minimal_hello_world.hello_world.runtimeAttributes.predefinedMachineType": "n2-standard-2" "calls.minimal_hello_world.hello_world.runtimeAttributes.preemptible": "0" "outputs.minimal_hello_world.actual_machine_type": ~~"machineTypes/n2-standard-2" "outputs.minimal_hello_world.is_preemptible": "FALSE" diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.wdl b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.wdl index 022f71d385f..64864fece2e 100644 --- a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.wdl +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type.wdl @@ -44,7 +44,7 @@ task hello_world { runtime { docker: image - gcp_machine_type: machine_type + predefinedMachineType: machine_type preemptible: preemptible zones: zones } diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_gpu.test b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_gpu.test index 7602ecaf8f8..b0795cfeb73 100644 --- a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_gpu.test +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_gpu.test @@ -12,7 +12,7 @@ files { } metadata { - "calls.minimal_hello_world.hello_world.runtimeAttributes.gcp_machine_type": "g2-standard-4" + 
"calls.minimal_hello_world.hello_world.runtimeAttributes.predefinedMachineType": "g2-standard-4" "calls.minimal_hello_world.hello_world.runtimeAttributes.preemptible": "0" "outputs.minimal_hello_world.actual_machine_type": ~~"machineTypes/g2-standard-4" "outputs.minimal_hello_world.is_preemptible": "FALSE" diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test index c8279652abe..3569036575f 100644 --- a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_preemptible.test @@ -8,7 +8,7 @@ files { } metadata { - "calls.minimal_hello_world.hello_world.runtimeAttributes.gcp_machine_type": "n2-standard-2" + "calls.minimal_hello_world.hello_world.runtimeAttributes.predefinedMachineType": "n2-standard-2" "calls.minimal_hello_world.hello_world.runtimeAttributes.preemptible": "5" "outputs.minimal_hello_world.actual_machine_type": ~~"machineTypes/n2-standard-2" "outputs.minimal_hello_world.is_preemptible": "TRUE" diff --git a/docs/RuntimeAttributes.md b/docs/RuntimeAttributes.md index 6e1a9c1a7cd..a56c7917844 100644 --- a/docs/RuntimeAttributes.md +++ b/docs/RuntimeAttributes.md @@ -60,7 +60,7 @@ There are a number of additional runtime attributes that apply to the Google Clo - [zones](#zones) - [preemptible](#preemptible) -- [gcp_machine_type](#gcp_machine_type-alpha) +- [predefinedMachineType](#predefinedMachineType-alpha) - [bootDiskSizeGb](#bootdisksizegb) - [noAddress](#noaddress) - [gpuCount and gpuType](#gpucount-and-gputype) @@ -316,7 +316,7 @@ runtime { Defaults to the configuration setting `genomics.default-zones` in the Google Cloud configuration block, which in turn defaults to using `us-central1-b`. 
-### `gcp_machine_type` **(alpha)** +### `predefinedMachineType` **(alpha)** *Default: none* @@ -324,13 +324,13 @@ Defaults to the configuration setting `genomics.default-zones` in the Google Clo Select a specific GCP machine type, such as `n2-standard-2` or `a2-highgpu-1g`. -Setting `gcp_machine_type` overrides `cpu`, `memory`, `gpuCount`, and `gpuType`. +Setting `predefinedMachineType` overrides `cpu`, `memory`, `gpuCount`, and `gpuType`. -`gcp_machine_type` _is_ compatible with `cpuPlatform` so long as the platform is a valid option for the specified type. +`predefinedMachineType` _is_ compatible with `cpuPlatform` so long as the platform is a valid option for the specified type. ``` runtime { - gcp_machine_type: "n2-standard-2" + predefinedMachineType: "n2-standard-2" } ``` diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/GcpBatchRuntimeAttributes.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/GcpBatchRuntimeAttributes.scala index 3c492ba88cc..f076989c055 100644 --- a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/GcpBatchRuntimeAttributes.scala +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/GcpBatchRuntimeAttributes.scala @@ -81,7 +81,7 @@ object GcpBatchRuntimeAttributes { val CpuPlatformIntelIceLakeValue = "Intel Ice Lake" val CpuPlatformAMDRomeValue = "AMD Rome" - val MachineTypeKey = "gcp_machine_type" + val MachineTypeKey = "predefinedMachineType" val CheckpointFileKey = "checkpointFile" private val checkpointFileValidationInstance = new StringRuntimeAttributesValidation(CheckpointFileKey).optional From aa8dbc42bcc21973a8f326d930e7d089be0640cd Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 15 Oct 2025 18:01:28 -0400 Subject: [PATCH 19/28] Just Say No to stack traces --- .../batch/actors/GcpBatchAsyncBackendJobExecutionActor.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/actors/GcpBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/actors/GcpBatchAsyncBackendJobExecutionActor.scala index 630cbf64eba..99c41d483fb 100644 --- a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/actors/GcpBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/actors/GcpBatchAsyncBackendJobExecutionActor.scala @@ -81,7 +81,7 @@ object GcpBatchAsyncBackendJobExecutionActor { new Exception( s"Task $jobTag failed. $returnCodeMessage GCP Batch task exited with ${errorCode}(${errorCode.code}). ${message}" - ) + ) with NoStackTrace } // GCS path regexes comments: From 5f799c82d124ced8fea65e197c8f9ae2c74046e0 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 15 Oct 2025 18:14:20 -0400 Subject: [PATCH 20/28] Test orderly failure for invalid type --- .../gcp_machine_type/fail_inputs.json | 3 +++ .../gcp_machine_type/gcp_machine_type_fail.test | 13 +++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 centaur/src/main/resources/standardTestCases/gcp_machine_type/fail_inputs.json create mode 100644 centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_fail.test diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/fail_inputs.json b/centaur/src/main/resources/standardTestCases/gcp_machine_type/fail_inputs.json new file mode 100644 index 00000000000..0e7770e41db --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/fail_inputs.json @@ -0,0 +1,3 @@ +{ + "minimal_hello_world.machine_type": "banana" +} diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_fail.test b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_fail.test new file mode 100644 index 
00000000000..33478ae3584 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/gcp_machine_type_fail.test @@ -0,0 +1,13 @@ +name: gcp_machine_type_fail +testFormat: workflowfailure +backends: [GCPBATCH] + +files { + workflow: gcp_machine_type.wdl + inputs: fail_inputs.json +} + +# Batch rejects the task and Cromwell fails it in an orderly manner +metadata { + "failures.0.causedBy.0.message": ~~"GCP Batch task exited with Success(0). " +} From c5e9025a5f762e2eb7195fa67c784177f09df19c Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Thu, 16 Oct 2025 14:25:26 -0400 Subject: [PATCH 21/28] `e2-medium` is cheapest sensible VM --- .../gcp_machine_type/e2-medium.json | 3 +++ .../gcp_machine_type/e2-medium.test | 18 ++++++++++++++++++ docs/RuntimeAttributes.md | 3 ++- 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 centaur/src/main/resources/standardTestCases/gcp_machine_type/e2-medium.json create mode 100644 centaur/src/main/resources/standardTestCases/gcp_machine_type/e2-medium.test diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/e2-medium.json b/centaur/src/main/resources/standardTestCases/gcp_machine_type/e2-medium.json new file mode 100644 index 00000000000..f2e20915d3f --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/e2-medium.json @@ -0,0 +1,3 @@ +{ + "minimal_hello_world.machine_type": "e2-medium" +} diff --git a/centaur/src/main/resources/standardTestCases/gcp_machine_type/e2-medium.test b/centaur/src/main/resources/standardTestCases/gcp_machine_type/e2-medium.test new file mode 100644 index 00000000000..445c0b79fd3 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/gcp_machine_type/e2-medium.test @@ -0,0 +1,18 @@ +name: e2-medium +testFormat: workflowsuccess +backends: [GCPBATCH] + +files { + workflow: gcp_machine_type.wdl + inputs: e2-medium.json +} + +# `e2-medium` is the cheapest machine that works decently in Batch, costing 20% less +# 
than the next alternative. May be suitable for a variety of "I just need a VM" tasks. +# https://cloud.google.com/compute/docs/general-purpose-machines#sharedcore +metadata { + "calls.minimal_hello_world.hello_world.runtimeAttributes.predefinedMachineType": "e2-medium" + "calls.minimal_hello_world.hello_world.runtimeAttributes.preemptible": "0" + "outputs.minimal_hello_world.actual_machine_type": ~~"machineTypes/e2-medium" + "outputs.minimal_hello_world.is_preemptible": "FALSE" +} diff --git a/docs/RuntimeAttributes.md b/docs/RuntimeAttributes.md index a56c7917844..9062f792a08 100644 --- a/docs/RuntimeAttributes.md +++ b/docs/RuntimeAttributes.md @@ -336,9 +336,10 @@ runtime { Possible benefits: -* Access to [GPU types](https://cloud.google.com/compute/docs/gpus#gpu-models) such as Ampere, Lovelace, and other newer models +* Access [GPU types](https://cloud.google.com/compute/docs/gpus#gpu-models) such as Ampere, Lovelace, and other newer models * Avoid [5% surcharge](https://cloud.google.com/compute/docs/instances/creating-instance-with-custom-machine-type#custom_machine_type_pricing) on custom machine types (Cromwell default) * Reduce preemption by using predefined types with [better availability](https://cloud.google.com/compute/docs/instances/create-use-preemptible#best_practices) +* Run basic tasks at the lowest possible cost with [shared-core machines](https://cloud.google.com/compute/docs/general-purpose-machines#sharedcore) like `e2-medium` Limitations: From 750328b9f41b71b8359138cb2174c609dff60d89 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Thu, 16 Oct 2025 14:26:02 -0400 Subject: [PATCH 22/28] Docs --- docs/RuntimeAttributes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/RuntimeAttributes.md b/docs/RuntimeAttributes.md index 9062f792a08..e3eb3a9cd48 100644 --- a/docs/RuntimeAttributes.md +++ b/docs/RuntimeAttributes.md @@ -336,7 +336,7 @@ runtime { Possible benefits: -* Access [GPU 
types](https://cloud.google.com/compute/docs/gpus#gpu-models) such as Ampere, Lovelace, and other newer models +* Access [GPU machine types](https://cloud.google.com/compute/docs/gpus#gpu-models) such as Ampere, Lovelace, and other newer models * Avoid [5% surcharge](https://cloud.google.com/compute/docs/instances/creating-instance-with-custom-machine-type#custom_machine_type_pricing) on custom machine types (Cromwell default) * Reduce preemption by using predefined types with [better availability](https://cloud.google.com/compute/docs/instances/create-use-preemptible#best_practices) * Run basic tasks at the lowest possible cost with [shared-core machines](https://cloud.google.com/compute/docs/general-purpose-machines#sharedcore) like `e2-medium` From 112dedd6a134acfae0530d0096c66a0c40e37d06 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Thu, 16 Oct 2025 16:37:19 -0400 Subject: [PATCH 23/28] Boop tests by updating docs --- CHANGELOG.md | 2 +- docs/RuntimeAttributes.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 873feb6888f..ad3d9329774 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ In GCP Batch, the `nvidiaDriverVersion` attribute is ignored. Now that Life Scie #### Added `predefinedMachineType` (alpha) -The new `predefinedMachineType` attribute is introduced in alpha. See [the attribute's docs](https://cromwell.readthedocs.io/en/develop/RuntimeAttributes/#predefinedMachineType-alpha) for details. +The new `predefinedMachineType` attribute is introduced in experimental status. See [the attribute's docs](https://cromwell.readthedocs.io/en/develop/RuntimeAttributes/#predefinedMachineType-alpha) for details. 
## 91 Release Notes diff --git a/docs/RuntimeAttributes.md b/docs/RuntimeAttributes.md index e3eb3a9cd48..8955631c70f 100644 --- a/docs/RuntimeAttributes.md +++ b/docs/RuntimeAttributes.md @@ -320,7 +320,7 @@ Defaults to the configuration setting `genomics.default-zones` in the Google Clo *Default: none* -**This attribute is in testing with alpha-level support. Please see limitations for details.** +**This attribute is in experimental status. Please see limitations for details.** Select a specific GCP machine type, such as `n2-standard-2` or `a2-highgpu-1g`. From 63b5f72c68922d4a3a50e12b45bcc81c1fd994fe Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Fri, 17 Oct 2025 14:44:41 -0400 Subject: [PATCH 24/28] Fix RTD anchor --- docs/RuntimeAttributes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/RuntimeAttributes.md b/docs/RuntimeAttributes.md index 8955631c70f..1ec1cd4f480 100644 --- a/docs/RuntimeAttributes.md +++ b/docs/RuntimeAttributes.md @@ -60,7 +60,7 @@ There are a number of additional runtime attributes that apply to the Google Clo - [zones](#zones) - [preemptible](#preemptible) -- [predefinedMachineType](#predefinedMachineType-alpha) +- [predefinedMachineType](#predefinedmachinetype-alpha) - [bootDiskSizeGb](#bootdisksizegb) - [noAddress](#noaddress) - [gpuCount and gpuType](#gpucount-and-gputype) From f9a948e26adfed326971ef589c89f525266da682 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Fri, 17 Oct 2025 14:45:51 -0400 Subject: [PATCH 25/28] Fix RTD --- docs/RuntimeAttributes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/RuntimeAttributes.md b/docs/RuntimeAttributes.md index 1ec1cd4f480..4086377fe3c 100644 --- a/docs/RuntimeAttributes.md +++ b/docs/RuntimeAttributes.md @@ -316,7 +316,7 @@ runtime { Defaults to the configuration setting `genomics.default-zones` in the Google Cloud configuration block, which in turn defaults to using `us-central1-b`. 
-### `predefinedMachineType` **(alpha)** +### `predefinedMachineType` (alpha) *Default: none* From 1029f9fdc96b65aed612a0704850460316093ab1 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Fri, 17 Oct 2025 14:48:04 -0400 Subject: [PATCH 26/28] Fix Changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31b19816f75..fb1668253c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ In GCP Batch, the `nvidiaDriverVersion` attribute is ignored. Now that Life Scie #### Added `predefinedMachineType` (alpha) -The new `predefinedMachineType` attribute is introduced in experimental status. See [the attribute's docs](https://cromwell.readthedocs.io/en/develop/RuntimeAttributes/#predefinedMachineType-alpha) for details. +The new `predefinedMachineType` attribute is introduced in experimental status. See [the attribute's docs](https://cromwell.readthedocs.io/en/develop/RuntimeAttributes/#predefinedtachinetype-alpha) for details. ### Database Migration The index `IX_METADATA_ENTRY_WEU_CFQN_JSI_JRA_MK` is added to `METADATA_ENTRY`. In pre-release testing, the migration proceeded at about 3 million rows per minute. Please plan downtime accordingly. From 47fd9504cebb97c18d5fc5a875b0428bee275e15 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Fri, 17 Oct 2025 14:49:26 -0400 Subject: [PATCH 27/28] Oh good gravy --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb1668253c3..25c6373f6f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ In GCP Batch, the `nvidiaDriverVersion` attribute is ignored. Now that Life Scie #### Added `predefinedMachineType` (alpha) -The new `predefinedMachineType` attribute is introduced in experimental status. See [the attribute's docs](https://cromwell.readthedocs.io/en/develop/RuntimeAttributes/#predefinedtachinetype-alpha) for details. 
+The new `predefinedMachineType` attribute is introduced in experimental status. See [the attribute's docs](https://cromwell.readthedocs.io/en/develop/RuntimeAttributes/#predefinedmachinetype-alpha) for details. ### Database Migration The index `IX_METADATA_ENTRY_WEU_CFQN_JSI_JRA_MK` is added to `METADATA_ENTRY`. In pre-release testing, the migration proceeded at about 3 million rows per minute. Please plan downtime accordingly. From 8c82dcf2c402cc3542e2338f6028bce79bd051e1 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Fri, 17 Oct 2025 15:52:35 -0400 Subject: [PATCH 28/28] Moar doc --- docs/RuntimeAttributes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/RuntimeAttributes.md b/docs/RuntimeAttributes.md index 4086377fe3c..58bb037e81f 100644 --- a/docs/RuntimeAttributes.md +++ b/docs/RuntimeAttributes.md @@ -326,7 +326,7 @@ Select a specific GCP machine type, such as `n2-standard-2` or `a2-highgpu-1g`. Setting `predefinedMachineType` overrides `cpu`, `memory`, `gpuCount`, and `gpuType`. -`predefinedMachineType` _is_ compatible with `cpuPlatform` so long as the platform is a valid option for the specified type. +`predefinedMachineType` _is_ compatible with `cpuPlatform` so long as the platform is [a valid option](https://cloud.google.com/compute/docs/cpu-platforms) for the specified type. ``` runtime {