Merged

33 commits
696b193
`Machine` class for GCP
aednichols Oct 9, 2025
e2f5bbb
Test cases
aednichols Oct 9, 2025
83d50e5
Merge remote-tracking branch 'origin/develop' into aen_an_751
aednichols Oct 9, 2025
2b6b199
New validation
aednichols Oct 14, 2025
29a0441
Merge remote-tracking branch 'origin/develop' into aen_an_751
aednichols Oct 14, 2025
775b1ed
`scalafmtAll`
aednichols Oct 14, 2025
9153f00
Enhanced `toString`
aednichols Oct 15, 2025
2eca2f1
Enhance tests
aednichols Oct 15, 2025
192172a
Disable no-op Scaladoc generation
aednichols Oct 15, 2025
68ce96e
Enhance tests to check instance metadata
aednichols Oct 15, 2025
678e29d
Add GPU test
aednichols Oct 15, 2025
c95c551
Docs
aednichols Oct 15, 2025
a5af024
Remove Life Sciences references
aednichols Oct 15, 2025
9359832
Fix markdown syntax
aednichols Oct 15, 2025
11791ce
Maybe this fixes syntax?
aednichols Oct 15, 2025
a21c860
Changelog
aednichols Oct 15, 2025
dd6fa13
Further clean up `nvidiaDriverVersion`
aednichols Oct 15, 2025
19966c6
Extra explain `cpuPlatform`
aednichols Oct 15, 2025
721bd0c
Clarify comment
aednichols Oct 15, 2025
67dc13d
Rename: camelCase to match other attrs
aednichols Oct 15, 2025
aa8dbc4
Just Say No to stack traces
aednichols Oct 15, 2025
5f799c8
Test orderly failure for invalid type
aednichols Oct 15, 2025
c5e9025
`e2-medium` is cheapest sensible VM
aednichols Oct 16, 2025
750328b
Docs
aednichols Oct 16, 2025
112dedd
Boop tests by updating docs
aednichols Oct 16, 2025
dbe3b17
Merge branch 'develop' into aen_an_751
aednichols Oct 17, 2025
63b5f72
Fix RTD anchor
aednichols Oct 17, 2025
f9a948e
Fix RTD
aednichols Oct 17, 2025
3ede6e9
Merge remote-tracking branch 'origin/develop' into aen_an_751
aednichols Oct 17, 2025
ee131a4
Merge remote-tracking branch 'origin/aen_an_751' into aen_an_751
aednichols Oct 17, 2025
1029f9f
Fix Changelog
aednichols Oct 17, 2025
47fd950
Oh good gravy
aednichols Oct 17, 2025
8c82dcf
Moar doc
aednichols Oct 17, 2025
10 changes: 10 additions & 0 deletions CHANGELOG.md
@@ -6,6 +6,16 @@
* WDL 1.1 support is in progress. Users that would like to try out the current partial support can do so by using WDL version `development-1.1`. In Cromwell 92, `development-1.1` has been enhanced to include:
* Support for passthrough syntax for call inputs, e.g. `{ input: foo }` rather than `{ input: foo = foo }`.

### GPU changes on Google Cloud backend

#### Removed `nvidiaDriverVersion`

In GCP Batch, the `nvidiaDriverVersion` attribute is ignored. Now that Life Sciences has retired, the attribute is fully deprecated and can be removed from workflows.

#### Added `predefinedMachineType` (alpha)

The new `predefinedMachineType` attribute is introduced in experimental status. See [the attribute's docs](https://cromwell.readthedocs.io/en/develop/RuntimeAttributes/#predefinedmachinetype-alpha) for details.

### Database Migration
The index `IX_METADATA_ENTRY_WEU_CFQN_JSI_JRA_MK` is added to `METADATA_ENTRY`. In pre-release testing, the migration proceeded at about 3 million rows per minute. Please plan downtime accordingly.

@@ -0,0 +1,3 @@
{
"minimal_hello_world.machine_type": "e2-medium"
}
@@ -0,0 +1,18 @@
name: e2-medium
testFormat: workflowsuccess
backends: [GCPBATCH]

files {
workflow: gcp_machine_type.wdl
inputs: e2-medium.json
}

# `e2-medium` is the cheapest machine that works decently in Batch, costing 20% less
# than the next alternative. May be suitable for a variety of "I just need a VM" tasks.
# https://cloud.google.com/compute/docs/general-purpose-machines#sharedcore
metadata {
"calls.minimal_hello_world.hello_world.runtimeAttributes.predefinedMachineType": "e2-medium"
"calls.minimal_hello_world.hello_world.runtimeAttributes.preemptible": "0"
"outputs.minimal_hello_world.actual_machine_type": ~~"machineTypes/e2-medium"
"outputs.minimal_hello_world.is_preemptible": "FALSE"
}
@@ -0,0 +1,3 @@
{
"minimal_hello_world.machine_type": "banana"
}
@@ -0,0 +1,14 @@
name: gcp_machine_type
testFormat: workflowsuccess
backends: [GCPBATCH]

files {
workflow: gcp_machine_type.wdl
}

metadata {
"calls.minimal_hello_world.hello_world.runtimeAttributes.predefinedMachineType": "n2-standard-2"
"calls.minimal_hello_world.hello_world.runtimeAttributes.preemptible": "0"
"outputs.minimal_hello_world.actual_machine_type": ~~"machineTypes/n2-standard-2"
"outputs.minimal_hello_world.is_preemptible": "FALSE"
}
@@ -0,0 +1,61 @@
version 1.0

workflow minimal_hello_world {
input {
String image = "rockylinux/rockylinux:10"
String machine_type = "n2-standard-2"
Int preemptible = 0
String zones = "northamerica-northeast1-a northamerica-northeast1-b northamerica-northeast1-c"
}

call hello_world {
input:
image = image,
machine_type = machine_type,
preemptible = preemptible,
zones = zones
}

output {
String stdout = hello_world.stdout
String actual_machine_type = hello_world.actual_machine_type
String is_preemptible = hello_world.is_preemptible
}
}

task hello_world {

input {
String image
String machine_type
Int preemptible
String zones
}

# Check machine specs by querying instance metadata
# https://cloud.google.com/compute/docs/metadata/predefined-metadata-keys#instance-metadata
command <<<
cat /etc/os-release
uname -a
cat /proc/cpuinfo
curl --header "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/instance/machine-type > actual_machine_type.txt
curl --header "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/instance/scheduling/preemptible > is_preemptible.txt
>>>

runtime {
docker: image
predefinedMachineType: machine_type
preemptible: preemptible
zones: zones
}

meta {
volatile: true
}

output {
String stdout = read_string(stdout())
String actual_machine_type = read_string("actual_machine_type.txt")
String is_preemptible = read_string("is_preemptible.txt")
}
}
@@ -0,0 +1,13 @@
name: gcp_machine_type_fail
testFormat: workflowfailure
backends: [GCPBATCH]

files {
workflow: gcp_machine_type.wdl
inputs: fail_inputs.json
}

# Batch rejects the task and Cromwell fails it in an orderly manner
metadata {
"failures.0.causedBy.0.message": ~~"GCP Batch task exited with Success(0). "
}
@@ -0,0 +1,19 @@
name: gcp_machine_type_gpu
testFormat: workflowsuccess
backends: [GCPBATCH]

# Creates a `g2-standard-4` VM: 1 NVIDIA L4 GPU, 4 vCPUs, 16GB RAM
# This is the cheapest machine type under the new type-based GPU model, replacing the older machine type + gpu type scheme.
# For more information, see https://broadworkbench.atlassian.net/browse/AN-758

files {
workflow: gcp_machine_type.wdl
inputs: gpu_inputs.json
}

metadata {
"calls.minimal_hello_world.hello_world.runtimeAttributes.predefinedMachineType": "g2-standard-4"
"calls.minimal_hello_world.hello_world.runtimeAttributes.preemptible": "0"
"outputs.minimal_hello_world.actual_machine_type": ~~"machineTypes/g2-standard-4"
"outputs.minimal_hello_world.is_preemptible": "FALSE"
}
@@ -0,0 +1,15 @@
name: gcp_machine_type_preemptible
testFormat: workflowsuccess
backends: [GCPBATCH]

files {
workflow: gcp_machine_type.wdl
inputs: preemptible_inputs.json
}

metadata {
"calls.minimal_hello_world.hello_world.runtimeAttributes.predefinedMachineType": "n2-standard-2"
"calls.minimal_hello_world.hello_world.runtimeAttributes.preemptible": "5"
"outputs.minimal_hello_world.actual_machine_type": ~~"machineTypes/n2-standard-2"
"outputs.minimal_hello_world.is_preemptible": "TRUE"
}
@@ -0,0 +1,4 @@
{
"minimal_hello_world.machine_type": "g2-standard-4",
"minimal_hello_world.zones": "us-east4-a us-east4-c"
}
@@ -0,0 +1,3 @@
{
"minimal_hello_world.preemptible": 5
}
48 changes: 39 additions & 9 deletions docs/RuntimeAttributes.md
@@ -60,9 +60,10 @@ There are a number of additional runtime attributes that apply to the Google Clo

- [zones](#zones)
- [preemptible](#preemptible)
- [predefinedMachineType](#predefinedmachinetype-alpha)
- [bootDiskSizeGb](#bootdisksizegb)
- [noAddress](#noaddress)
- [gpuCount, gpuType, and nvidiaDriverVersion](#gpucount-gputype-and-nvidiadriverversion)
- [gpuCount and gpuType](#gpucount-and-gputype)
- [cpuPlatform](#cpuplatform)


@@ -315,6 +316,38 @@ runtime {

Defaults to the configuration setting `genomics.default-zones` in the Google Cloud configuration block, which in turn defaults to using `us-central1-b`.

### `predefinedMachineType` (alpha)

*Default: none*

**This attribute is in experimental status. See the limitations below for details.**

Select a specific GCP machine type, such as `n2-standard-2` or `a2-highgpu-1g`.

Setting `predefinedMachineType` overrides `cpu`, `memory`, `gpuCount`, and `gpuType`.

`predefinedMachineType` _is_ compatible with `cpuPlatform` so long as the platform is [a valid option](https://cloud.google.com/compute/docs/cpu-platforms) for the specified type.

```
runtime {
predefinedMachineType: "n2-standard-2"
}
```
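The override behavior described above amounts to a simple precedence rule: a predefined type, when present, wins over the `cpu`/`memory`-derived custom type. A minimal sketch with hypothetical names (not Cromwell's actual classes; the custom-type string is simplified):

```scala
object MachineTypeResolution {
  // Hypothetical model of the attribute precedence: predefinedMachineType,
  // when set, replaces whatever cpu/memory would otherwise produce.
  final case class Attrs(cpu: Int, memoryMb: Int, predefinedMachineType: Option[String])

  def resolve(attrs: Attrs): String =
    // Without a predefined type, fall back to a GCP-style custom machine
    // type string derived from cpu and memory (simplified here).
    attrs.predefinedMachineType.getOrElse(s"custom-${attrs.cpu}-${attrs.memoryMb}")
}
```

For example, `resolve(Attrs(2, 8192, Some("n2-standard-2")))` yields `n2-standard-2`, while `resolve(Attrs(2, 8192, None))` falls back to `custom-2-8192`, the custom-type path that incurs the 5% surcharge mentioned below.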

Possible benefits:

* Access [GPU machine types](https://cloud.google.com/compute/docs/gpus#gpu-models) such as Ampere, Lovelace, and other newer models
* Avoid [5% surcharge](https://cloud.google.com/compute/docs/instances/creating-instance-with-custom-machine-type#custom_machine_type_pricing) on custom machine types (Cromwell default)
* Reduce preemption by using predefined types with [better availability](https://cloud.google.com/compute/docs/instances/create-use-preemptible#best_practices)
* Run basic tasks at the lowest possible cost with [shared-core machines](https://cloud.google.com/compute/docs/general-purpose-machines#sharedcore) like `e2-medium`

Limitations:

* Cost estimation not yet supported
* GPU availability may be limited due to resource or quota exhaustion
* GCP types are non-portable and proprietary to Google Cloud Platform
* GCP Batch job details display incorrect "Cores", "Memory" values (cosmetic)

### `preemptible`

*Default: _0_*
@@ -395,10 +428,10 @@ Configure your Google network to use "Private Google Access". This will allow yo

That's it! You can now run with `noAddress` runtime attribute and it will work as expected.

### `gpuCount`, `gpuType`, and `nvidiaDriverVersion`
### `gpuCount` and `gpuType`

Attach GPUs to the instance when running on the Pipelines API([GPU documentation](https://cloud.google.com/compute/docs/gpus/)).
Make sure to choose a zone for which the type of GPU you want to attach is available.
Attach [GPUs](https://cloud.google.com/compute/docs/gpus/) to the [GCP Batch instance](https://cloud.google.com/batch/docs/create-run-job-gpus).
Make sure to choose a zone in which the type of GPU you want is available.

The types of compute GPU supported are:

@@ -407,19 +440,16 @@ The types of compute GPU supported are:
* `nvidia-tesla-p4`
* `nvidia-tesla-t4`

On Life Sciences API, the default driver is `418.87.00`. You may specify your own via the `nvidiaDriverVersion` key. Make sure that driver exists in the `nvidia-drivers-us-public` beforehand, per the [Google Pipelines API documentation](https://cloud.google.com/genomics/reference/rest/Shared.Types/Metadata#VirtualMachine).

On GCP Batch, `nvidiaDriverVersion` is currently ignored; Batch selects the correct driver version automatically.

```
runtime {
gpuType: "nvidia-tesla-t4"
gpuCount: 2
nvidiaDriverVersion: "418.87.00"
zones: ["us-central1-c"]
}
```

`nvidiaDriverVersion` is deprecated and ignored; GCP Batch selects the correct driver version automatically.

### `cpuPlatform`

This option is specific to the Google Cloud backend, specifically [this](https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform) feature when a certain minimum CPU platform is desired.
2 changes: 1 addition & 1 deletion src/ci/bin/testCheckPublish.sh
@@ -10,6 +10,6 @@ cromwell::build::setup_common_environment
cromwell::build::pip_install mkdocs
mkdocs build -s

sbt -Dsbt.supershell=false --warn +package assembly dockerPushCheck +doc
sbt -Dsbt.supershell=false --warn +package assembly dockerPushCheck
Collaborator (author): Disable no-op Scaladoc generation: We write the HTML docs to disk on the CI instance and then throw them away.

Collaborator (author): (My block comment in GcpBatchRequestFactoryImpl.scala is invalid scaladoc, which is how I found out we did scaladoc)

git secrets --scan-history
@@ -81,7 +81,7 @@ object GcpBatchAsyncBackendJobExecutionActor {

new Exception(
s"Task $jobTag failed. $returnCodeMessage GCP Batch task exited with ${errorCode}(${errorCode.code}). ${message}"
)
) with NoStackTrace
Contributor: Why the no stack trace?

Collaborator (author): When we deliberately create exceptions in the program flow, my opinion is that they should never have a stack trace as it clutters the log and is not relevant for debugging.

A second order issue is that users often diligently copy-paste entire stack traces, rendering Slack threads and Zendesk cases unreadable.

After:

2025-10-16 14:38:12 cromwell-system-akka.dispatchers.engine-dispatcher-5 INFO  -
  WorkflowManagerActor: Workflow 974aa6ec-eccf-4267-8e83-65f230967dd6 failed (during ExecutingWorkflowState): cromwell.backend.google.batch.actors.GcpBatchAsyncBackendJobExecutionActor$$anon$1: Task minimal_hello_world.hello_world:NA:1 failed.
  The job was stopped before the command finished. GCP Batch task exited with Success(0).

Before:

2025-10-16 16:58:09 cromwell-system-akka.dispatchers.engine-dispatcher-111 INFO  -
  WorkflowManagerActor: Workflow 70e6cac9-e991-48a6-92e9-da333c209e1e failed (during ExecutingWorkflowState): java.lang.Exception: Task minimal_hello_world.hello_world:NA:1 failed.
  The job was stopped before the command finished. GCP Batch task exited with Success(0). 
	at cromwell.backend.google.batch.actors.GcpBatchAsyncBackendJobExecutionActor$.StandardException(GcpBatchAsyncBackendJobExecutionActor.scala:83)
	at cromwell.backend.google.batch.actors.GcpBatchAsyncBackendJobExecutionActor.handleFailedRunStatus$1(GcpBatchAsyncBackendJobExecutionActor.scala:1152)
	at cromwell.backend.google.batch.actors.GcpBatchAsyncBackendJobExecutionActor.$anonfun$handleExecutionFailure$1(GcpBatchAsyncBackendJobExecutionActor.scala:1168)
	at scala.util.Try$.apply(Try.scala:210)
	at cromwell.backend.google.batch.actors.GcpBatchAsyncBackendJobExecutionActor.handleExecutionFailure(GcpBatchAsyncBackendJobExecutionActor.scala:1160)
	at cromwell.backend.google.batch.actors.GcpBatchAsyncBackendJobExecutionActor.handleExecutionFailure(GcpBatchAsyncBackendJobExecutionActor.scala:144)
	at cromwell.backend.standard.StandardAsyncExecutionActor$$anonfun$handleExecutionResult$11.applyOrElse(StandardAsyncExecutionActor.scala:1506)
	at cromwell.backend.standard.StandardAsyncExecutionActor$$anonfun$handleExecutionResult$11.applyOrElse(StandardAsyncExecutionActor.scala:1503)
	at scala.concurrent.impl.Promise$Transformation.run(Promise.scala:490)
	at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:41)
	at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(ForkJoinExecutorConfigurator.scala:49)
	at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
	at akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
	at akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
	at akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
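The before/after logs above come down to Scala's `NoStackTrace` mixin, which overrides `fillInStackTrace` to skip frame collection. A minimal standalone sketch (illustrative, not Cromwell code):

```scala
import scala.util.control.NoStackTrace

// Mixing NoStackTrace into an exception created for control flow leaves
// getStackTrace empty, so a logger prints only the message line instead
// of a full trace.
object NoStackTraceDemo {
  def main(args: Array[String]): Unit = {
    val quiet = new Exception("GCP Batch task exited with Success(0).") with NoStackTrace
    val noisy = new Exception("GCP Batch task exited with Success(0).")
    println(quiet.getStackTrace.length)   // 0: fillInStackTrace is a no-op
    println(noisy.getStackTrace.nonEmpty) // true: frames recorded as usual
  }
}
```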

}

// GCS path regexes comments:
Expand Down
@@ -25,7 +25,7 @@ import com.google.cloud.batch.v1.{
import com.google.protobuf.Duration
import cromwell.backend.google.batch.io.GcpBatchAttachedDisk
import cromwell.backend.google.batch.models.GcpBatchConfigurationAttributes.GcsTransferConfiguration
import cromwell.backend.google.batch.models.{GcpBatchRequest, VpcAndSubnetworkProjectLabelValues}
import cromwell.backend.google.batch.models.{GcpBatchRequest, MachineType, VpcAndSubnetworkProjectLabelValues}
import cromwell.backend.google.batch.runnable._
import cromwell.backend.google.batch.util.{BatchUtilityConversions, GcpBatchMachineConstraints}
import cromwell.core.labels.{Label, Labels}
@@ -256,14 +256,33 @@ class GcpBatchRequestFactoryImpl()(implicit gcsTransferConfiguration: GcsTransfe
isBackground = _.getBackground
)

/**
* The "compute resource" concept is a suggestion to Batch regarding how many jobs can fit on a single VM.
Contributor: I am not sure I 100% understand. Why do we supply the compute resource if it is not used and leads to UI confusion?

Collaborator (author), Oct 16, 2025: Because otherwise Google displays default values that are even more wrong.

If we make a one-line change to develop to omit setComputeResource() we still get the right machine shape, we just get Google's default values in the UI.

In the future we could enhance the code to calculate a CPU and memory size for each predefinedMachineShape and set them in the UI as well. As far as I can tell this is a nice-to-have, maybe it will happen as part of the cost enhancements.

Contributor: Ah gotcha, yeah definitely a follow-up thing to do if you can add that to the cost ticket
* The Cromwell backend currently creates VMs at a 1:1 ratio with jobs, so the compute resource is effectively ignored.
*
* That said, it has a cosmetic effect in the Batch web UI, where it drives the "Cores" and "Memory" readouts.
* The machine type is the "real" VM shape; one can set bogus cores/memory in the compute resource,
* and it will have no effect other than the display.
*/
val computeResource = createComputeResource(cpuCores, memory, gcpBootDiskSizeMb)
val taskSpec = createTaskSpec(sortedRunnables, computeResource, durationInSeconds, allVolumes)
val taskGroup: TaskGroup = createTaskGroup(taskCount, taskSpec)
val machineType = GcpBatchMachineConstraints.machineType(runtimeAttributes.memory,
runtimeAttributes.cpu,
cpuPlatformOption = runtimeAttributes.cpuPlatform,
jobLogger = jobLogger
)

val machineType = runtimeAttributes.machine match {
case Some(m: MachineType) =>
// Allow users to select predefined machine types, such as `n2-standard-4`.
// Overrides CPU count and memory attributes.
// We still pass platform when a machine is specified; it is the user's responsibility to select a valid type/platform combination
m.machineType
case None =>
// CPU platform drives selection of machine type, but is not encoded in the `machineType` return value itself
GcpBatchMachineConstraints.machineType(runtimeAttributes.memory,
runtimeAttributes.cpu,
cpuPlatformOption = runtimeAttributes.cpuPlatform,
jobLogger = jobLogger
)
}

val instancePolicy =
createInstancePolicy(cpuPlatform = cpuPlatform, spotModel, accelerators, allDisks, machineType = machineType)
val locationPolicy = LocationPolicy.newBuilder.addAllAllowedLocations(zones.asJava).build