From e176a1f9bd3df30821cf07f94e01a4caf0791b4e Mon Sep 17 00:00:00 2001 From: "Fabio M. Graetz, Ph.D." Date: Sat, 29 Nov 2025 09:06:59 +0100 Subject: [PATCH] Doc: Explain how to use a RayJob with Kueue and ProvisioningRequest despite GKE's single PodSet limitation Signed-off-by: Fabio M. Graetz, Ph.D. --- .../kubernetes/examples/rayjob-kueue-gang-scheduling.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/source/cluster/kubernetes/examples/rayjob-kueue-gang-scheduling.md b/doc/source/cluster/kubernetes/examples/rayjob-kueue-gang-scheduling.md index 81bab762e97b..68632aeffd94 100644 --- a/doc/source/cluster/kubernetes/examples/rayjob-kueue-gang-scheduling.md +++ b/doc/source/cluster/kubernetes/examples/rayjob-kueue-gang-scheduling.md @@ -114,6 +114,7 @@ metadata: name: rayjob-gpu-config spec: provisioningClassName: queued-provisioning.gke.io + podSetMergePolicy: IdenticalWorkloadSchedulingRequirements managedResources: - nvidia.com/gpu --- @@ -155,6 +156,12 @@ kubectl apply -f kueue-resources.yaml This example configures Kueue to orchestrate the gang scheduling of GPUs. However, you can use other resources such as CPU and memory. ::: +:::{note} +Google Kubernetes Engine's queued provisioning feature currently supports only a single PodSet per request. To circumvent this issue, we +set `podSetMergePolicy: IdenticalWorkloadSchedulingRequirements` in the `ProvisioningRequestConfig`. When the head node and the +worker nodes have the same resource requirements, affinities, and tolerations, Kueue merges them into a single PodSet in the `ProvisioningRequest`. +::: + ## Deploy a RayJob Download the RayJob that executes all the steps documented in [Fine-tune a PyTorch Lightning Text Classifier](https://docs.ray.io/en/master/train/examples/lightning/lightning_cola_advanced.html). The [source code](https://github.com/ray-project/kuberay/tree/master/ray-operator/config/samples/pytorch-text-classifier) is also in the KubeRay repository.