From d66f81eb2cbec4038817a2d6a822268cc35f0fa7 Mon Sep 17 00:00:00 2001 From: Fagani Hajizada Date: Wed, 15 Oct 2025 18:12:16 +0200 Subject: [PATCH 1/3] feat(worker): expose SSH port for pam_slurm_adopt Add SSH port (22) to worker container specification to enable users to SSH into worker nodes where they have running jobs. This works with updated slurmd images that include pam_slurm_adopt for access control. --- internal/builder/worker_app.go | 6 ++++++ internal/builder/worker_app_test.go | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/internal/builder/worker_app.go b/internal/builder/worker_app.go index c3e7c81e..597973e6 100644 --- a/internal/builder/worker_app.go +++ b/internal/builder/worker_app.go @@ -23,6 +23,7 @@ import ( const ( SlurmdPort = 6818 + SshPort = 22 slurmdUser = "root" @@ -118,6 +119,11 @@ func (b *Builder) slurmdContainer(nodeset *slinkyv1beta1.NodeSet, controller *sl ContainerPort: SlurmdPort, Protocol: corev1.ProtocolTCP, }, + { + Name: "ssh", + ContainerPort: SshPort, + Protocol: corev1.ProtocolTCP, + }, }, StartupProbe: &corev1.Probe{ ProbeHandler: corev1.ProbeHandler{ diff --git a/internal/builder/worker_app_test.go b/internal/builder/worker_app_test.go index 03e58dcc..003bccce 100644 --- a/internal/builder/worker_app_test.go +++ b/internal/builder/worker_app_test.go @@ -93,6 +93,14 @@ func TestBuilder_BuildWorkerPodTemplate(t *testing.T) { t.Errorf("Containers[0].Ports[0].ContainerPort = %v , want = %v", got.Spec.Containers[0].Ports[0].Name, SlurmdPort) + case got.Spec.Containers[0].Ports[1].Name != "ssh": + t.Errorf("Containers[0].Ports[1].Name = %v , want = ssh", + got.Spec.Containers[0].Ports[1].Name) + + case got.Spec.Containers[0].Ports[1].ContainerPort != SshPort: + t.Errorf("Containers[0].Ports[1].ContainerPort = %v , want = %v", + got.Spec.Containers[0].Ports[1].ContainerPort, SshPort) + case got.Spec.Subdomain == "": t.Errorf("Subdomain = %v , want = non-empty", got.Spec.Subdomain) From 
c4e92d89f854008f4aa3293b620e19f00b9fd8c0 Mon Sep 17 00:00:00 2001 From: Fagani Hajizada Date: Wed, 26 Nov 2025 11:32:58 +0100 Subject: [PATCH 2/3] fix(builder): correct SSH key type assignments in LoginSet RSA and ECDSA keys were swapped in the secret data mapping. This fix aligns key types with their filenames. --- internal/builder/login_secret.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/builder/login_secret.go b/internal/builder/login_secret.go index 3df02f9e..3f1f96aa 100644 --- a/internal/builder/login_secret.go +++ b/internal/builder/login_secret.go @@ -32,12 +32,12 @@ func (b *Builder) BuildLoginSshHostKeys(loginset *slinkyv1beta1.LoginSet) (*core Key: loginset.SshHostKeys(), Metadata: loginset.Spec.Template.PodMetadata, Data: map[string][]byte{ - sshHostEcdsaKeyFile: keyPairRsa.PrivateKey(), - sshHostEcdsaPubKeyFile: keyPairRsa.PublicKey(), + sshHostEcdsaKeyFile: keyPairEcdsa.PrivateKey(), + sshHostEcdsaPubKeyFile: keyPairEcdsa.PublicKey(), sshHostEd25519KeyFile: keyPairEd25519.PrivateKey(), sshHostEd25519PubKeyFile: keyPairEd25519.PublicKey(), - sshHostRsaKeyFile: keyPairEcdsa.PrivateKey(), - sshHostRsaPubKeyFile: keyPairEcdsa.PublicKey(), + sshHostRsaKeyFile: keyPairRsa.PrivateKey(), + sshHostRsaPubKeyFile: keyPairRsa.PublicKey(), }, Immutable: true, } From a7e1ce90bfd68a0b86451aa0456ce867c11ed807 Mon Sep 17 00:00:00 2001 From: Fagani Hajizada Date: Wed, 26 Nov 2025 13:57:23 +0100 Subject: [PATCH 3/3] feat: add optional SSH access to worker pods Enable SSH access to NodeSet worker pods with a CRD toggle, following the same pattern as LoginSet. SSH host keys are shared across all pods in a NodeSet to prevent "host key changed" warnings when pods are recreated or scaled. Ref: https://slurm.schedmd.com/pam_slurm_adopt.html
--- api/v1beta1/nodeset_keys.go | 8 ++ api/v1beta1/nodeset_types.go | 12 +++ api/v1beta1/zz_generated.deepcopy.go | 16 ++++ .../crd/bases/slinky.slurm.net_nodesets.yaml | 12 +++ .../templates/slinky.slurm.net_nodesets.yaml | 12 +++ helm/slurm/templates/nodeset/nodeset-cr.yaml | 4 + helm/slurm/values.yaml | 5 + internal/builder/worker_app.go | 92 +++++++++++++++---- internal/builder/worker_app_test.go | 8 -- internal/builder/worker_secret.go | 48 ++++++++++ internal/builder/worker_secret_test.go | 73 +++++++++++++++ internal/controller/nodeset/nodeset_sync.go | 26 ++++++ 12 files changed, 288 insertions(+), 28 deletions(-) create mode 100644 internal/builder/worker_secret.go create mode 100644 internal/builder/worker_secret_test.go diff --git a/api/v1beta1/nodeset_keys.go b/api/v1beta1/nodeset_keys.go index 06dcd590..6b74460c 100644 --- a/api/v1beta1/nodeset_keys.go +++ b/api/v1beta1/nodeset_keys.go @@ -23,3 +23,11 @@ func (o *NodeSet) HeadlessServiceKey() types.NamespacedName { Namespace: o.Namespace, } } + +func (o *NodeSet) SshHostKeys() types.NamespacedName { + key := o.Key() + return types.NamespacedName{ + Name: fmt.Sprintf("%s-ssh-host-keys", key.Name), + Namespace: o.Namespace, + } +} diff --git a/api/v1beta1/nodeset_types.go b/api/v1beta1/nodeset_types.go index 8e36536c..f6052e88 100644 --- a/api/v1beta1/nodeset_types.go +++ b/api/v1beta1/nodeset_types.go @@ -37,6 +37,10 @@ type NodeSetSpec struct { // +optional Slurmd ContainerWrapper `json:"slurmd,omitempty"` + // SSH configuration for worker pods. + // +optional + Ssh NodeSetSsh `json:"ssh,omitzero"` + // The logfile sidecar configuration. // +optional LogFile ContainerWrapper `json:"logfile,omitzero"` @@ -112,6 +116,14 @@ type NodeSetPartition struct { Config string `json:"config,omitzero"` } +// NodeSetSsh defines SSH configuration for NodeSet worker pods. +type NodeSetSsh struct { + // Enabled controls whether SSH access is enabled for this NodeSet. 
+ // When enabled, SSH host keys will be created and mounted, and port 22 will be exposed. + // +default:=false + Enabled bool `json:"enabled"` +} + // NodeSetUpdateStrategy indicates the strategy that the NodeSet // controller will be used to perform updates. It includes any additional // parameters necessary to perform the update for the indicated strategy. diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index bad48853..1667c646 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -567,6 +567,7 @@ func (in *NodeSetSpec) DeepCopyInto(out *NodeSetSpec) { **out = **in } in.Slurmd.DeepCopyInto(&out.Slurmd) + out.Ssh = in.Ssh in.LogFile.DeepCopyInto(&out.LogFile) in.Template.DeepCopyInto(&out.Template) out.Partition = in.Partition @@ -600,6 +601,21 @@ func (in *NodeSetSpec) DeepCopy() *NodeSetSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NodeSetSsh) DeepCopyInto(out *NodeSetSsh) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeSetSsh. +func (in *NodeSetSsh) DeepCopy() *NodeSetSsh { + if in == nil { + return nil + } + out := new(NodeSetSsh) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NodeSetStatus) DeepCopyInto(out *NodeSetStatus) { *out = *in diff --git a/config/crd/bases/slinky.slurm.net_nodesets.yaml b/config/crd/bases/slinky.slurm.net_nodesets.yaml index c67763ef..ef411654 100644 --- a/config/crd/bases/slinky.slurm.net_nodesets.yaml +++ b/config/crd/bases/slinky.slurm.net_nodesets.yaml @@ -173,6 +173,18 @@ spec: Ref: https://github.com/kubernetes/api/blob/master/core/v1/types.go#L2885 type: object x-kubernetes-preserve-unknown-fields: true + ssh: + description: SSH configuration for worker pods. 
+ properties: + enabled: + default: false + description: |- + Enabled controls whether SSH access is enabled for this NodeSet. + When enabled, SSH host keys will be created and mounted, and port 22 will be exposed. + type: boolean + required: + - enabled + type: object taintKubeNodes: default: false description: |- diff --git a/helm/slurm-operator-crds/templates/slinky.slurm.net_nodesets.yaml b/helm/slurm-operator-crds/templates/slinky.slurm.net_nodesets.yaml index c67763ef..ef411654 100644 --- a/helm/slurm-operator-crds/templates/slinky.slurm.net_nodesets.yaml +++ b/helm/slurm-operator-crds/templates/slinky.slurm.net_nodesets.yaml @@ -173,6 +173,18 @@ spec: Ref: https://github.com/kubernetes/api/blob/master/core/v1/types.go#L2885 type: object x-kubernetes-preserve-unknown-fields: true + ssh: + description: SSH configuration for worker pods. + properties: + enabled: + default: false + description: |- + Enabled controls whether SSH access is enabled for this NodeSet. + When enabled, SSH host keys will be created and mounted, and port 22 will be exposed. + type: boolean + required: + - enabled + type: object taintKubeNodes: default: false description: |- diff --git a/helm/slurm/templates/nodeset/nodeset-cr.yaml b/helm/slurm/templates/nodeset/nodeset-cr.yaml index 22842b8e..385bd633 100644 --- a/helm/slurm/templates/nodeset/nodeset-cr.yaml +++ b/helm/slurm/templates/nodeset/nodeset-cr.yaml @@ -51,6 +51,10 @@ spec: config: {{ include "slurm.worker.partitionConfig" $nodeset.partition }} {{- end }}{{- /* if (include "slurm.worker.partitionConfig" $nodeset.partition) */}} {{- end }}{{- /* with $nodeset.partition */}} + {{- with $nodeset.ssh }} + ssh: + {{- toYaml . 
| nindent 4 }} + {{- end }}{{- /* with $nodeset.ssh */}} replicas: {{ $nodeset.replicas }} slurmd: {{- $_ := set $nodeset.slurmd "imagePullPolicy" (default $.Values.imagePullPolicy $nodeset.slurmd.imagePullPolicy) -}} diff --git a/helm/slurm/values.yaml b/helm/slurm/values.yaml index b27a1605..9a3e93d0 100644 --- a/helm/slurm/values.yaml +++ b/helm/slurm/values.yaml @@ -643,6 +643,11 @@ nodesets: configMap: {} # State: UP # MaxTime: UNLIMITED + # SSH configuration for this NodeSet. + # ssh: + # -- Enable SSH access to worker pods with pam_slurm_adopt. + # Ref: https://slurm.schedmd.com/pam_slurm_adopt.html + # enabled: false # -- Enable propagation of container `resources.limits` into slurmd. useResourceLimits: true # Update strategy configuration. diff --git a/internal/builder/worker_app.go b/internal/builder/worker_app.go index 597973e6..254a247d 100644 --- a/internal/builder/worker_app.go +++ b/internal/builder/worker_app.go @@ -68,7 +68,7 @@ func (b *Builder) BuildWorkerPodTemplate(nodeset *slinkyv1beta1.NodeSet, control InitContainers: []corev1.Container{ b.logfileContainer(spec.LogFile, slurmdLogFilePath), }, - Volumes: nodesetVolumes(controller), + Volumes: nodesetVolumes(nodeset, controller), Tolerations: []corev1.Toleration{ slurmtaints.TolerationWorkerNode, }, @@ -79,7 +79,7 @@ func (b *Builder) BuildWorkerPodTemplate(nodeset *slinkyv1beta1.NodeSet, control return b.buildPodTemplate(opts) } -func nodesetVolumes(controller *slinkyv1beta1.Controller) []corev1.Volume { +func nodesetVolumes(nodeset *slinkyv1beta1.NodeSet, controller *slinkyv1beta1.Controller) []corev1.Volume { out := []corev1.Volume{ { Name: slurmEtcVolume, @@ -103,28 +103,83 @@ func nodesetVolumes(controller *slinkyv1beta1.Controller) []corev1.Volume { }, logFileVolume(), } + + // Add SSH host keys volume if SSH is enabled + if nodeset.Spec.Ssh.Enabled { + out = append(out, corev1.Volume{ + Name: sshHostKeysVolume, + VolumeSource: corev1.VolumeSource{ + Projected: 
&corev1.ProjectedVolumeSource{ + DefaultMode: ptr.To[int32](0o600), + Sources: []corev1.VolumeProjection{ + { + Secret: &corev1.SecretProjection{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: nodeset.SshHostKeys().Name, + }, + Items: []corev1.KeyToPath{ + {Key: sshHostRsaKeyFile, Path: sshHostRsaKeyFile, Mode: ptr.To[int32](0o600)}, + {Key: sshHostRsaPubKeyFile, Path: sshHostRsaPubKeyFile, Mode: ptr.To[int32](0o644)}, + {Key: sshHostEd25519KeyFile, Path: sshHostEd25519KeyFile, Mode: ptr.To[int32](0o600)}, + {Key: sshHostEd25519PubKeyFile, Path: sshHostEd25519PubKeyFile, Mode: ptr.To[int32](0o644)}, + {Key: sshHostEcdsaKeyFile, Path: sshHostEcdsaKeyFile, Mode: ptr.To[int32](0o600)}, + {Key: sshHostEcdsaPubKeyFile, Path: sshHostEcdsaPubKeyFile, Mode: ptr.To[int32](0o644)}, + }, + }, + }, + }, + }, + }, + }) + } + return out } func (b *Builder) slurmdContainer(nodeset *slinkyv1beta1.NodeSet, controller *slinkyv1beta1.Controller) corev1.Container { merge := nodeset.Spec.Slurmd.Container + // Base ports always include slurmd + ports := []corev1.ContainerPort{ + { + Name: labels.WorkerApp, + ContainerPort: SlurmdPort, + Protocol: corev1.ProtocolTCP, + }, + } + + // Add SSH port if enabled + if nodeset.Spec.Ssh.Enabled { + ports = append(ports, corev1.ContainerPort{ + Name: "ssh", + ContainerPort: SshPort, + Protocol: corev1.ProtocolTCP, + }) + } + + // Base volume mounts + volumeMounts := []corev1.VolumeMount{ + {Name: slurmEtcVolume, MountPath: slurmEtcDir, ReadOnly: true}, + {Name: slurmLogFileVolume, MountPath: slurmLogFileDir}, + } + + // Add SSH host key mounts if enabled + if nodeset.Spec.Ssh.Enabled { + volumeMounts = append(volumeMounts, + corev1.VolumeMount{Name: sshHostKeysVolume, MountPath: sshHostRsaKeyFilePath, SubPath: sshHostRsaKeyFile, ReadOnly: true}, + corev1.VolumeMount{Name: sshHostKeysVolume, MountPath: sshHostRsaKeyPubFilePath, SubPath: sshHostRsaPubKeyFile, ReadOnly: true}, + corev1.VolumeMount{Name: sshHostKeysVolume, MountPath: 
sshHostEd25519KeyFilePath, SubPath: sshHostEd25519KeyFile, ReadOnly: true}, + corev1.VolumeMount{Name: sshHostKeysVolume, MountPath: sshHostEd25519PubKeyFilePath, SubPath: sshHostEd25519PubKeyFile, ReadOnly: true}, + corev1.VolumeMount{Name: sshHostKeysVolume, MountPath: sshHostEcdsaKeyFilePath, SubPath: sshHostEcdsaKeyFile, ReadOnly: true}, + corev1.VolumeMount{Name: sshHostKeysVolume, MountPath: sshHostEcdsaPubKeyFilePath, SubPath: sshHostEcdsaPubKeyFile, ReadOnly: true}, + ) + } + opts := ContainerOpts{ base: corev1.Container{ - Name: labels.WorkerApp, - Args: slurmdArgs(nodeset, controller), - Ports: []corev1.ContainerPort{ - { - Name: labels.WorkerApp, - ContainerPort: SlurmdPort, - Protocol: corev1.ProtocolTCP, - }, - { - Name: "ssh", - ContainerPort: SshPort, - Protocol: corev1.ProtocolTCP, - }, - }, + Name: labels.WorkerApp, + Args: slurmdArgs(nodeset, controller), + Ports: ports, StartupProbe: &corev1.Probe{ ProbeHandler: corev1.ProbeHandler{ HTTPGet: &corev1.HTTPGetAction{ @@ -175,10 +230,7 @@ func (b *Builder) slurmdContainer(nodeset *slinkyv1beta1.NodeSet, controller *sl }, }, }, - VolumeMounts: []corev1.VolumeMount{ - {Name: slurmEtcVolume, MountPath: slurmEtcDir, ReadOnly: true}, - {Name: slurmLogFileVolume, MountPath: slurmLogFileDir}, - }, + VolumeMounts: volumeMounts, }, merge: merge, } diff --git a/internal/builder/worker_app_test.go b/internal/builder/worker_app_test.go index 003bccce..03e58dcc 100644 --- a/internal/builder/worker_app_test.go +++ b/internal/builder/worker_app_test.go @@ -93,14 +93,6 @@ func TestBuilder_BuildWorkerPodTemplate(t *testing.T) { t.Errorf("Containers[0].Ports[0].ContainerPort = %v , want = %v", got.Spec.Containers[0].Ports[0].Name, SlurmdPort) - case got.Spec.Containers[0].Ports[1].Name != "ssh": - t.Errorf("Containers[0].Ports[1].Name = %v , want = ssh", - got.Spec.Containers[0].Ports[1].Name) - - case got.Spec.Containers[0].Ports[1].ContainerPort != SshPort: - t.Errorf("Containers[0].Ports[1].ContainerPort = %v , 
want = %v", - got.Spec.Containers[0].Ports[1].ContainerPort, SshPort) - case got.Spec.Subdomain == "": t.Errorf("Subdomain = %v , want = non-empty", got.Spec.Subdomain) diff --git a/internal/builder/worker_secret.go b/internal/builder/worker_secret.go new file mode 100644 index 00000000..31502b0a --- /dev/null +++ b/internal/builder/worker_secret.go @@ -0,0 +1,48 @@ +// SPDX-FileCopyrightText: Copyright (C) SchedMD LLC. +// SPDX-License-Identifier: Apache-2.0 + +package builder + +import ( + "fmt" + + corev1 "k8s.io/api/core/v1" + + slinkyv1beta1 "github.com/SlinkyProject/slurm-operator/api/v1beta1" + "github.com/SlinkyProject/slurm-operator/internal/builder/labels" + "github.com/SlinkyProject/slurm-operator/internal/utils/crypto" + "github.com/SlinkyProject/slurm-operator/internal/utils/structutils" +) + +func (b *Builder) BuildWorkerSshHostKeys(nodeset *slinkyv1beta1.NodeSet) (*corev1.Secret, error) { + keyPairRsa, err := crypto.NewKeyPair(crypto.WithType(crypto.KeyPairRsa)) + if err != nil { + return nil, fmt.Errorf("failed to create RSA key pair: %w", err) + } + keyPairEd25519, err := crypto.NewKeyPair(crypto.WithType(crypto.KeyPairEd25519)) + if err != nil { + return nil, fmt.Errorf("failed to create ED25519 key pair: %w", err) + } + keyPairEcdsa, err := crypto.NewKeyPair(crypto.WithType(crypto.KeyPairEcdsa)) + if err != nil { + return nil, fmt.Errorf("failed to create ECDSA key pair: %w", err) + } + + opts := SecretOpts{ + Key: nodeset.SshHostKeys(), + Metadata: nodeset.Spec.Template.PodMetadata, + Data: map[string][]byte{ + sshHostEcdsaKeyFile: keyPairEcdsa.PrivateKey(), + sshHostEcdsaPubKeyFile: keyPairEcdsa.PublicKey(), + sshHostEd25519KeyFile: keyPairEd25519.PrivateKey(), + sshHostEd25519PubKeyFile: keyPairEd25519.PublicKey(), + sshHostRsaKeyFile: keyPairRsa.PrivateKey(), + sshHostRsaPubKeyFile: keyPairRsa.PublicKey(), + }, + Immutable: true, + } + + opts.Metadata.Labels = structutils.MergeMaps(opts.Metadata.Labels, 
labels.NewBuilder().WithWorkerLabels(nodeset).Build()) + + return b.BuildSecret(opts, nodeset) +} diff --git a/internal/builder/worker_secret_test.go b/internal/builder/worker_secret_test.go new file mode 100644 index 00000000..134cbefa --- /dev/null +++ b/internal/builder/worker_secret_test.go @@ -0,0 +1,73 @@ +// SPDX-FileCopyrightText: Copyright (C) SchedMD LLC. +// SPDX-License-Identifier: Apache-2.0 + +package builder + +import ( + "testing" + + slinkyv1beta1 "github.com/SlinkyProject/slurm-operator/api/v1beta1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestBuilder_BuildWorkerSshHostKeys(t *testing.T) { + type fields struct { + client client.Client + } + type args struct { + nodeset *slinkyv1beta1.NodeSet + } + tests := []struct { + name string + fields fields + args args + wantErr bool + }{ + { + name: "default", + fields: fields{ + client: fake.NewFakeClient(), + }, + args: args{ + nodeset: &slinkyv1beta1.NodeSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "slurm", + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + b := New(tt.fields.client) + got, err := b.BuildWorkerSshHostKeys(tt.args.nodeset) + if (err != nil) != tt.wantErr { + t.Errorf("Builder.BuildWorkerSshHostKeys() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if err != nil { + return + } + + switch { + case got.Data[sshHostEcdsaKeyFile] == nil && got.StringData[sshHostEcdsaKeyFile] == "": + t.Errorf("got.Data[%s] = %v", sshHostEcdsaKeyFile, got.Data[sshHostEcdsaKeyFile]) + case got.Data[sshHostEcdsaPubKeyFile] == nil && got.StringData[sshHostEcdsaPubKeyFile] == "": + t.Errorf("got.Data[%s] = %v", sshHostEcdsaPubKeyFile, got.Data[sshHostEcdsaPubKeyFile]) + + case got.Data[sshHostEd25519KeyFile] == nil && got.StringData[sshHostEd25519KeyFile] == "": + t.Errorf("got.Data[%s] = %v", sshHostEd25519KeyFile, got.Data[sshHostEd25519KeyFile]) 
+ case got.Data[sshHostEd25519PubKeyFile] == nil && got.StringData[sshHostEd25519PubKeyFile] == "": + t.Errorf("got.Data[%s] = %v", sshHostEd25519PubKeyFile, got.Data[sshHostEd25519PubKeyFile]) + + case got.Data[sshHostRsaKeyFile] == nil && got.StringData[sshHostRsaKeyFile] == "": + t.Errorf("got.Data[%s] = %v", sshHostRsaKeyFile, got.Data[sshHostRsaKeyFile]) + case got.Data[sshHostRsaPubKeyFile] == nil && got.StringData[sshHostRsaPubKeyFile] == "": + t.Errorf("got.Data[%s] = %v", sshHostRsaPubKeyFile, got.Data[sshHostRsaPubKeyFile]) + } + }) + } +} diff --git a/internal/controller/nodeset/nodeset_sync.go b/internal/controller/nodeset/nodeset_sync.go index 39a209ae..ed0dcf91 100644 --- a/internal/controller/nodeset/nodeset_sync.go +++ b/internal/controller/nodeset/nodeset_sync.go @@ -231,6 +231,10 @@ func (r *NodeSetReconciler) sync( return err } + if err := r.syncSshHostKeys(ctx, nodeset); err != nil { + return err + } + if err := r.syncSlurmDeadline(ctx, nodeset, pods); err != nil { return err } @@ -1119,3 +1123,25 @@ func (r *NodeSetReconciler) syncClusterWorkerPDB( return nil } + +// syncSshHostKeys manages SSH host keys secret for the NodeSet if SSH is enabled +func (r *NodeSetReconciler) syncSshHostKeys( + ctx context.Context, + nodeset *slinkyv1beta1.NodeSet, +) error { + // Only create SSH host keys if SSH is enabled + if !nodeset.Spec.Ssh.Enabled { + return nil + } + + secret, err := r.builder.BuildWorkerSshHostKeys(nodeset) + if err != nil { + return fmt.Errorf("failed to build SSH host keys secret: %w", err) + } + + if err := objectutils.SyncObject(r.Client, ctx, secret, true); err != nil { + return fmt.Errorf("failed to sync SSH host keys secret (%s): %w", klog.KObj(secret), err) + } + + return nil +}