From 372cb6d3009978bc01e68d86c613b217b6b51aee Mon Sep 17 00:00:00 2001 From: subburamoracle <110624822+subburamoracle@users.noreply.github.com> Date: Mon, 17 Nov 2025 19:26:23 +0530 Subject: [PATCH 1/5] Add files via upload --- files/oke-fss-mount.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 files/oke-fss-mount.sh diff --git a/files/oke-fss-mount.sh b/files/oke-fss-mount.sh new file mode 100644 index 0000000..435e7f8 --- /dev/null +++ b/files/oke-fss-mount.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# Variables (set these to your OCI details) +MOUNT_TARGET_IP="10.140.0.122" # IP address of OCI FSS mount target +EXPORT_PATH="/oke-gpu-jevhap" # Export path of FSS (found in OCI console) +MOUNT_POINT="/mnt/oci-fss" # Local directory to mount FSS + +# Script to execute on each worker node +if command -v yum >/dev/null 2>&1; then + sudo yum install -y nfs-utils + echo "Installed nfs-utils" +elif command -v apt-get >/dev/null 2>&1; then +fi + sudo mkdir -p $MOUNT_POINT + sudo mount -t nfs -o vers=3 $MOUNT_TARGET_IP:$EXPORT_PATH $MOUNT_POINT + echo "Successfully mounted" + if ! 
grep -q '$MOUNT_TARGET_IP:$EXPORT_PATH' /etc/fstab; then + echo "$MOUNT_TARGET_IP:$EXPORT_PATH $MOUNT_POINT nfs vers=3,_netdev 0 0" | sudo tee -a /etc/fstab +fi From d211acf240b93f5d9e8ab2f5e46602641793fc7d Mon Sep 17 00:00:00 2001 From: subburamoracle <110624822+subburamoracle@users.noreply.github.com> Date: Mon, 17 Nov 2025 21:20:05 +0530 Subject: [PATCH 2/5] Update oke-fss-mount.sh --- files/oke-fss-mount.sh | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/files/oke-fss-mount.sh b/files/oke-fss-mount.sh index 435e7f8..9911553 100644 --- a/files/oke-fss-mount.sh +++ b/files/oke-fss-mount.sh @@ -1,19 +1,36 @@ #!/bin/bash -# Variables (set these to your OCI details) -MOUNT_TARGET_IP="10.140.0.122" # IP address of OCI FSS mount target -EXPORT_PATH="/oke-gpu-jevhap" # Export path of FSS (found in OCI console) -MOUNT_POINT="/mnt/oci-fss" # Local directory to mount FSS +# Positional arguments (all required): $1 export path, $2 mount point, $3 mount target IP +MOUNT_TARGET_IP="${3:?mount target IP is required as argument 3}" # IP address of OCI FSS mount target +EXPORT_PATH="${1:?export path is required as argument 1}" # Export path of FSS (from OCI Console) +MOUNT_POINT="${2:?mount point is required as argument 2}" # Local directory to mount FSS -# Script to execute on each worker node +# Install NFS utils on yum or apt-get systems if command -v yum >/dev/null 2>&1; then - sudo yum install -y nfs-utils - echo "Installed nfs-utils" + yum install -y nfs-utils + echo "Installed nfs-utils via yum" elif command -v apt-get >/dev/null 2>&1; then + apt-get update + apt-get install -y nfs-common + echo "Installed nfs-common via apt-get" fi - sudo mkdir -p $MOUNT_POINT - sudo mount -t nfs -o vers=3 $MOUNT_TARGET_IP:$EXPORT_PATH $MOUNT_POINT - echo "Successfully mounted" - if ! 
grep -q '$MOUNT_TARGET_IP:$EXPORT_PATH' /etc/fstab; then - echo "$MOUNT_TARGET_IP:$EXPORT_PATH $MOUNT_POINT nfs vers=3,_netdev 0 0" | sudo tee -a /etc/fstab + +# Create mount point directory if it doesn't exist +mkdir -p "$MOUNT_POINT" + +# Mount the NFS share +mount -t nfs -o vers=3 "$MOUNT_TARGET_IP:$EXPORT_PATH" "$MOUNT_POINT" +if [ $? -eq 0 ]; then + echo "Successfully mounted $MOUNT_TARGET_IP:$EXPORT_PATH at $MOUNT_POINT" +else + echo "Failed to mount $MOUNT_TARGET_IP:$EXPORT_PATH" >&2 + exit 1 +fi + +# Add entry to /etc/fstab for re-mount at boot (if not already present); -F matches literally so dots in the IP are not regex wildcards +if ! grep -qF -- "$MOUNT_TARGET_IP:$EXPORT_PATH" /etc/fstab; then + echo "$MOUNT_TARGET_IP:$EXPORT_PATH $MOUNT_POINT nfs vers=3,_netdev 0 0" >> /etc/fstab + echo "Added mount entry to /etc/fstab" +else + echo "Mount entry already exists in /etc/fstab" fi From 8f80f9bf78d6472bb1e937716df046b7ca43597c Mon Sep 17 00:00:00 2001 From: subburamoracle Date: Fri, 21 Nov 2025 21:30:43 +0530 Subject: [PATCH 3/5] Issue number:90 Signed-off-by: Subburam Mathuraiveeran subburam.mathuraiveeran@oracle.com:wq implemented changes related to fss mount on all worker nodes --- terraform/fss.tf | 7 ++++++- terraform/oke-workers.tf | 9 +++++++++ terraform/schema.yaml | 14 ++++++++++++++ terraform/variables.tf | 11 +++++++++++ 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/terraform/fss.tf b/terraform/fss.tf index fecdeea..5969219 100644 --- a/terraform/fss.tf +++ b/terraform/fss.tf @@ -1,8 +1,13 @@ # Copyright (c) 2025 Oracle Corporation and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl +#locals { +# fss_export_path = format("/oke-gpu-%v", local.state_id) +#} + +#export path picked from user input locals { - fss_export_path = format("/oke-gpu-%v", local.state_id) + fss_export_path = var.fss_export_path } data "oci_file_storage_mount_targets" "fss" { diff --git a/terraform/oke-workers.tf b/terraform/oke-workers.tf index 9989152..f34133d 100644 --- a/terraform/oke-workers.tf +++ b/terraform/oke-workers.tf @@ -3,6 +3,7 @@ locals { create_workers = true + fss_mount_ip = try(data.oci_core_private_ip.fss_mt_ip[0].ip_address, "") ssh_authorized_keys = compact([ trimspace(local.ssh_public_key), ]) @@ -25,6 +26,13 @@ locals { var.nvme_raid_level, ) : "" + # FSS mount on worker nodes. TODO(review): the script is fetched from the patch author's feature branch; repoint to a stable location before merging + + runcmd_fss_mount = var.create_fss ? format( + "curl -sL -o /var/run/oke-fss-mount.sh https://raw.githubusercontent.com/subburamoracle/oci-hpc-oke/refs/heads/fssmount_worker/files/oke-fss-mount.sh && (bash /var/run/oke-fss-mount.sh '%v' '%v' '%v' || echo 'Error mounting FSS' >&2)", + var.fss_export_path, var.fss_mount_path, local.fss_mount_ip + ) : "" + write_files = [ { content = local.cluster_apiserver, @@ -43,6 +51,7 @@ locals { runcmd = compact([ local.runcmd_nvme_raid, local.runcmd_bootstrap, + local.runcmd_fss_mount ]) write_files = local.write_files } diff --git a/terraform/schema.yaml b/terraform/schema.yaml index 025a024..8a0cba6 100644 --- a/terraform/schema.yaml +++ b/terraform/schema.yaml @@ -184,6 +184,8 @@ variableGroups: - create_bv_high - create_fss - fss_ad + - fss_export_path + - fss_mount_path - create_lustre - lustre_ad - lustre_size_in_tb @@ -743,6 +745,18 @@ variables: visible: ${create_fss} dependsOn: compartmentId: ${compartment_ocid} + fss_export_path: + title: FSS export path + type: string + default: "/oke-hpc-export" + required: true + visible: ${create_fss} + fss_mount_path: + title: FSS mount point + type: string + default: 
"/mnt/oci-fss" + required: true + visible: ${create_fss} create_bv_high: title: Create a storage class backed by higher performance OCI block volumes description: "Create a storage class backed by higher performance OCI block volumes (VPU: 20)." diff --git a/terraform/variables.tf b/terraform/variables.tf index 159025c..48161f0 100644 --- a/terraform/variables.tf +++ b/terraform/variables.tf @@ -188,6 +188,17 @@ variable "create_lustre_pv" { default = true type = bool } +# created variable for fss mounting +variable "fss_export_path" { + default = "/oke-hpc-export" + type = string +} +variable "fss_mount_path" { + default = "/mnt/oci-fss" + type = string +} + + # MONITORING variable "install_monitoring" { From 18b44ba90c9af30310d38d7e3e616de0ede8187b Mon Sep 17 00:00:00 2001 From: subburamoracle Date: Fri, 21 Nov 2025 21:30:43 +0530 Subject: [PATCH 4/5] Issue number:90 Signed-off-by: Subburam Mathuraiveeran subburam.mathuraiveeran@oracle.com:wq implemented changes related to fss mount on all worker nodes --- files/oke-fss-mount.sh | 5 +++++ terraform/oke-workers.tf | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/files/oke-fss-mount.sh b/files/oke-fss-mount.sh index 9911553..6eda4a8 100644 --- a/files/oke-fss-mount.sh +++ b/files/oke-fss-mount.sh @@ -10,6 +10,11 @@ if command -v yum >/dev/null 2>&1; then yum install -y nfs-utils echo "Installed nfs-utils via yum" elif command -v apt-get >/dev/null 2>&1; then + # Wait until no process holds the dpkg/apt lock files before installing the package + while fuser /var/lib/dpkg/{lock,lock-frontend} /var/{lib/apt/lists,cache/apt/archives}/lock >/dev/null 2>&1; do + echo "Waiting for dpkg/apt lock" + sleep 1 + done apt-get update apt-get install -y nfs-common echo "Installed nfs-common via apt-get" diff --git a/terraform/oke-workers.tf b/terraform/oke-workers.tf index f34133d..d76d4e2 100644 --- a/terraform/oke-workers.tf +++ b/terraform/oke-workers.tf @@ -28,7 +28,7 @@ locals { # FSS mount on worker nodes. TODO(review): the script is fetched from the patch author's feature branch; repoint to a stable location before merging - runcmd_fss_mount = var.create_fss ? 
format( + runcmd_fss_mount = var.create_fss && local.fss_mount_ip != "" ? format( "curl -sL -o /var/run/oke-fss-mount.sh https://raw.githubusercontent.com/subburamoracle/oci-hpc-oke/refs/heads/fssmount_worker/files/oke-fss-mount.sh && (bash /var/run/oke-fss-mount.sh '%v' '%v' '%v' || echo 'Error mounting FSS' >&2)", var.fss_export_path, var.fss_mount_path, local.fss_mount_ip ) : "" From 2f01ef98a6a976f82e0ec748be79b0a4a5672b53 Mon Sep 17 00:00:00 2001 From: subburamoracle Date: Fri, 28 Nov 2025 12:50:35 +0530 Subject: [PATCH 5/5] mend --- terraform/fss.tf | 12 ++++++------ terraform/oke-workers.tf | 4 ++-- terraform/schema.yaml | 7 ------- terraform/variables.tf | 4 ---- 4 files changed, 8 insertions(+), 19 deletions(-) diff --git a/terraform/fss.tf b/terraform/fss.tf index 5969219..96105a0 100644 --- a/terraform/fss.tf +++ b/terraform/fss.tf @@ -1,15 +1,15 @@ # Copyright (c) 2025 Oracle Corporation and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl -#locals { -# fss_export_path = format("/oke-gpu-%v", local.state_id) -#} - -#export path picked from user input locals { - fss_export_path = var.fss_export_path + fss_export_path = format("/oke-gpu-%v", local.state_id) } +#export path picked from user input +#locals { +# fss_export_path = var.fss_export_path +#} + data "oci_file_storage_mount_targets" "fss" { count = var.create_fss ? 1 : 0 availability_domain = var.fss_ad diff --git a/terraform/oke-workers.tf b/terraform/oke-workers.tf index d76d4e2..452e79c 100644 --- a/terraform/oke-workers.tf +++ b/terraform/oke-workers.tf @@ -28,9 +28,9 @@ locals { # FSS mount on worker nodes. TODO(review): the script is fetched from the patch author's feature branch; repoint to a stable location before merging - runcmd_fss_mount = var.create_fss && local.fss_mount_ip != "" ? format( + runcmd_fss_mount = var.create_fss && local.fss_mount_ip != "" && local.fss_export_path != "" ? 
format( "curl -sL -o /var/run/oke-fss-mount.sh https://raw.githubusercontent.com/subburamoracle/oci-hpc-oke/refs/heads/fssmount_worker/files/oke-fss-mount.sh && (bash /var/run/oke-fss-mount.sh '%v' '%v' '%v' || echo 'Error mounting FSS' >&2)", - var.fss_export_path, var.fss_mount_path, local.fss_mount_ip + local.fss_export_path, var.fss_mount_path, local.fss_mount_ip ) : "" write_files = [ diff --git a/terraform/schema.yaml b/terraform/schema.yaml index 8a0cba6..b46f17b 100644 --- a/terraform/schema.yaml +++ b/terraform/schema.yaml @@ -184,7 +184,6 @@ variableGroups: - create_bv_high - create_fss - fss_ad - - fss_export_path - fss_mount_path - create_lustre - lustre_ad @@ -745,12 +744,6 @@ variables: visible: ${create_fss} dependsOn: compartmentId: ${compartment_ocid} - fss_export_path: - title: FSS export path - type: string - default: "/oke-hpc-export" - required: true - visible: ${create_fss} fss_mount_path: title: FSS mount point type: string diff --git a/terraform/variables.tf b/terraform/variables.tf index 48161f0..ae4da8a 100644 --- a/terraform/variables.tf +++ b/terraform/variables.tf @@ -189,10 +189,6 @@ variable "create_lustre_pv" { type = bool } # created variable for fss mounting -variable "fss_export_path" { - default = "/oke-hpc-export" - type = string -} variable "fss_mount_path" { default = "/mnt/oci-fss" type = string