Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 46 additions & 15 deletions appliances/FabricManager/appliance.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@ set -o errexit -o pipefail
### Important notes ##################################################
#
# 1. This appliance requires a base OS image with cloud-init and wget.
# 2. You MUST edit the 'PARTITION_TOOL_SRC' variable in the 'Globals'
# section below to point to the 'partitions.cpp' file in your
# internal GitLab repository.
# 3. This appliance MUST be instantiated with a VM Template that
# 2. This appliance MUST be instantiated with a VM Template that
# includes UEFI boot and Q35 machine settings .
#
### Important notes ##################################################
Expand All @@ -44,7 +41,7 @@ ONE_SERVICE_SHORT_DESCRIPTION='Appliance with NVIDIA Fabric Manager for Shared N
ONE_SERVICE_DESCRIPTION=$(cat <<EOF
Appliance with pre-installed NVIDIA Fabric Manager service.

This appliance is designed to be the "Shared NVSwitch Virtualization Model" service VM, as described in the IAAS-NVIDIA-GPUs-Passthru documentation .
This appliance is designed to be the "Shared NVSwitch Virtualization Model" service VM, as described in the IAAS-NVIDIA-GPUs-Passthrough documentation .

It comes with:
- The required NVIDIA datacenter driver.
Expand Down Expand Up @@ -121,6 +118,9 @@ service_install()
# Install drivers, FM service, and build tools (make are in build-essential)
install_packages

# Install the boot manager script that adds resiliency
install_boot_manager

# Configure the FM service
configure_fm_service

Expand All @@ -140,9 +140,9 @@ service_install()

service_configure()
{
msg info "Starting NVIDIA Fabric Manager service"
msg info "Starting NVIDIA Fabric Manager service (managed by one-fm-boot-manager.sh)"
if ! systemctl start nvidia-fabricmanager; then
msg error "Failed to start nvidia-fabricmanager service. Check VM logs."
msg error "Failed to start nvidia-fabricmanager service. Check VM logs and /var/log/syslog for 'one-fm-boot-manager' entries."
exit 1
fi

Expand All @@ -166,6 +166,24 @@ service_bootstrap()
# functions
#

# Install the Fabric Manager boot manager script into /usr/local/sbin.
#
# The script is looked up next to the running script ("$0"); the function
# exits with status 1 if it is missing or cannot be installed.
# Globals:   none
# Arguments: none
# Outputs:   progress and error messages through msg
install_boot_manager()
{
    local SCRIPT_DIR
    SCRIPT_DIR="$(dirname "$0")"
    local BOOT_MANAGER_SRC="${SCRIPT_DIR}/one-fm-boot-manager.sh"
    local BOOT_MANAGER_DST="/usr/local/sbin/one-fm-boot-manager.sh"

    msg info "Installing Fabric Manager boot manager script"

    if [ ! -f "${BOOT_MANAGER_SRC}" ]; then
        msg error "Boot manager script not found at: ${BOOT_MANAGER_SRC}"
        exit 1
    fi

    # install(1) copies and sets an explicit mode in one step, so the
    # resulting permissions do not depend on the source file's mode or on
    # the current umask (cp followed by chmod +x only adds execute bits).
    if ! install -m 0755 -- "${BOOT_MANAGER_SRC}" "${BOOT_MANAGER_DST}"; then
        msg error "Failed to install boot manager script to: ${BOOT_MANAGER_DST}"
        exit 1
    fi
}

install_nvidia_repo()
{
msg info "Installing NVIDIA CUDA Repository Key"
Expand Down Expand Up @@ -203,18 +221,31 @@ install_packages()
configure_fm_service()
{
local FM_CONFIG_FILE="/usr/share/nvidia/nvswitch/fabricmanager.cfg"

msg info "Configuring Fabric Manager for Shared NVSwitch Mode"

# Set Fabric Manager mode to 1 (Shared NVSwitch multitenancy)
# Fabric Manager Operating Mode
# (1) Start FM in Shared NVSwitch multi-tenancy mode.
sed -i 's/^\(FABRIC_MODE\)=.*/\1=1/' ${FM_CONFIG_FILE}

# Set persistent state file
# Ensure the directory exists and has correct permissions potentially needed by the service
mkdir -p /var/run/nvidia-fabricmanager
# Note: Service might manage permissions itself, but setting owner is safer. Check service docs if issues arise.
# chown nvidia-fabricmanager:nvidia-fabricmanager /var/run/nvidia-fabricmanager # Example, user/group might differ
sed -i 's|^\(STATE_FILE_NAME\)=.*|\1=/var/run/nvidia-fabricmanager/fabricmanager.state|' ${FM_CONFIG_FILE}
# Set persistent state files
mkdir -p /var/lib/nvidia-fabricmanager
touch /var/lib/nvidia-fabricmanager/active_partitions.state

# STATE_FILE_NAME is used by nvidia-fabricmanager itself (metadata)
sed -i 's|^\(STATE_FILE_NAME\)=.*|\1=/var/lib/nvidia-fabricmanager/fabricmanager.state|' ${FM_CONFIG_FILE}

# Override the systemd unit so it starts the boot manager script, for resiliency
msg info "Overriding systemd service to use boot manager"
local OVERRIDE_DIR="/etc/systemd/system/nvidia-fabricmanager.service.d"
mkdir -p "${OVERRIDE_DIR}"
cat <<EOF > "${OVERRIDE_DIR}/override.conf"
[Service]
ExecStart=
ExecStart=/usr/local/sbin/one-fm-boot-manager.sh
EOF

systemctl daemon-reload

msg info "Enabling nvidia-fabricmanager systemd service"
systemctl enable nvidia-fabricmanager.service
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,50 @@
#include <iomanip>
#include <limits>
#include <sstream>
#include <vector>
#include <fstream>
#include <algorithm>

#include "nv_fm_agent.h"

// Path of the file that records the IDs of the partitions considered active.
// readState() parses it and writeState() rewrites it.
const char* STATE_FILE_PATH = "/var/lib/nvidia-fabricmanager/active_partitions.state";

// --- State Management Functions ---

std::vector<fmFabricPartitionId_t> readState() {
std::vector<fmFabricPartitionId_t> activePartitions;
std::ifstream stateFile(STATE_FILE_PATH);
fmFabricPartitionId_t id;
while (stateFile >> id) {
activePartitions.push_back(id);
}
return activePartitions;
}

// Replace the contents of STATE_FILE_PATH with the given partition IDs,
// one ID per line.
//
// A failure to open or write the file is reported on stderr instead of being
// silently ignored, so a stale state file does not go unnoticed. The function
// still returns normally because the Fabric Manager request that triggered
// the update has already been sent.
void writeState(const std::vector<fmFabricPartitionId_t>& activePartitions) {
    std::ofstream stateFile(STATE_FILE_PATH, std::ios::trunc);
    if (!stateFile) {
        std::cerr << "Warning: Could not open state file " << STATE_FILE_PATH
                  << " for writing; partition state was not saved." << std::endl;
        return;
    }

    for (const auto& id : activePartitions) {
        // '\n' instead of std::endl: flush once at the end, not per line.
        stateFile << id << '\n';
    }

    stateFile.flush();
    if (!stateFile) {
        std::cerr << "Warning: Failed to write state file " << STATE_FILE_PATH
                  << "; partition state may be incomplete." << std::endl;
    }
}

// Record a partition as active (activate == true) or remove it from the
// record (activate == false), then rewrite the state file.
// Adding an ID that is already present, or removing one that is absent,
// leaves the list unchanged; the file is rewritten in every case.
void updateStateFile(fmFabricPartitionId_t partitionId, bool activate) {
    std::vector<fmFabricPartitionId_t> partitions = readState();
    const auto pos = std::find(partitions.begin(), partitions.end(), partitionId);
    const bool present = (pos != partitions.end());

    if (activate && !present) {
        partitions.push_back(partitionId);
    } else if (!activate && present) {
        partitions.erase(pos);
    }

    writeState(partitions);
}


// Print a one-line error message to stdout naming the operation that failed
// and the return code received for it.
void printFmError(const char* operation, fmReturn_t fmReturn) {
    std::ostream& out = std::cout;
    out << "Error: Failed to " << operation;
    out << ". (Code: " << fmReturn << ")";
    out << std::endl;
}
Expand All @@ -40,7 +81,8 @@ void printMenu() {
std::cout << " 0 - List Supported Partitions\n";
std::cout << " 1 - Activate a Partition\n";
std::cout << " 2 - Deactivate a Partition\n";
std::cout << " 3 - Quit\n";
std::cout << " 3 - Restore Active Partitions\n";
std::cout << " 4 - Quit\n";
std::cout << "------------------------------------------\n";
std::cout << "Enter operation: ";
}
Expand Down Expand Up @@ -142,6 +184,7 @@ fmReturn_t executeOperation(fmHandle_t fmHandle, unsigned int operation, fmFabri
fmReturn = fmActivateFabricPartition(fmHandle, partitionId);
if (fmReturn == FM_ST_SUCCESS) {
std::cout << "Successfully sent activation request for partition " << partitionId << std::endl;
updateStateFile(partitionId, true);
} else {
printFmError("activate partition", fmReturn);
}
Expand All @@ -156,12 +199,41 @@ fmReturn_t executeOperation(fmHandle_t fmHandle, unsigned int operation, fmFabri
fmReturn = fmDeactivateFabricPartition(fmHandle, partitionId);
if (fmReturn == FM_ST_SUCCESS) {
std::cout << "Successfully sent deactivation request for partition " << partitionId << std::endl;
updateStateFile(partitionId, false);
} else {
printFmError("deactivate partition", fmReturn);
}
break;
}

case 3: { // Restore Active Partitions
auto activePartitions = readState();

fmActivatedFabricPartitionList_t partitionsToRestore = {0};
partitionsToRestore.version = fmActivatedFabricPartitionList_version;

if (activePartitions.size() > FM_MAX_FABRIC_PARTITIONS) {
std::cout << "Error: Number of active partitions in state file exceeds limit." << std::endl;
return FM_ST_BADPARAM;
}

partitionsToRestore.numPartitions = activePartitions.size();
for(size_t i = 0; i < activePartitions.size(); ++i) {
partitionsToRestore.partitionIds[i] = activePartitions[i];
}

std::cout << "Restoring " << partitionsToRestore.numPartitions << " active partition(s)..." << std::endl;

fmReturn = fmSetActivatedFabricPartitions(fmHandle, &partitionsToRestore);

if (fmReturn == FM_ST_SUCCESS) {
std::cout << "Successfully restored active partitions." << std::endl;
} else {
printFmError("restore active partitions", fmReturn);
}
break;
}

default:
std::cout << "Error: Invalid operation specified (" << operation << ")." << std::endl;
fmReturn = FM_ST_BADPARAM;
Expand Down Expand Up @@ -214,10 +286,10 @@ int main(int argc, char **argv)
runInteractive = false;
} else if (!runInteractive) {
std::cout << "Usage: " << argv[0] << " [-i <IP>] -o <OP> [-p <ID>] [-f <FORMAT>]\n"
<< " -i, --ip <IP> : IP address of Fabric Manager (default: 127.0.0.1)\n"
<< " -o, --operation <N>: 0=List, 1=Activate, 2=Deactivate\n"
<< " -p, --partition <ID>: Partition ID (required for Activate/Deactivate)\n"
<< " -f, --format <FORMAT>: Output format for operation 0 (csv or table, default: table)\n"
<< " -i, --ip <IP> : IP address of Fabric Manager (default: 127.0.0.1)\n"
<< " -o, --operation <N> : 0=List, 1=Activate, 2=Deactivate, 3=Restore\n"
<< " -p, --partition <ID> : Partition ID (for Activate/Deactivate)\n"
<< " -f, --format <FORMAT>: Output format for op 0 (csv or table, default: table)\n"
<< "Running without options starts interactive mode.\n";
return FM_ST_BADPARAM;
}
Expand Down Expand Up @@ -259,7 +331,7 @@ int main(int argc, char **argv)
return fmReturn;
}

if (outputFormat != "csv") {
if (outputFormat != "csv" && operation != 3) {
std::cout << "Successfully connected to Fabric Manager at " << hostIpAddress << std::endl;
}

Expand All @@ -277,7 +349,7 @@ int main(int argc, char **argv)
continue;
}

if (operation == 3) break;
if (operation == 4) break;

partitionId = PARTITION_ID_NOT_SET;
if (operation == 1 || operation == 2) {
Expand Down
135 changes: 135 additions & 0 deletions appliances/FabricManager/one-fm-boot-manager.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#!/bin/bash
# ---------------------------------------------------------------------------- #
# Copyright 2024, OpenNebula Project, OpenNebula Systems #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); you may #
# not use this file except in compliance with the License. You may obtain #
# a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
# ---------------------------------------------------------------------------- #

#
# Script that manages the startup and state restoration of nvidia-fabricmanager
#

# Abort on any unhandled command failure, including failures inside pipelines.
set -o errexit -o pipefail

# Fixed paths and the syslog tag used throughout this script. They are marked
# readonly so that no later code can reassign them by accident.
readonly FM_STATE_FILE="/var/lib/nvidia-fabricmanager/fabricmanager.state"
readonly ONE_STATE_FILE="/var/lib/nvidia-fabricmanager/active_partitions.state"
readonly PARTITIONER_TOOL="/usr/local/sbin/nv-partitioner"
readonly LOG_TAG="one-fm-boot-manager"

# Write a message to stderr and forward it to syslog under ${LOG_TAG}.
# Globals:   LOG_TAG (read)
# Arguments: the words of the message
# Outputs:   the message, space-joined, on stderr
# Returns:   always 0
log() {
    printf '%s\n' "$*" >&2
    # Syslog forwarding is best-effort: this script runs with errexit, and a
    # missing or failing logger must not terminate it.
    logger -t "${LOG_TAG}" -- "$@" || true
}

log "Starting NVIDIA Fabric Manager boot manager..."

# Two files describe the saved state:
#   ONE_STATE_FILE - list of partition IDs recorded as active, one per line
#   FM_STATE_FILE  - state file kept by Fabric Manager itself
# A non-empty partition list without a Fabric Manager state file is treated
# as inconsistent and triggers the hard recovery path.
if [ -s "${ONE_STATE_FILE}" ] && [ ! -f "${FM_STATE_FILE}" ]; then
    # --- Hard Recovery ---
    log "WARNING: Inconsistent state detected. Starting hard recovery process."

    # 1. Move the current list aside; it becomes the desired state to replay.
    BACKUP_FILE="${ONE_STATE_FILE}.failed-$(date +%Y%m%d-%H%M%S)"
    log "INFO: Backing up current partition state to ${BACKUP_FILE}"
    mv -- "${ONE_STATE_FILE}" "${BACKUP_FILE}"

    # 2. Start FM in normal mode
    log "INFO: Starting Fabric Manager in normal mode for recovery."
    /usr/bin/nv-fabricmanager &
    FM_PID=$!
    log "INFO: Fabric Manager daemon started with PID ${FM_PID}."
    log "INFO: Waiting for 5 seconds for the daemon to initialize..."
    sleep 5

    # 3. Replay the backed-up list, activating partitions one by one.
    #    The extra test keeps a final line that lacks a trailing newline.
    log "INFO: Attempting to reactivate partitions from ${BACKUP_FILE}"
    while read -r PARTITION_ID || [ -n "${PARTITION_ID}" ]; do
        # Skip empty
        if [ -z "${PARTITION_ID}" ]; then
            continue
        fi

        log "INFO: Attempting to activate partition ID: ${PARTITION_ID}"
        # stdin is detached so the tool cannot consume the remaining IDs
        # that this loop is reading from the backup file.
        if "${PARTITIONER_TOOL}" -o 1 -p "${PARTITION_ID}" < /dev/null; then
            log "SUCCESS: Partition ${PARTITION_ID} activated."
        else
            log "ERROR: Failed to activate partition ${PARTITION_ID}. Check logs for details."
        fi
    done < "${BACKUP_FILE}"

    # 4. Final Summary
    log "INFO: Hard recovery process finished. Validating final state..."

    # Ensure the new state file exists
    # NOTE(review): the partitioner tool is expected to recreate this file on
    # each successful activation - confirm against the tool's sources.
    if [ ! -f "${ONE_STATE_FILE}" ]; then
        touch "${ONE_STATE_FILE}"
    fi

    # Compare desired and recovered state, ignoring line order. diff runs
    # inside the 'if' condition because it exits non-zero when the files
    # differ, which would otherwise abort the script under errexit/pipefail.
    # Process substitution avoids temporary files that could be left behind.
    if STATE_DIFF=$(diff <(sort "${BACKUP_FILE}") <(sort "${ONE_STATE_FILE}")); then
        log "SUCCESS: Hard recovery complete. All partitions were successfully restored."
    else
        log "CRITICAL: Hard recovery was PARTIAL. Not all partitions could be restored."
        log "CRITICAL: The following differences were found between the desired state (left) and the recovered state (right):"
        # Best-effort: a logging failure must not stop the boot manager.
        printf '%s\n' "${STATE_DIFF}" | logger -t "${LOG_TAG}" -- || true
    fi

else
    # --- Normal/Resilient Boot ---
    log "INFO: Consistent state detected. Proceeding with normal or restart boot."

    RESTART_MODE=false

    # 1. Choose the start-up mode: --restart only when both the Fabric Manager
    #    state file and a non-empty partition list are present.
    if [ -f "${FM_STATE_FILE}" ] && [ -s "${ONE_STATE_FILE}" ]; then
        log "State file found. Starting Fabric Manager in --restart mode."
        RESTART_MODE=true
        /usr/bin/nv-fabricmanager --restart &
    else
        log "No state file found. Starting Fabric Manager in normal mode."
        /usr/bin/nv-fabricmanager &
    fi

    FM_PID=$!
    log "Fabric Manager daemon started with PID ${FM_PID}."

    # 2. Wait for the daemon to be ready
    log "Waiting for 5 seconds for the daemon to initialize..."
    sleep 5

    # 3. Restore partitions only if the daemon was started in --restart mode
    if [ "${RESTART_MODE}" = true ]; then
        if [ -x "${PARTITIONER_TOOL}" ]; then
            log "Executing atomic partition restore operation..."
            if ! "${PARTITIONER_TOOL}" -o 3; then
                log "WARNING: Partition restore command failed. Check fabricmanager logs."
            else
                log "Partition restore operation completed."
            fi
        else
            log "WARNING: Partitioner tool not found at ${PARTITIONER_TOOL}. Skipping restore."
        fi
    else
        log "INFO: Started in normal mode, skipping partition restore operation."
    fi
fi

# 4. Wait for the Fabric Manager process started above and exit with its
#    status. The status is captured explicitly so the exit code does not
#    depend on errexit terminating the script first.
log "Boot manager script is now waiting for the daemon to exit."
FM_STATUS=0
wait "${FM_PID}" || FM_STATUS=$?

exit "${FM_STATUS}"
5 changes: 5 additions & 0 deletions packer/service_FabricManager/FabricManager.pkr.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ build {
destination = "/etc/one-appliance/fabricManager-partition-tool"
}

# Upload the Fabric Manager boot manager script into the image.
# NOTE(review): appliance.sh's install_boot_manager looks for this file in
# the directory of "$0"; presumably that resolves to /etc/one-appliance when
# the appliance is installed - confirm.
provisioner "file" {
source = "appliances/FabricManager/one-fm-boot-manager.sh"
destination = "/etc/one-appliance/one-fm-boot-manager.sh"
}

provisioner "shell" {
scripts = ["${var.input_dir}/82-configure-context.sh"]
}
Expand Down