Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions cli/src/bacc/mpi_bm.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def populate_arguments(loader):
"pool_id",
options_list=["--pool-id", "-p"],
help="The ID of the pool to use for the job.",
choices=['almalinux', 'rhel8']
# choices=['almalinux', 'rhel8']
)
c.argument(
"await_completion",
Expand All @@ -72,8 +72,8 @@ def populate_arguments(loader):
c.argument(
"mpi_impl",
options_list=["--mpi-implementation", "-m"],
help="The MPI implementation to use for the job.",
choices=['hpcx'],
help="The MPI implementation to use for the job. Ensure that the MPI implementation is available on the compute node.",
choices=['hpcx', 'openmpi', 'impi-2021', 'mvapich2'],
arg_group="MPI Arguments",
)

Expand Down Expand Up @@ -172,8 +172,12 @@ def execute(resource_group_name:str, subscription_id:str,
job_id = "{}-{}".format('custom', uid)

num_ranks_per_node = math.ceil(num_ranks / num_nodes)
if mpi_impl == "hpcx":
mpi_cmd=f"mpirun -host $(get_openmpi_hosts_with_slots) -x UCX_TLS=rc -x LD_LIBRARY_PATH --map-by ppr:{num_ranks_per_node}:node -np {num_ranks}"
if mpi_impl == "hpcx" or mpi_impl == "openmpi":
mpi_cmd=f"mpirun -host $(get_openmpi_hosts_with_slots) -mca coll_hcoll_enable 0 -x UCX_TLS=tcp -x LD_LIBRARY_PATH -x UCX_NET_DEVICES=eth0 --map-by ppr:{num_ranks_per_node}:node -np {num_ranks}"
elif mpi_impl == "impi-2021":
mpi_cmd=f"mpirun -hosts $(echo $AZ_BATCH_NODE_LIST | sed \"s/;/,/g\") -genv I_MPI_DEBUG 5 -genv I_MPI_FABRICS ofi -ppn {num_ranks_per_node} -np {num_ranks}"
elif mpi_impl == "mvapich2":
mpi_cmd=f"mpirun -host $(get_openmpi_hosts_with_slots) -x LD_LIBRARY_PATH --map-by ppr:{num_ranks_per_node}:node -np {num_ranks}"

wrk_command = f"$(find {prefix}/{mpi_impl}/ -name {bm_exe} -type f | head -n 1) {bm_args}"
task_cmd = f"bash -c 'source /etc/profile.d/modules.sh && source /mnt/batch_utils.sh && module load mpi/{mpi_impl} && {mpi_cmd} {wrk_command}'"
Expand Down
13 changes: 11 additions & 2 deletions examples/mpi-benchmarks/deployment.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ param addressPrefix string = '10.121.0.0/16'
@description('Batch Service Object Id (az ad sp show --id "ddbf3205-c6bd-46ae-8127-60eb93363864" --query id)')
param batchServiceObjectId string

@description('log analysis workspace resource id (optional). leave empty to disable log analytics')
param logAnalyticsWorkspaceId string = ''

//------------------------------------------------------------------------------
var extraArgs = !empty(mpiWorkloadGitUrl) && !empty(mpiWorkloadGitBranch) && !empty(mpiWorkloadGitCMakePath) ? '-g ${mpiWorkloadGitUrl} -b ${mpiWorkloadGitBranch} -p ${mpiWorkloadGitCMakePath}' : ''
var c0 = replace(loadTextContent('./config.jsonc'), '\${sku}', sku)
Expand All @@ -60,12 +63,18 @@ var peerings = !empty(vnetPeerResourceGroupName) && !empty(vnetPeerName) ? [{
useGateway: true
}] : []

var hubConfig = !empty(peerings) ? {
// Network portion of the hub configuration: carries the VNet peerings when any
// are defined, otherwise an empty object so it contributes nothing when merged.
var hc0 = !empty(peerings) ? {
network: {
peerings: peerings
}
} : {}

// Diagnostics portion of the hub configuration: populated only when a
// Log Analytics workspace id was supplied (logAnalyticsWorkspaceId is non-empty),
// otherwise an empty object so it contributes nothing when merged.
var hc1 = !empty(logAnalyticsWorkspaceId) ? {
diagnostics: {
logAnalyticsWorkspace: { id: logAnalyticsWorkspaceId }
}
} : {}

@description('suffix used for all nested deployments')
var dplSuffix = uniqueString(deployment().name, location, resourceGroupName)

Expand All @@ -74,7 +83,7 @@ module mdlInfrastructure '../../modules/infrastructure.bicep' = {
name: 'infrastructure-${dplSuffix}'
params: {
config: config
hubConfig: hubConfig
hubConfig: union(hc0, hc1)
resourceGroupName: resourceGroupName
location: location
tags: tags
Expand Down
43 changes: 25 additions & 18 deletions examples/mpi-benchmarks/start_task.sh
Original file line number Diff line number Diff line change
Expand Up @@ -246,12 +246,6 @@ EOF
install_intel_benchmarks () {
mpi_impl=$1

# check if arguments are valid
if [ "$mpi_impl" != "hpcx" ]; then
echo "Invalid MPI implementation: ${mpi_impl}"
exit 1
fi

status_file="${STATUS_PREFIX}/intel_benchmarks_installed_${mpi_impl}"
if [ -f "${status_file}" ]; then
echo "Intel MPI Benchmarks (${mpi_impl}) already installed. Skipping."
Expand Down Expand Up @@ -284,12 +278,6 @@ install_osu_benchmarks () {
#--------
mpi_impl=$1

# check if arguments are valid
if [ "$mpi_impl" != "hpcx" ]; then
echo "Invalid MPI implementation: ${mpi_impl}"
exit 1
fi

status_file="${STATUS_PREFIX}/osu_benchmarks_installed_${mpi_impl}"
if [ -f "${status_file}" ]; then
echo "OSU Benchmarks (${mpi_impl}) already installed. Skipping."
Expand All @@ -303,7 +291,7 @@ install_osu_benchmarks () {
tar -xvf osu-micro-benchmarks-7.0.1.tar.gz
pushd osu-micro-benchmarks-7.0.1

./configure CC=mpicc CXX=mpicxx --prefix=/mnt/osu-micro-benchmarks/${mpi_impl}
./configure CC=mpicc CXX=mpicxx --prefix=${INSTALL_PREFIX}/osu-micro-benchmarks/${mpi_impl}
make -j $(nproc)
make install
popd
Expand Down Expand Up @@ -366,7 +354,7 @@ EOF

save_batch_utils () {
# This function has utility functions for Batch tasks
cat << EOF > /mnt/batch_utils.sh
cat << EOF > ${INSTALL_PREFIX}/batch_utils.sh
#!/usr/bin/env bash

# This script has utility functions for Batch tasks
Expand All @@ -390,6 +378,17 @@ export AZ_BATCH_OMPI_HOSTS=\$(get_openmpi_hosts_with_slots)
EOF
}

get_mpi_impls () {
    # Print the MPI implementations that have a matching environment module,
    # as a single space-separated line on stdout (empty line if none match).
    #
    # Candidates are checked in a fixed order; a candidate counts as available
    # when its name appears anywhere in the output of `module avail -t mpi`.
    # Intended to be called via command substitution: $(get_mpi_impls).
    local candidates="hpcx openmpi mvapich2 impi-2021"
    local available found="" mpi

    # Query the module system once instead of once per candidate.
    # stderr is merged in because the original captured it as well
    # (presumably the listing is printed there -- confirm for the installed
    # Modules version). `|| true` keeps a failing `module` from aborting the
    # script under `set -e`; it then simply yields no matches.
    available=$(module avail -t mpi 2>&1 || true)

    # Deliberately unquoted: split the candidate list on whitespace.
    for mpi in $candidates; do
        # Plain substring match, equivalent to the former `grep -c` test for
        # these names (none contain regex metacharacters).
        case "$available" in
            *"$mpi"*) found="${found:+$found }$mpi" ;;
        esac
    done

    echo "$found"
}

if [ "${_arg_mofed}" = "on" ]; then
echo "Installing Mellanox OFED drivers"
install_dependencies
Expand All @@ -403,24 +402,32 @@ if [ "${_arg_mpis}" = "on" ]; then
fi

source /etc/profile.d/modules.sh
mpi_impls=$(get_mpi_impls)

if [ "${_arg_ibm}" = "on" ]; then
echo "Installing Intel MPI Benchmarks"
install_intel_benchmarks hpcx
for mpi in $mpi_impls; do
install_intel_benchmarks $mpi
done
module purge
fi

if [ "${_arg_osu}" = "on" ]; then
echo "Installing OSU Micro Benchmarks"
install_osu_benchmarks hpcx
for mpi in $mpi_impls; do
install_osu_benchmarks $mpi
done
module purge
fi

# build mpi workload
if [ -n "${_arg_git_url}" ]; then
echo "Building MPI workload from git repo"
install_mpi_workload "${_arg_git_url}" "${_arg_git_branch}" "${_arg_git_path}" "hpcx"
for mpi in $mpi_impls; do
install_mpi_workload "${_arg_git_url}" "${_arg_git_branch}" "${_arg_git_path}" $mpi
done
module purge
fi

# save batch_utils to /mnt/batch_utils.sh
# save batch_utils to ${INSTALL_PREFIX}/batch_utils.sh
save_batch_utils