Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion docker_config/master_template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,7 @@ docker_cln_sh: |
--model_name) CLI_MODEL_NAME="$2"; shift ;;
--num_epochs) CLI_NUM_EPOCHS="$2"; shift ;;
--config) CLI_CONFIG="$2"; shift ;;
--container_name) CONTAINER_NAME="$2"; shift ;;
*) echo "Unknown parameter passed: $1"; exit 1 ;;
esac
shift
Expand Down Expand Up @@ -791,7 +792,10 @@ docker_cln_sh: |
docker pull "$DOCKER_IMAGE"
fi

CONTAINER_NAME=odelia_swarm_client_{~~client_name~~}___REPLACED_BY_CONTAINER_VERSION_IDENTIFIER_WHEN_BUILDING_DOCKER_IMAGE__
if [[ -z "$CONTAINER_NAME" ]]; then
CONTAINER_NAME=odelia_swarm_client_{~~client_name~~}___REPLACED_BY_CONTAINER_VERSION_IDENTIFIER_WHEN_BUILDING_DOCKER_IMAGE__
fi

DOCKER_OPTIONS_A="--name=$CONTAINER_NAME --gpus=$GPU2USE -u $(id -u):$(id -g)"
DOCKER_MOUNTS="-v /etc/passwd:/etc/passwd -v /etc/group:/etc/group -v $DIR/..:/startupkit/ -v $MY_SCRATCH_DIR:/scratch/"
if [ -n "$MY_DATA_DIR" ]; then
Expand Down Expand Up @@ -929,6 +933,7 @@ docker_cln_sh: |
echo " when running with sudo, as sudo resets environment variables"
echo "--num_epochs <n> set number of training epochs (default: 100)"
echo "--config <name> set config name (default: unilateral)"
echo "--container_name <name> set name for the Docker container to override default using version number"
exit 1
fi

Expand Down
29 changes: 29 additions & 0 deletions scripts/ci/runIntegrationTests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,29 @@ run_docker_gpu_preflight_check () {
}


# Integration test: start two dummy-training containers from the same startup
# kit at (almost) the same time and verify that the second one, which is given
# an explicit --container_name, runs its training to completion.
#
# Requires: a startup kit must already have been built under
#           "$PROJECT_DIR/prod_00/client_A/startup/".
# Globals read:    PROJECT_DIR, SCRATCH_DIR, GPU_FOR_TESTING, CWD
# Globals written: CONSOLE_OUTPUT, CONSOLE_OUTPUT_A (log file names, relative
#                  to the startup directory)
# Exits the script with status 1 if the startup directory cannot be entered or
# if the expected training output is missing from the second container's log.
run_two_containers_in_parallel () {
    # requires having built a startup kit
    echo "[Run] Starting two containers in parallel (local dummy training via startup kit) ..."
    cd "$PROJECT_DIR/prod_00/client_A/startup/" || {
        echo "❌ Cannot change to startup kit directory $PROJECT_DIR/prod_00/client_A/startup/"
        exit 1
    }

    # First container: uses the default container name.
    CONSOLE_OUTPUT=docker_gpu_preflight_check_console_output.txt
    timeout --signal=kill 1m ./docker.sh --scratch_dir "$SCRATCH_DIR"/client_A --GPU "$GPU_FOR_TESTING" --dummy_training --no_pull 2>&1 | tee "$CONSOLE_OUTPUT" &
    # $! is the PID of the last pipeline stage (tee), which only terminates
    # once everything upstream has closed the pipe.
    local first_pipeline_pid=$!
    # Stagger the launches slightly so the first container is started first.
    sleep 1

    # Second container: needs its own name to run next to the first one.
    CONSOLE_OUTPUT_A=docker_gpu_preflight_check_console_output_a.txt
    timeout --signal=kill 1m ./docker.sh --scratch_dir "$SCRATCH_DIR"/client_A --GPU "$GPU_FOR_TESTING" --dummy_training --no_pull --container_name MediSwarmODELIATestSecondContainer 2>&1 | tee "$CONSOLE_OUTPUT_A" &
    local second_pipeline_pid=$!

    # Wait for both pipelines instead of sleeping for a fixed minute: the test
    # finishes as soon as the runs do, and the log is guaranteed to be fully
    # written before it is inspected. Each pipeline is bounded by its own
    # `timeout`. The exit status here is tee's, so it is deliberately ignored;
    # success is judged from the log content below.
    wait "$first_pipeline_pid" "$second_pipeline_pid" || true

    if grep -q "Epoch 1: 100%" "$CONSOLE_OUTPUT_A" && grep -q "Training completed successfully" "$CONSOLE_OUTPUT_A"; then
        echo "✅ Expected output of running two containers in parallel found"
    else
        echo "❌ Missing expected output of running two containers in parallel"
        exit 1
    fi

    cd "$CWD"
}


run_data_access_preflight_check () {
# requires having built a startup kit and synthetic dataset
echo "[Run] Data access preflight check..."
Expand Down Expand Up @@ -802,6 +825,12 @@ case "$1" in
cleanup_temporary_data
;;

run_two_containers_in_parallel)
create_startup_kits_and_check_contained_files
run_two_containers_in_parallel
cleanup_temporary_data
;;

run_data_access_preflight_check)
create_startup_kits_and_check_contained_files
create_synthetic_data
Expand Down
Loading