diff --git a/hpc/LoadBalancer.cpp b/hpc/LoadBalancer.cpp
index c23bdff3..742ad256 100644
--- a/hpc/LoadBalancer.cpp
+++ b/hpc/LoadBalancer.cpp
@@ -86,6 +86,6 @@ int main(int argc, char *argv[])
     std::transform(LB_vector.begin(), LB_vector.end(), LB_ptr_vector.begin(),
                    [](LoadBalancer& obj) { return &obj; });
 
-    std::cout << "Load balancer running port" << port << std::endl;
+    std::cout << "Load balancer running port " << port << std::endl;
     umbridge::serveModels(LB_ptr_vector, "0.0.0.0", port, true, false);
 }
diff --git a/hpc/Makefile b/hpc/Makefile
index 36174295..af41e05d 100644
--- a/hpc/Makefile
+++ b/hpc/Makefile
@@ -4,3 +4,18 @@ load-balancer-files = LoadBalancer.cpp LoadBalancer.hpp ../lib/httplib.h ../lib/
 
 build-load-balancer:
-	g++ -O3 -Wno-unused-result -std=c++17 $(load-balancer-files) -o load-balancer -pthread
+	g++ -O3 -Wno-unused-result -std=c++17 $(load-balancer-files) -o load-balancer -pthread
+
+# Start ./load-balancer on PORT (default 4242); while that port is busy, ask for another one.
+run-load-balancer:
+	rm -f retry-respond-job_id.txt
+
+	if ! printenv PORT > /dev/null; then \
+		echo "PORT environment variable not set. Using default value 4242."; \
+		export PORT=4242; \
+	fi && \
+	export HQ_SUBMIT_DELAY_MS=100 && \
+	while nc -z localhost $$PORT; do \
+		printf 'Port %s is already in use. Please enter a different port: ' "$$PORT"; read -r NEW_PORT || exit 1; \
+		PORT=$${NEW_PORT:-$$PORT}; \
+	done; \
+	./load-balancer
\ No newline at end of file
diff --git a/hpc/hq_scripts/job.sh b/hpc/hq_scripts/job.sh
index 94e7b0ba..fbaa6b9b 100755
--- a/hpc/hq_scripts/job.sh
+++ b/hpc/hq_scripts/job.sh
@@ -6,12 +6,14 @@
 #HQ --stdout none
 #HQ --stderr none
 
+# Remove "#HQ --stdout none" and "#HQ --stderr none" if you want to see the output of the job.
+
 # Launch model server, send back server URL
 # and wait to ensure that HQ won't schedule any more jobs to this allocation.
 
 function get_avaliable_port {
     # Define the range of ports to select from
-    MIN_PORT=1024
+    MIN_PORT=49152
     MAX_PORT=65535
 
     # Generate a random port number
@@ -34,14 +36,21 @@ export PORT=$port
 
 load_balancer_dir="/load/balancer/directory" # CHANGE ME!
 
-
 host=$(hostname -I | awk '{print $1}')
 
+timeout=60 # timeout in seconds, might need to be increased if the model server takes longer to start
 echo "Waiting for model server to respond at $host:$port..."
-while ! curl -s "http://$host:$port/Info" > /dev/null; do
-    sleep 1
-done
-echo "Model server responded"
+if timeout $timeout sh -c 'while ! curl -s "http://'"$host"':'"$port"'/Info" > /dev/null ; do sleep 1; done'; then
+    echo "Model server responded within $timeout seconds"
+else
+    echo "Timeout: Model server did not respond within $timeout seconds"
+    echo "$HQ_JOB_ID" > "$load_balancer_dir/retry-respond-job_id.txt"
+
+    # clear the server here if needed
+
+    # restart the job
+    $load_balancer_dir/hq_scripts/job.sh
+fi
 
 # Write server URL to file identified by HQ job ID.
 mkdir -p "$load_balancer_dir/urls"
diff --git a/hpc/test/MultiplyBy2/Makefile b/hpc/test/MultiplyBy2/Makefile
new file mode 100644
index 00000000..8f19ff90
--- /dev/null
+++ b/hpc/test/MultiplyBy2/Makefile
@@ -0,0 +1,27 @@
+all: build-server build-lb run
+
+load-balancer-files = ../../LoadBalancer.cpp ../../LoadBalancer.hpp ../../../lib/httplib.h ../../../lib/json.hpp ../../../lib/umbridge.h
+
+build-server:
+	g++ -O3 -w -std=c++11 minimal-server.cpp -o server -lssl -lcrypto -pthread
+
+build-lb:
+	g++ -O3 -Wno-unused-result -std=c++17 $(load-balancer-files) -o ../../load-balancer -pthread
+
+# Clear stale retry markers and logs, settle on a free PORT (default 4242), then start the load balancer.
+run:
+	rm -f retry-port-job_id.txt
+	rm -f retry-respond-job_id.txt
+	mkdir -p logs
+	rm -f logs/*
+
+	if ! printenv PORT > /dev/null; then \
+		echo "PORT environment variable not set. Using default value 4242."; \
+		export PORT=4242; \
+	fi && \
+	export HQ_SUBMIT_DELAY_MS=100 && \
+	while nc -z localhost $$PORT; do \
+		printf 'Port %s is already in use. Please enter a different port: ' "$$PORT"; read -r NEW_PORT || exit 1; \
+		PORT=$${NEW_PORT:-$$PORT}; \
+	done; \
+	cd ../../ && ./load-balancer
\ No newline at end of file
diff --git a/hpc/test/MultiplyBy2/client.sh b/hpc/test/MultiplyBy2/client.sh
new file mode 100644
index 00000000..3b714f7b
--- /dev/null
+++ b/hpc/test/MultiplyBy2/client.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# export TEST_DELAY=1e4
+
+if [ -z "$PORT" ]; then
+    PORT="4242"
+fi
+
+echo "Using URL http://localhost:$PORT"
+
+echo "Sending requests..."
+
+for i in {1..300}
+do
+    # Expected output: {"output":[[200.0]]}
+    # Check if curl output equals expected output
+    # If not, print error message
+
+    {
+        # Send the request once, so the body printed on failure is the one that was actually compared.
+        response=$(curl -s http://localhost:$PORT/Evaluate -X POST -d '{"name": "forward", "input": [[100.0]]}')
+        if [ "$response" == '{"output":[[200.0]]}' ]; then
+            echo -n "y"
+        else
+            echo "$response"
+            echo -n "n"
+            #echo "Error: curl output does not equal expected output"
+        fi
+    } &
+
+done
+
+echo "Requests sent. Waiting for responses..."
+
+wait
\ No newline at end of file
diff --git a/hpc/test/MultiplyBy2/job.sh b/hpc/test/MultiplyBy2/job.sh
new file mode 100644
index 00000000..b530516a
--- /dev/null
+++ b/hpc/test/MultiplyBy2/job.sh
@@ -0,0 +1,61 @@
+#! /bin/bash
+
+#HQ --cpus=1
+#HQ --time-request=1m
+#HQ --time-limit=2m
+#HQ --stdout %{CWD}/test/MultiplyBy2/logs/job-%{JOB_ID}.out
+#HQ --stderr %{CWD}/test/MultiplyBy2/logs/job-%{JOB_ID}.err
+
+# Launch model server, send back server URL
+# and wait to ensure that HQ won't schedule any more jobs to this allocation.
+
+
+# Define the range of ports to select from
+MIN_PORT=49152
+MAX_PORT=65535
+# Generate a random port number
+port=$(shuf -i $MIN_PORT-$MAX_PORT -n 1)
+# Check if the port is in use
+try_count=0
+echo "$(lsof -Pi :$port -sTCP:LISTEN -t )"
+while [ -n "$(lsof -Pi :$port -sTCP:LISTEN -t )" ]
+do
+    echo "Port $port is in use, trying another port"
+    # If the port is in use, generate a new port number
+    port=$(shuf -i $MIN_PORT-$MAX_PORT -n 1)
+
+    try_count=$((try_count+1))
+
+    echo "$HQ_JOB_ID" > "./test/MultiplyBy2/retry-port-job_id.txt"
+done
+echo "Selected port $port after $try_count tries"
+
+echo "Starting server on port $port"
+export PORT=$port
+
+# Assume that server sets the port according to the environment variable 'PORT'.
+./test/MultiplyBy2/server & # CHANGE ME!
+
+load_balancer_dir="./" # CHANGE ME!
+
+host=$(hostname -I | awk '{print $1}')
+
+timeout=30 # timeout in seconds
+echo "Waiting for model server to respond at $host:$port..."
+if timeout $timeout sh -c 'while ! curl -s "http://'"$host"':'"$port"'/Info" > /dev/null ; do sleep 1; done'; then
+    echo "Model server responded within $timeout seconds"
+else
+    echo "Timeout: Model server did not respond within $timeout seconds"
+    echo "$HQ_JOB_ID" > "./test/MultiplyBy2/retry-respond-job_id.txt"
+
+    # clear the server here if needed
+
+    # restart the job
+    $load_balancer_dir/hq_scripts/job.sh
+fi
+
+# Write server URL to file identified by HQ job ID.
+mkdir -p "$load_balancer_dir/urls"
+echo "http://$host:$port" > "$load_balancer_dir/urls/url-$HQ_JOB_ID.txt"
+
+sleep infinity # keep the job occupied
diff --git a/hpc/test/MultiplyBy2/minimal-server.cpp b/hpc/test/MultiplyBy2/minimal-server.cpp
index 156dda8e..25057132 100644
--- a/hpc/test/MultiplyBy2/minimal-server.cpp
+++ b/hpc/test/MultiplyBy2/minimal-server.cpp
@@ -2,9 +2,36 @@
 #include
 #include
 #include
-
+#include
+#include
+#include
+#include
+#include
 #include "../../../lib/umbridge.h"
 
+
+// Probe whether a TCP port can be bound on all local IPv4 interfaces.
+// Returns true when bind() fails (the port is treated as taken) and false when
+// the bind succeeds or when the probe socket cannot be created.
+bool isPortInUse(int port) {
+    const int probeFd = socket(AF_INET, SOCK_STREAM, 0);
+    if (probeFd < 0) {
+        std::cerr << "Failed to create socket." << std::endl;
+        return false;
+    }
+
+    sockaddr_in anyAddress{}; // value-initialised, so every field starts zeroed
+    anyAddress.sin_family = AF_INET;
+    anyAddress.sin_addr.s_addr = htonl(INADDR_ANY);
+    anyAddress.sin_port = htons(port);
+
+    // The probe socket is closed on both outcomes; only the bind result is reported.
+    const bool bindFailed =
+        bind(probeFd, reinterpret_cast<const sockaddr*>(&anyAddress), sizeof(anyAddress)) < 0;
+    close(probeFd);
+    return bindFailed;
+}
+
 class ExampleModel : public umbridge::Model
 {
 public:
@@ -79,6 +106,14 @@ int main(int argc, char *argv[])
     else
     {
         port = atoi(port_cstr);
+        std::cout << "Using port [ " << port_cstr << " ] as specified by environment variable PORT." << std::endl;
+        if (isPortInUse(port))
+        {
+            std::cerr << "Port " << port << " is already in use. Exiting." << std::endl;
+
+            exit(-1);
+        }
+
     }
 
     char const *delay_cstr = std::getenv("TEST_DELAY");