From 866a20be8e82956b0895292171144980ed4d7af9 Mon Sep 17 00:00:00 2001 From: Antti Kervinen Date: Wed, 9 Oct 2019 16:50:23 +0300 Subject: [PATCH] metrics: report pod communication latency percentiles in pdf - Add R routine to load and plot percentiles from JSON. - Support any number of percentiles 1..n. - Store percentile configuration in JSON. Signed-off-by: Antti Kervinen --- metrics/lib/common.bash | 4 +- .../report_dockerfile/metrics_report.Rmd | 12 + .../report_dockerfile/tidy_scaling_nc.R | 155 ++++++ metrics/scaling/k8s_scale_nc.sh | 468 ------------------ metrics/scaling/k8s_scale_rapid_nc.sh | 337 +++++++++++++ 5 files changed, 506 insertions(+), 470 deletions(-) create mode 100755 metrics/report/report_dockerfile/tidy_scaling_nc.R delete mode 100755 metrics/scaling/k8s_scale_nc.sh create mode 100755 metrics/scaling/k8s_scale_rapid_nc.sh diff --git a/metrics/lib/common.bash b/metrics/lib/common.bash index dd76d986..9c601c83 100755 --- a/metrics/lib/common.bash +++ b/metrics/lib/common.bash @@ -84,7 +84,7 @@ framework_init() { k8s_api_init # Launch our stats gathering pod - if [ -n "$SMF_USE_COLLECTD" ]; then + if [ "$SMF_USE_COLLECTD" == "true" ]; then info "Setting up collectd" init_stats $wait_time fi @@ -104,7 +104,7 @@ framework_shutdown() { k8s_api_shutdown cpu_load_shutdown - if [ -n "$SMF_USE_COLLECTD" ]; then + if [ "$SMF_USE_COLLECTD" == "true" ]; then cleanup_stats fi diff --git a/metrics/report/report_dockerfile/metrics_report.Rmd b/metrics/report/report_dockerfile/metrics_report.Rmd index 630365ac..c88be2f0 100644 --- a/metrics/report/report_dockerfile/metrics_report.Rmd +++ b/metrics/report/report_dockerfile/metrics_report.Rmd @@ -51,6 +51,18 @@ source('collectd_scaling.R') \pagebreak +# Pod communication latency +This [test](https://github.com/clearlinux/cloud-native-setup/metrics/scaling/k8s_scale_rapid_nc.sh) +measures pod query--response latency when scaling up. The +time is measured from sending a message directly to a socket, that `nc` +listens to inside each pod, to reading the response from the pod. + +```{r scaling_nc, echo=FALSE, fig.cap="K8S pod communication latency", results='asis'} +source('tidy_scaling_nc.R') +``` + +\pagebreak + # Test setup details This table describes the test system details, as derived from the information contained diff --git a/metrics/report/report_dockerfile/tidy_scaling_nc.R b/metrics/report/report_dockerfile/tidy_scaling_nc.R new file mode 100755 index 00000000..ea2af33b --- /dev/null +++ b/metrics/report/report_dockerfile/tidy_scaling_nc.R @@ -0,0 +1,155 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2019 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Show pod communication latency + +suppressMessages(suppressWarnings(library(ggplot2))) # ability to plot nicely. +suppressWarnings(suppressWarnings(library(ggpubr))) # ggtexttable +suppressMessages(library(jsonlite)) # to load the data. +suppressMessages(library(scales)) # For de-science notation of axis +library(tibble) # tibbles for tidy data + +testnames=c( + "k8s-rapid-nc" +) + +### For developers: uncomment following variables to run this as is in R +# resultdirs=c("PATH/TO/RES1/", ...) 
# keep the ending slash on result paths +# inputdir="" + +latencydata=c() + +# iterate over every set of results (test run) +for (currentdir in resultdirs) { + # For every results file we are interested in evaluating + for (testname in testnames) { + matchdir=paste(inputdir, currentdir, sep="") + matchfile=paste(testname, '\\.json', sep="") + files=list.files(matchdir, pattern=matchfile) + + # For every matching results file + for (ffound in files) { + fname=paste(inputdir, currentdir, ffound, sep="") + if (!file.exists(fname)) { + warning(paste("Skipping non-existent file: ", fname)) + next + } + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + # Import the data + fdata=fromJSON(fname) + # De-nest the test name specific data + shortname=substr(ffound, 1, nchar(ffound)-nchar(".json")) + fdata=fdata[[shortname]] + testname=datasetname + + # All the data we are looking for comes in BootResults, + # so pick it out to make referencing easier + br=fdata$BootResults + + ######################################################## + #### Now extract latency time percentiles (ltp) ######## + ######################################################## + ltp=br$latency_time$Percentiles + # Percentile thresholds, for example [5, 25, 50, 75, 95] + ltp_perc=fdata$Config$nc_percentiles[[1]] + perc_count = length(ltp_perc) + # Measured times + ltp_meas=matrix(unlist(ltp), nrow=perc_count) + # Build latency percentiles tibble with nice headings + ltpt=tibble(n_pods=br$n_pods$Result) + for (n in seq(perc_count)) { + p_title = paste0("p", ltp_perc[n]) + ltpt[p_title] = ltp_meas[n,] + } + # ltpt example: with percentiles [5, 50, 95]: + # n_pods p5 p50 p95 + # 100 4 8 10 + # 200 5 11 15 + # 300 6 14 19 + ltpt$testname=testname + latencydata=rbind(latencydata, ltpt) + } + } +} + +# Visualize data. +if (length(latencydata[[1]]) <= 5 || length(unique(latencydata$testname)) > 1) { + # If there are many tests to compare or only few data points, use boxplot with extra percentile points. + latp = ggplot(data=latencydata, aes(x=n_pods)) + ylab("Latency (us)") + xlab("pods") + scale_y_continuous(labels=comma) + perc_mid = floor((perc_count)/2) + # Create boxplot around the middle percentile + if (perc_count >= 3) { + box_bottom=names(ltpt)[perc_mid+1] + box_mid=names(ltpt)[perc_mid+2] + box_top=names(ltpt)[perc_mid+3] + if (perc_count >= 5) { + whis_low=names(ltpt)[perc_mid] + whis_high=names(ltpt)[perc_mid+4] + latp = latp + geom_boxplot(aes_string(group="interaction(testname,n_pods)",ymin=whis_low,lower=box_bottom,middle=box_mid,upper=box_top,ymax=whis_high,fill="testname"),stat="identity") + } else { + latp = latp + geom_boxplot(aes_string(group="interaction(testname,n_pods)",lower=box_bottom,middle=box_mid,upper=box_top,fill="testname"),stat="identity") + } + } + # Boxplot (above) covers at most 5 percentiles around the center (median). + # Visualize the rest using a point for each percentile. 
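+  # Worked example with the default percentile set (0, 1, 5, 25, 50, 75, 95, 99, 100):
+  # the boxplot above spans p5 (lower whisker) to p95 (upper whisker) around the p50
+  # median, and the loop below adds separate points for p0, p1, p99 and p100.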
+ if (perc_count > 5) { + for (n in seq(1, (perc_count-5)/2)) { + lower_name=names(ltpt)[n+1] + upper_name=names(ltpt)[perc_count-n+2] + latp = latp + geom_point(aes_string(group="interaction(testname,n_pods)",y=lower_name, color="testname")) + latp = latp + geom_point(aes_string(group="interaction(testname,n_pods)",y=upper_name, color="testname")) + } + } +} else { + # Use colored areas and median lines when there are many ticks on X axis + latp = ggplot(data=latencydata, aes(x=n_pods)) + ylab("Latency (us)") + xlab("pods") + scale_y_continuous(labels=comma) + perc_mid = floor((perc_count)/2) + perc_maxdist = perc_mid + plot_number = 0 + for (plot_test in unique(latencydata$testname)) { + plot_number = plot_number + 1 + for (n in seq(perc_mid)) { + # First fill outmost areas, like p5..p25 and p75..p95, + # then areas closer to the middle, like p25..p50 and p50..p75 + lower_name = names(ltpt)[n+1] + lower_next_name = names(ltpt)[n+2] + upper_name = names(ltpt)[perc_count-n+2] + upper_prev_name = names(ltpt)[perc_count-n+1] + alpha = 0.7 * ((n+1) / (perc_mid+1))**2 + latp = latp + geom_ribbon(data=latencydata[latencydata$testname==plot_test,],aes_string(x="n_pods",ymin=lower_name,ymax=lower_next_name,fill="testname"),alpha=alpha) + latp = latp + geom_ribbon(data=latencydata[latencydata$testname==plot_test,],aes_string(x="n_pods",ymin=upper_prev_name,ymax=upper_name,fill="testname"),alpha=alpha) + } + median_index = match("p50", names(ltpt)) + if (!is.na(median_index)) { + # Draw median line + latp = latp + geom_line(data=latencydata[latencydata$testname==plot_test,],aes_string(x="n_pods",y=names(ltpt)[median_index],color="testname")) + } + } +} + +# Table presentation. +lat_table=c() +for (testname in unique(latencydata$testname)) { + testlines=latencydata[latencydata$testname==testname,] + lat_table=rbind(lat_table,testlines[1,]) + if (length(testlines) > 3) { + # middle pod count + lat_table=rbind(lat_table,testlines[(length(testlines)-1)/2,]) + } + if (length(testlines) > 2) { + # max pod count + lat_table=rbind(lat_table,testlines[length(testlines)-1,]) + } +} +latt=ggtexttable(lat_table,rows=NULL) + +cat("\n\nLatency percentiles illustrated in the Figure below: ", paste0(ltp_perc, "\\%"), "\n\n") + +page1 = grid.arrange(latp, latt, ncol=1) + +# pagebreak, as the graphs overflow the page otherwise +cat("\n\n\\pagebreak\n") diff --git a/metrics/scaling/k8s_scale_nc.sh b/metrics/scaling/k8s_scale_nc.sh deleted file mode 100755 index f1b86b1b..00000000 --- a/metrics/scaling/k8s_scale_nc.sh +++ /dev/null @@ -1,468 +0,0 @@ -#!/bin/bash -# Copyright (c) 2019 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -set -e - -# Pull in some common, useful, items -SCRIPT_PATH=$(dirname "$(readlink -f "$0")") -source "${SCRIPT_PATH}/../lib/common.bash" -source "${SCRIPT_PATH}/common.bash" - -LABELVALUE=${LABELVALUE:-scale_nc} - -# Latency test parameters: -# number of requests to be sent to each pod -nc_reqs_per_pod=${nc_reqs_per_pod:-100} -# length of each request [bytes] -nc_req_msg_len=${nc_req_msg_len:-1000} -# port that request servers listen to in pods -nc_port=33101 -# request message -nc_req_msg=$(head -c $nc_req_msg_len /dev/zero | tr '\0' 'x') - -pod_command="[\"nc\", \"-lk\", \"-p\", \"${nc_port}\", \"-e\", \"/bin/cat\"]" - -# Set some default metrics env vars -TEST_ARGS="runtime=${RUNTIME}" -TEST_NAME="k8s scaling nc" - -# $1 is the launch time in seconds this pod/container took to start up. 
-# $2 is the number of pod/containers under test -grab_stats() { - local launch_time_ms=$1 - local n_pods=$2 - shift ; shift - local latency_percentiles=($@) # array of percentiles - local cpu_idle=() - local mem_free=() - local total_mem_used=0 - - info "And grab some stats" - - local date_json="$(cat << EOF - "date": { - "ns": $(date +%s%N), - "Date": "$(date -u +"%Y-%m-%dT%T.%3N")" - } -EOF - )" - metrics_json_add_array_fragment "$date_json" - - local pods_json="$(cat << EOF - "n_pods": { - "Result": ${n_pods}, - "Units" : "int" - } -EOF - )" - metrics_json_add_array_fragment "$pods_json" - - local launch_json="$(cat << EOF - "launch_time": { - "Result": $launch_time_ms, - "Units" : "ms" - } -EOF - )" - metrics_json_add_array_fragment "$launch_json" - - local latency_json="$(cat << EOF - "latency_time": { - "Pod_command": "${pod_command//\"/\\\"}", - "Request_length": "${nc_req_msg_len}", - "Requests_per_pod": "${nc_reqs_per_pod}", - "Sender": "serial", - "Percentiles": [$(IFS=, ; echo "${latency_percentiles[*]}")], - "Result": ${latency_percentiles[$(( ${#latency_percentiles[@]} / 2 ))]}, - "Units" : "ms" - } -EOF - )" - metrics_json_add_array_fragment "$latency_json" - - # start the node utilization array - metrics_json_start_nested_array - - # grab pods in the stats daemonset - # use 3 for the file descriptor rather than stdin otherwise the sh commands - # in the middle will read the rest of stdin - while read -u 3 name node; do - # look for taint that prevents scheduling - local noschedule=false - local t_match_values=$(kubectl get node ${node} -o json | jq 'select(.spec.taints) | .spec.taints[].effect == "NoSchedule"') - for v in $t_match_values; do - if [[ $v == true ]]; then - noschedule=true - break - fi - done - # Tell mpstat to measure over a short period, not only so we get slightly de-noised data, but also - # if you don't tell it the period, you will get the avg since boot, which is not what we want. - local cpu_idle=$(kubectl exec -ti $name -- sh -c "mpstat -u 3 1 | tail -1 | awk '{print \$11}'" | sed 's/\r//') - local mem_free=$(kubectl exec -ti $name -- sh -c "free | tail -2 | head -1 | awk '{print \$4}'" | sed 's/\r//') - local inode_free=$(kubectl exec -ti $name -- sh -c "df -i | awk '/^overlay/ {print \$4}'" | sed 's/\r//') - - info "idle [$cpu_idle] free [$mem_free] launch [$launch_time_ms] node [$node] inodes_free [$inode_free]" - - # Annoyingly, it seems sometimes once in a while we don't get an answer! 
- # We should really retry, but for now, make the json valid at least - cpu_idle=${cpu_idle:-0} - mem_free=${mem_free:-0} - inode_free=${inode_free:-0} - - # If this is the 0 node instance, store away the base memory value - if [ $n_pods -eq 0 ]; then - node_basemem[$node]=$mem_free - node_baseinode[$node]=$inode_free - fi - - local mem_used=$((node_basemem[$node]-mem_free)) - local inode_used=$((node_baseinode[$node]-inode_free)) - # Only account for memory usage on schedulable nodes - if [ $noschedule == false ]; then - total_mem_used=$((total_mem_used+mem_used)) - fi - - local util_json="$(cat << EOF - { - "node": "${node}", - "noschedule": "${noschedule}", - "cpu_idle": { - "Result": ${cpu_idle}, - "Units" : "%" - }, - "mem_free": { - "Result": ${mem_free}, - "Units" : "kb" - }, - "mem_used": { - "Result": ${mem_used}, - "Units" : "kb" - }, - "inode_free": { - "Result": ${inode_free} - }, - "inode_used": { - "Result": ${inode_used} - } - } -EOF - )" - - metrics_json_add_nested_array_element "$util_json" - - done 3< <(kubectl get pods --selector name=stats-pods -o json | jq -r '.items[] | "\(.metadata.name) \(.spec.nodeName)"') - - metrics_json_end_nested_array "node_util" - - # start the new pods array - metrics_json_start_nested_array - - # for the first call to grab stats, there are no new pods - # so we need to fill in with NA (R specific value) in matching - # dimension to the rest of the calls to grab_stats, so $STEP items - if [[ ${#new_pods[@]} == 0 ]]; then - for i in $STEP; do - local new_pod_json="$(cat << EOF - { - "pod_name": "NA", - "node": "NA" - } -EOF - )" - metrics_json_add_nested_array_element "$new_pod_json" - done - else - local maxelem=$(( ${#new_pods[@]} - 1 )) - for index in $(seq 0 $maxelem); do - local node=$(kubectl get pod ${new_pods[$index]} -o json | jq -r '"\(.spec.nodeName)"') - local new_pod_json="$(cat << EOF - { - "pod_name": "${new_pods[$index]}", - "node": "${node}" - } -EOF - )" - metrics_json_add_nested_array_element "$new_pod_json" - done - fi - metrics_json_end_nested_array "launched_pods" - - # And store off the total memory consumed across all nodes, and the pod/Gb value - if [ $n_pods -eq 0 ]; then - local pods_per_gb=0 - else - local pods_per_gb=$(printf "%0f" $(bc -l <<< "scale=2; ($total_mem_used/1024) / $n_pods")) - fi - local mem_json="$(cat << EOF - "memory": { - "consumed": { - "Result": ${total_mem_used}, - "Units": "Kb" - }, - "pods_per_gb": { - "Result": ${pods_per_gb} - } - } -EOF - )" - metrics_json_add_array_fragment "$mem_json" - - metrics_json_close_array_element -} - -init() { - info "Initialising" - - local cmds=("bc" "jq") - check_cmds "${cmds[@]}" - - info "Checking Kubernetes accessible" - local worked=$( kubectl get nodes > /dev/null 2>&1 && echo $? || echo $? ) - if [ "$worked" != 0 ]; then - die "kubectl failed to get nodes" - fi - - info $(get_num_nodes) "Kubernetes nodes in 'Ready' state found" - # We could check we have just the one node here - right now this is a single node - # test!! - because, our stats gathering is rudimentry, as k8s does not provide - # a nice way to do it (unless you want to parse 'descibe nodes') - # Have a read of https://github.com/kubernetes/kubernetes/issues/25353 - - # FIXME - check the node(s) can run enough pods - check 'max-pods' in the - # kubelet config - from 'kubectl describe node -o json' ? 
- - framework_init - - # Launch our stats gathering pod - kubectl apply -f ${SCRIPT_PATH}/${stats_pod}.yaml - kubectl rollout status --timeout=${wait_time}s daemonset/${stats_pod} - - # FIXME - we should probably 'warm up' the cluster with the container image(s) we will - # use for testing, otherwise the download time will likely be included in the first pod - # boot time. -} - -save_config(){ - metrics_json_start_array - - local json="$(cat << EOF - { - "testname": "${TEST_NAME}", - "NUM_PODS": ${NUM_PODS}, - "STEP": ${STEP}, - "wait_time": ${wait_time}, - "delete_wait_time": ${delete_wait_time}, - "settle_time": ${settle_time} - } -EOF -)" - metrics_json_add_array_element "$json" - metrics_json_end_array "Config" -} - -run() { - info "Running test" - - trap cleanup EXIT QUIT KILL - - metrics_json_start_array - - # grab starting stats before launching workload pods - grab_stats 0 0 0 - - for reqs in $(seq ${STEP} ${STEP} ${NUM_PODS}); do - info "Testing replicas ${reqs} of ${NUM_PODS}" - # Generate the next yaml file - - local runtime_command - if [ -n "$RUNTIME" ]; then - runtime_command="s|@RUNTIMECLASS@|${RUNTIME}|g" - else - runtime_command="/@RUNTIMECLASS@/d" - fi - - local input_template - local generated_file - - if [ "$use_api" != "no" ]; then - input_template=$input_json - generated_file=$generated_json - else - input_template=$input_yaml - generated_file=$generated_yaml - fi - - sed -e "s|@REPLICAS@|${reqs}|g" \ - -e $runtime_command \ - -e "s|@DEPLOYMENT@|${deployment}|g" \ - -e "s|@LABEL@|${LABEL}|g" \ - -e "s|@LABELVALUE@|${LABELVALUE}|g" \ - -e "s|@GRACE@|${grace}|g" \ - -e "s#@PODCOMMAND@#${pod_command}#g" \ - < ${input_template} > ${generated_file} - - # get list of workload pods before launching another one - local pods_before=$(kubectl get pods --selector ${LABEL}=${LABELVALUE} -o json | jq -r '.items[] | "\(.metadata.name)"') - - info "Applying changes" - local start_time=$(date +%s%N) - if [ "$use_api" != "no" ]; then - # If this is the first launch of the deploy, we need to use a different URL form. 
- if [ $reqs == ${STEP} ]; then - curl -s ${API_ADDRESS}:${API_PORT}/apis/apps/v1/namespaces/default/deployments -XPOST -H 'Content-Type: application/json' -d@${generated_file} > /dev/null - else - curl -s ${API_ADDRESS}:${API_PORT}/apis/apps/v1/namespaces/default/deployments/${deployment} -XPATCH -H 'Content-Type:application/strategic-merge-patch+json' -d@${generated_file} > /dev/null - fi - else - kubectl apply -f ${generated_file} - fi - - #cmd="kubectl get pods | grep busybox | grep Completed" - kubectl rollout status --timeout=${wait_time}s deployment/${deployment} - local end_time=$(date +%s%N) - local total_milliseconds=$(( (end_time - start_time) / 1000000 )) - info "Took $total_milliseconds ms ($end_time - $start_time)" - - # grab list of workload pods after - local pods_after=$(kubectl get pods --selector ${LABEL}=${LABELVALUE} -o json | jq -r '.items[] | "\(.metadata.name)"') - find_unique_pods "${pods_after}" "${pods_before}" - - sleep ${settle_time} - - if [[ ${nc_reqs_per_pod} -ge 1 ]]; then - pod_ips=$(kubectl get pods --selector ${LABEL}=${LABELVALUE} -o json | jq -r '.items[].status.podIP') - if [[ ${reqs} != $(echo $pod_ips | wc -w) ]]; then - info "WARNING: pod IP count mismatch expected ${reqs} found $(echo $pod_ips | wc -w)" - fi - info "Measuring latency, sending ${nc_reqs_per_pod} messages to each of the ${reqs} pods" - local latency_failures=0 - local latency_pod_array=() - for latency_round in $(seq ${nc_reqs_per_pod}); do - for pod_ip in ${pod_ips}; do - local latency_pod_start_time=$(date +%s%N) - if [[ $(echo ${nc_req_msg} | nc ${pod_ip} ${nc_port}) != "${nc_req_msg}" ]]; then - latency_failures=$(( latency_failures + 1 )) - fi - local latency_pod_end_time=$(date +%s%N) - latency_pod_array+=($(( (latency_pod_end_time - latency_pod_start_time) / 1000000 ))) - done - done - IFS=$'\n' - local latency_pod_array_sorted=($(sort -n <<<"${latency_pod_array[*]}")) - unset IFS - local latency_pod_array_len=${#latency_pod_array[@]} - local latency_percentiles=() - latency_percentiles+=(${latency_pod_array_sorted[$(bc <<<"$latency_pod_array_len / 20")]}) - latency_percentiles+=(${latency_pod_array_sorted[$(bc <<<"$latency_pod_array_len / 4")]}) - latency_percentiles+=(${latency_pod_array_sorted[$(bc <<<"$latency_pod_array_len / 2")]}) - latency_percentiles+=(${latency_pod_array_sorted[$(bc <<<"$latency_pod_array_len / 1.25")]}) - latency_percentiles+=(${latency_pod_array_sorted[$(bc <<<"$latency_pod_array_len / 1.05")]}) - info "Latency percentiles [ms] 5-25-50-75-95 %: ${latency_percentiles[*]}" - else - local latency_avg_ms=0 - local latency_percentiles=(0 0 0 0 0) - - fi - - grab_stats $total_milliseconds $reqs ${latency_percentiles[@]} - done -} - -cleanup() { - info "Cleaning up" - - # First try to save any results we got - metrics_json_end_array "BootResults" - - kubectl delete daemonset --wait=true --timeout=${delete_wait_time}s "${stats_pod}" || true - local start_time=$(date +%s%N) - kubectl delete deployment --wait=true --timeout=${delete_wait_time}s ${deployment} || true - for x in $(seq 1 ${delete_wait_time}); do - local npods=$(kubectl get pods -l=${LABEL}=${LABELVALUE} -o=name | wc -l) - if [ $npods -eq 0 ]; then - echo "All pods have terminated at cycle $x" - local alldied=true - break; - fi - sleep 1 - done - local end_time=$(date +%s%N) - local total_milliseconds=$(( (end_time - start_time) / 1000000 )) - if [ -z "$alldied" ]; then - echo "ERROR: Not all pods died!" 
- fi - info "Delete Took $total_milliseconds ms ($end_time - $start_time)" - - local json="$(cat << EOF - "Delete": { - "Result": ${total_milliseconds}, - "Units" : "ms" - } -EOF -)" - - metrics_json_add_fragment "$json" - framework_shutdown -} - -show_vars() -{ - echo -e "\nEnvironment variables:" - echo -e "\tName (default)" - echo -e "\t\tDescription" - echo -e "\tTEST_NAME (${TEST_NAME})" - echo -e "\t\tCan be set to over-ride the default JSON results filename" - echo -e "\tNUM_PODS (${NUM_PODS})" - echo -e "\t\tNumber of pods to launch" - echo -e "\tSTEP (${STEP})" - echo -e "\t\tNumber of pods to launch per cycle" - echo -e "\twait_time (${wait_time})" - echo -e "\t\tSeconds to wait for pods to become ready" - echo -e "\tdelete_wait_time (${delete_wait_time})" - echo -e "\t\tSeconds to wait for all pods to be deleted" - echo -e "\tsettle_time (${settle_time})" - echo -e "\t\tSeconds to wait after pods ready before taking measurements" - echo -e "\tuse_api (${use_api})" - echo -e "\t\tspecify yes or no to use the API to launch pods" - echo -e "\tgrace (${grace})" - echo -e "\t\tspecify the grace period in seconds for workload pod termination" -} - -help() -{ - usage=$(cat << EOF -Usage: $0 [-h] [options] - Description: - Launch a series of workloads and take memory metric measurements after - each launch. - Options: - -h, Help page. -EOF -) - echo "$usage" - show_vars -} - -main() { - - local OPTIND - while getopts "h" opt;do - case ${opt} in - h) - help - exit 0; - ;; - esac - done - shift $((OPTIND-1)) - init - run - # cleanup will happen at exit due to the shell 'trap' we registered - # cleanup -} - -main "$@" diff --git a/metrics/scaling/k8s_scale_rapid_nc.sh b/metrics/scaling/k8s_scale_rapid_nc.sh new file mode 100755 index 00000000..6270e693 --- /dev/null +++ b/metrics/scaling/k8s_scale_rapid_nc.sh @@ -0,0 +1,337 @@ +#!/bin/bash +# Copyright (c) 2019 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -e + +# Pull in some common, useful, items +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +LABELVALUE=${LABELVALUE:-rapid_nc} +source "${SCRIPT_PATH}/../lib/common.bash" +source "${SCRIPT_PATH}/common.bash" +source "${SCRIPT_PATH}/../collectd/collectd.bash" + +SMF_USE_COLLECTD=true + +# Network latency test parameters: +# number of requests to be sent after each scaling step +nc_reqs=${nc_reqs:-1000} +# length of each request [bytes] +nc_req_msg_len=${nc_req_msg_len:-1000} +# port that request servers listen to in pods +nc_port=33101 +# request message +nc_req_msg=$(head -c $nc_req_msg_len /dev/zero | tr '\0' 'x') +nc_percentiles=(0 1 5 25 50 75 95 99 100) + +pod_command="[\"nc\", \"-lk\", \"-p\", \"${nc_port}\", \"-e\", \"/bin/sh\", \"-c\", \"/bin/echo \${EPOCHREALTIME/./}; /bin/cat; /bin/echo \${EPOCHREALTIME/./}\"]" + +# Set some default metrics env vars +TEST_ARGS="runtime=${RUNTIME}" +TEST_NAME="k8s rapid nc" + +# $1 is the launch time in seconds this pod/container took to start up. 
+# $2 is the number of pod/containers under test +grab_stats() { + local launch_time_ms=$1 + local n_pods=$2 + shift ; shift + local latency_percentiles=($@) # array of percentiles + local cpu_idle=() + local mem_free=() + local total_mem_used=0 + + info "And grab some stats" + + local date_json="$(cat << EOF + "date": { + "ns": $(date +%s%N), + "Date": "$(date -u +"%Y-%m-%dT%T.%3N")" + } +EOF + )" + metrics_json_add_array_fragment "$date_json" + + local pods_json="$(cat << EOF + "n_pods": { + "Result": ${n_pods}, + "Units" : "int" + } +EOF + )" + metrics_json_add_array_fragment "$pods_json" + + local launch_json="$(cat << EOF + "launch_time": { + "Result": $launch_time_ms, + "Units" : "ms" + } +EOF + )" + metrics_json_add_array_fragment "$launch_json" + + local latency_json="$(cat << EOF + "latency_time": { + "Percentiles": [$(IFS=, ; echo "${latency_percentiles[*]}")], + "Result": ${latency_percentiles[$(( ${#latency_percentiles[@]} / 2 ))]}, + "Units" : "ms" + } +EOF + )" + + metrics_json_add_array_fragment "$latency_json" + + info "launch [$launch_time_ms]" + + metrics_json_close_array_element +} + +init() { + framework_init +} + +save_config(){ + metrics_json_start_array + + local json="$(cat << EOF + { + "testname": "${TEST_NAME}", + "NUM_PODS": ${NUM_PODS}, + "STEP": ${STEP}, + "wait_time": ${wait_time}, + "delete_wait_time": ${delete_wait_time}, + "settle_time": ${settle_time}, + "nc_reqs": ${nc_reqs}, + "nc_req_msg_len": ${nc_req_msg_len}, + "nc_percentiles": [$(IFS=, ; echo "${nc_percentiles[*]}")] + } +EOF +)" + metrics_json_add_array_element "$json" + metrics_json_end_array "Config" +} + +run() { + info "Running test" + + trap cleanup EXIT QUIT KILL + + metrics_json_start_array + + for reqs in $(seq ${STEP} ${STEP} ${NUM_PODS}); do + info "Testing replicas ${reqs} of ${NUM_PODS}" + # Generate the next yaml file + + local runtime_command + if [ -n "$RUNTIME" ]; then + runtime_command="s|@RUNTIMECLASS@|${RUNTIME}|g" + else + runtime_command="/@RUNTIMECLASS@/d" + fi + + local input_template + local generated_file + if [ "$use_api" != "no" ]; then + input_template=$input_json + generated_file=$generated_json + else + input_template=$input_yaml + generated_file=$generated_yaml + fi + + sed -e "s|@REPLICAS@|${reqs}|g" \ + -e $runtime_command \ + -e "s|@DEPLOYMENT@|${deployment}|g" \ + -e "s|@LABEL@|${LABEL}|g" \ + -e "s|@LABELVALUE@|${LABELVALUE}|g" \ + -e "s|@GRACE@|${grace}|g" \ + -e "s#@PODCOMMAND@#${pod_command}#g" \ + < ${input_template} > ${generated_file} + + info "Applying changes" + local start_time=$(date +%s%N) + if [ "$use_api" != "no" ]; then + # If this is the first launch of the deploy, we need to use a different URL form. 
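+			# A POST to .../deployments creates the deployment on the first pass;
+			# later passes PATCH .../deployments/${deployment} with a
+			# strategic-merge-patch document to raise the replica count.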
+ if [ $reqs == ${STEP} ]; then + curl -s ${API_ADDRESS}:${API_PORT}/apis/apps/v1/namespaces/default/deployments -XPOST -H 'Content-Type: application/json' -d@${generated_file} > /dev/null + else + curl -s ${API_ADDRESS}:${API_PORT}/apis/apps/v1/namespaces/default/deployments/${deployment} -XPATCH -H 'Content-Type:application/strategic-merge-patch+json' -d@${generated_file} > /dev/null + fi + else + kubectl apply -f ${generated_file} + fi + + kubectl rollout status --timeout=${wait_time}s deployment/${deployment} + local end_time=$(date +%s%N) + local total_milliseconds=$(( (end_time - start_time) / 1000000 )) + info "Took $total_milliseconds ms ($end_time - $start_time)" + + sleep ${settle_time} + + # Measure network latency + if [[ ${nc_reqs} -ge 1 ]]; then + mkdir -p "$RESULT_DIR" 2>/dev/null || true + local latency_raw_output="$RESULT_DIR/${TEST_NAME// /-}.tmaster_tworker_pods_req_ipaddr_lattot_latconn_latio_latdisconn_rx.raw" + local pod_ips=($(kubectl get pods --selector ${LABEL}=${LABELVALUE} -o json | jq -r '.items[].status.podIP')) + local pod_ips_len=${#pod_ips[@]} + if [[ ${reqs} != ${pod_ips_len} ]]; then + info "WARNING: pod IP count mismatch expected ${reqs} found ${pod_ips_len}" + fi + info "Measuring latency, sending ${nc_reqs} messages to ${reqs} pods (~$((nc_reqs / reqs)) messages each)" + local latency_failures=0 + local latency_pod_array=() + + # send $nc_reqs messages, go through pods + local req_index=0 + local pod_index=0 + while [[ $req_index -lt $nc_reqs ]] && [[ $pod_ips_len -gt 0 ]]; do + req_index=$(( req_index + 1 )) + pod_index=$(( pod_index + 1 )) + if [[ $pod_index -ge $pod_ips_len ]]; then + pod_index=0 + fi + local pod_ip=${pod_ips[$pod_index]} + local latency_failed=0 + local latency_pod_start_time=${EPOCHREALTIME/./} + local latency_pod_start_response_end=$(echo ${latency_pod_start_time} ${nc_req_msg} | nc ${pod_ip} ${nc_port}) + # start_response_end contents: + local latency_pod_end_time=${EPOCHREALTIME/./} + local latency_response_microseconds=$(( latency_pod_end_time - latency_pod_start_time )) + local latency_pod_response=$(echo $latency_pod_start_response_end | awk '{print $3}') + if [[ "$latency_pod_response" != "${nc_req_msg}" ]]; then + latency_failures=$(( latency_failures + 1 )) + local latency_pod_first_t=$latency_pod_end_time + local latency_pod_last_t=$latency_pod_end_time + latency_failed=1 + else + local latency_pod_first_t=$(echo $latency_pod_start_response_end | awk '{print $1}') + local latency_pod_last_t=$(echo $latency_pod_start_response_end | awk '{print $4}') + fi + local latency_pod_local_io=$(( latency_pod_last_t - latency_pod_first_t )) + local latency_pod_conn=$(( latency_pod_first_t - latency_pod_start_time )) + local latency_pod_disconn=$(( latency_pod_end_time - latency_pod_last_t )) + latency_pod_array+=($latency_response_microseconds) + echo "$latency_pod_start_time $latency_pod_first_t $reqs $req_index $pod_ip $latency_response_microseconds $latency_pod_conn $latency_pod_local_io $latency_pod_disconn $(echo $latency_pod_start_response_end | wc -c)" >> $latency_raw_output + done + IFS=$'\n' + local latency_pod_array_sorted=($(sort -n <<<"${latency_pod_array[*]}")) + unset IFS + local latency_pod_array_len=${#latency_pod_array[@]} + local latency_percentiles=() + for p in ${nc_percentiles[@]}; do + if [[ $p -lt 100 ]]; then + latency_percentiles+=(${latency_pod_array_sorted[$(bc <<<"$latency_pod_array_len * $p / 100")]}) + else + # Asking for a value that is greater than 100 % of measured values. 
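+				# (with N measurements, bc gives index N*p/100 == N at p=100, overrunning the 0..N-1 array)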
+ # This is the way to save the maximum value. + latency_percentiles+=(${latency_pod_array_sorted[$(bc <<<"$latency_pod_array_len - 1")]}) + fi + done + info "Latency percentiles [ms] ${nc_percentiles[@]} %: ${latency_percentiles[@]}" + else + local latency_avg_ms=0 + local latency_percentiles=() + for p in ${nc_percentiles[@]}; do + latency_percentiles+=(0) + done + fi + + grab_stats $total_milliseconds $reqs ${latency_percentiles[@]} + done +} + +cleanup() { + info "Cleaning up" + + # First try to save any results we got + metrics_json_end_array "BootResults" + + local start_time=$(date +%s%N) + kubectl delete deployment --wait=true --timeout=${delete_wait_time}s ${deployment} || true + for x in $(seq 1 ${delete_wait_time}); do + local npods=$(kubectl get pods -l=${LABEL}=${LABELVALUE} -o=name | wc -l) + if [ $npods -eq 0 ]; then + echo "All pods have terminated at cycle $x" + local alldied=true + break; + fi + sleep 1 + done + local end_time=$(date +%s%N) + local total_milliseconds=$(( (end_time - start_time) / 1000000 )) + if [ -z "$alldied" ]; then + echo "ERROR: Not all pods died!" + fi + info "Delete Took $total_milliseconds ms ($end_time - $start_time)" + + local json="$(cat << EOF + "Delete": { + "Result": ${total_milliseconds}, + "Units" : "ms" + } +EOF +)" + + metrics_json_add_fragment "$json" + framework_shutdown +} + +show_vars() +{ + echo -e "\nEnvironment variables:" + echo -e "\tName (default)" + echo -e "\t\tDescription" + echo -e "\tTEST_NAME (${TEST_NAME})" + echo -e "\t\tCan be set to over-ride the default JSON results filename" + echo -e "\tNUM_PODS (${NUM_PODS})" + echo -e "\t\tNumber of pods to launch" + echo -e "\tSTEP (${STEP})" + echo -e "\t\tNumber of pods to launch per cycle" + echo -e "\twait_time (${wait_time})" + echo -e "\t\tSeconds to wait for pods to become ready" + echo -e "\tdelete_wait_time (${delete_wait_time})" + echo -e "\t\tSeconds to wait for all pods to be deleted" + echo -e "\tsettle_time (${settle_time})" + echo -e "\t\tSeconds to wait after pods ready before taking measurements" + echo -e "\tuse_api (${use_api})" + echo -e "\t\tspecify yes or no to use the API to launch pods" + echo -e "\tgrace (${grace})" + echo -e "\t\tspecify the grace period in seconds for workload pod termination" +} + +help() +{ + usage=$(cat << EOF +Usage: $0 [-h] [options] + Description: + Launch a series of workloads and take memory metric measurements after + each launch. + Options: + -h, Help page. +EOF +) + echo "$usage" + show_vars +} + +main() { + + local OPTIND + while getopts "h" opt;do + case ${opt} in + h) + help + exit 0; + ;; + esac + done + shift $((OPTIND-1)) + init + run + # cleanup will happen at exit due to the shell 'trap' we registered + # cleanup +} + +main "$@"
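
The round trip measured above can also be exercised by hand against a single pod,
which shows where the connect/io/disconnect columns of the raw output file come
from. The sketch below is only an illustration of the message exchange the test
performs: the pod IP is hypothetical, and it assumes bash 5+ (for EPOCHREALTIME)
and a pod started with the pod_command defined in k8s_scale_rapid_nc.sh, i.e. one
that echoes its own timestamp, cats the request back, then echoes a second
timestamp. The connect/disconnect components compare the client's clock with the
pod's clock, so they are only meaningful when the two clocks are closely
synchronized.

# Reply format: "<pod_first_t> <client_start_t> <request_msg> <pod_last_t>",
# all timestamps in microseconds.
pod_ip=10.244.1.17      # hypothetical; see 'kubectl get pods -o wide'
nc_port=33101
nc_req_msg=$(head -c 1000 /dev/zero | tr '\0' 'x')

start_t=${EPOCHREALTIME/./}
reply=$(echo ${start_t} ${nc_req_msg} | nc ${pod_ip} ${nc_port})
end_t=${EPOCHREALTIME/./}

pod_first_t=$(echo ${reply} | awk '{print $1}')
pod_last_t=$(echo ${reply} | awk '{print $4}')

echo "total round trip: $(( end_t - start_t )) us"          # what the percentiles summarize
echo "connect:          $(( pod_first_t - start_t )) us"    # request sent -> pod starts replying
echo "pod-local io:     $(( pod_last_t - pod_first_t )) us"
echo "disconnect:       $(( end_t - pod_last_t )) us"       # pod finished -> client sees EOF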