From 866a20be8e82956b0895292171144980ed4d7af9 Mon Sep 17 00:00:00 2001 From: Antti Kervinen Date: Wed, 9 Oct 2019 16:50:23 +0300 Subject: [PATCH] metrics: report pod communication latency percentiles in pdf - Add R routine to load and plot percentiles from JSON. - Support any number of percentiles 1..n. - Store percentile configuration in JSON. Signed-off-by: Antti Kervinen --- metrics/lib/common.bash | 4 +- .../report_dockerfile/metrics_report.Rmd | 12 + .../report_dockerfile/tidy_scaling_nc.R | 155 ++++++ metrics/scaling/k8s_scale_nc.sh | 468 ------------------ metrics/scaling/k8s_scale_rapid_nc.sh | 337 +++++++++++++ 5 files changed, 506 insertions(+), 470 deletions(-) create mode 100755 metrics/report/report_dockerfile/tidy_scaling_nc.R delete mode 100755 metrics/scaling/k8s_scale_nc.sh create mode 100755 metrics/scaling/k8s_scale_rapid_nc.sh diff --git a/metrics/lib/common.bash b/metrics/lib/common.bash index dd76d986..9c601c83 100755 --- a/metrics/lib/common.bash +++ b/metrics/lib/common.bash @@ -84,7 +84,7 @@ framework_init() { k8s_api_init # Launch our stats gathering pod - if [ -n "$SMF_USE_COLLECTD" ]; then + if [ "$SMF_USE_COLLECTD" == "true" ]; then info "Setting up collectd" init_stats $wait_time fi @@ -104,7 +104,7 @@ framework_shutdown() { k8s_api_shutdown cpu_load_shutdown - if [ -n "$SMF_USE_COLLECTD" ]; then + if [ "$SMF_USE_COLLECTD" == "true" ]; then cleanup_stats fi diff --git a/metrics/report/report_dockerfile/metrics_report.Rmd b/metrics/report/report_dockerfile/metrics_report.Rmd index 630365ac..c88be2f0 100644 --- a/metrics/report/report_dockerfile/metrics_report.Rmd +++ b/metrics/report/report_dockerfile/metrics_report.Rmd @@ -51,6 +51,18 @@ source('collectd_scaling.R') \pagebreak +# Pod communication latency +This [test](https://github.com/clearlinux/cloud-native-setup/metrics/scaling/k8s_scale_rapid_nc.sh) +measures pod query--response latency when scaling up. The +time is measured from sending a message directly to a socket, that `nc` +listens to inside each pod, to reading the response from the pod. + +```{r scaling_nc, echo=FALSE, fig.cap="K8S pod communication latency", results='asis'} +source('tidy_scaling_nc.R') +``` + +\pagebreak + # Test setup details This table describes the test system details, as derived from the information contained diff --git a/metrics/report/report_dockerfile/tidy_scaling_nc.R b/metrics/report/report_dockerfile/tidy_scaling_nc.R new file mode 100755 index 00000000..ea2af33b --- /dev/null +++ b/metrics/report/report_dockerfile/tidy_scaling_nc.R @@ -0,0 +1,155 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2019 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Show pod communication latency + +suppressMessages(suppressWarnings(library(ggplot2))) # ability to plot nicely. +suppressWarnings(suppressWarnings(library(ggpubr))) # ggtexttable +suppressMessages(library(jsonlite)) # to load the data. +suppressMessages(library(scales)) # For de-science notation of axis +library(tibble) # tibbles for tidy data + +testnames=c( + "k8s-rapid-nc" +) + +### For developers: uncomment following variables to run this as is in R +# resultdirs=c("PATH/TO/RES1/", ...) 
# keep the ending slash on result paths +# inputdir="" + +latencydata=c() + +# iterate over every set of results (test run) +for (currentdir in resultdirs) { + # For every results file we are interested in evaluating + for (testname in testnames) { + matchdir=paste(inputdir, currentdir, sep="") + matchfile=paste(testname, '\\.json', sep="") + files=list.files(matchdir, pattern=matchfile) + + # For every matching results file + for (ffound in files) { + fname=paste(inputdir, currentdir, ffound, sep="") + if (!file.exists(fname)) { + warning(paste("Skipping non-existent file: ", fname)) + next + } + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + # Import the data + fdata=fromJSON(fname) + # De-nest the test name specific data + shortname=substr(ffound, 1, nchar(ffound)-nchar(".json")) + fdata=fdata[[shortname]] + testname=datasetname + + # All the data we are looking for comes in BootResults, + # so pick it out to make referencing easier + br=fdata$BootResults + + ######################################################## + #### Now extract latency time percentiles (ltp) ######## + ######################################################## + ltp=br$latency_time$Percentiles + # Percentile thresholds, for example [5, 25, 50, 75, 95] + ltp_perc=fdata$Config$nc_percentiles[[1]] + perc_count = length(ltp_perc) + # Measured times + ltp_meas=matrix(unlist(ltp), nrow=perc_count) + # Build latency percentiles tibble with nice headings + ltpt=tibble(n_pods=br$n_pods$Result) + for (n in seq(perc_count)) { + p_title = paste0("p", ltp_perc[n]) + ltpt[p_title] = ltp_meas[n,] + } + # ltpt example: with percentiles [5, 50, 95]: + # n_pods p5 p50 p95 + # 100 4 8 10 + # 200 5 11 15 + # 300 6 14 19 + ltpt$testname=testname + latencydata=rbind(latencydata, ltpt) + } + } +} + +# Visualize data. +if (length(latencydata[[1]]) <= 5 || length(unique(latencydata$testname)) > 1) { + # If there are many tests to compare or only few data points, use boxplot with extra percentile points. + latp = ggplot(data=latencydata, aes(x=n_pods)) + ylab("Latency (us)") + xlab("pods") + scale_y_continuous(labels=comma) + perc_mid = floor((perc_count)/2) + # Create boxplot around the middle percentile + if (perc_count >= 3) { + box_bottom=names(ltpt)[perc_mid+1] + box_mid=names(ltpt)[perc_mid+2] + box_top=names(ltpt)[perc_mid+3] + if (perc_count >= 5) { + whis_low=names(ltpt)[perc_mid] + whis_high=names(ltpt)[perc_mid+4] + latp = latp + geom_boxplot(aes_string(group="interaction(testname,n_pods)",ymin=whis_low,lower=box_bottom,middle=box_mid,upper=box_top,ymax=whis_high,fill="testname"),stat="identity") + } else { + latp = latp + geom_boxplot(aes_string(group="interaction(testname,n_pods)",lower=box_bottom,middle=box_mid,upper=box_top,fill="testname"),stat="identity") + } + } + # Boxplot (above) covers at most 5 percentiles around the center (median). + # Visualize the rest using a point for each percentile. 
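+  # Worked example with the default percentile set (0, 1, 5, 25, 50, 75, 95, 99, 100):
+  # the boxplot above spans p5 (lower whisker) to p95 (upper whisker) around the p50
+  # median, and the loop below adds separate points for p0, p1, p99 and p100.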
+ if (perc_count > 5) { + for (n in seq(1, (perc_count-5)/2)) { + lower_name=names(ltpt)[n+1] + upper_name=names(ltpt)[perc_count-n+2] + latp = latp + geom_point(aes_string(group="interaction(testname,n_pods)",y=lower_name, color="testname")) + latp = latp + geom_point(aes_string(group="interaction(testname,n_pods)",y=upper_name, color="testname")) + } + } +} else { + # Use colored areas and median lines when there are many ticks on X axis + latp = ggplot(data=latencydata, aes(x=n_pods)) + ylab("Latency (us)") + xlab("pods") + scale_y_continuous(labels=comma) + perc_mid = floor((perc_count)/2) + perc_maxdist = perc_mid + plot_number = 0 + for (plot_test in unique(latencydata$testname)) { + plot_number = plot_number + 1 + for (n in seq(perc_mid)) { + # First fill outmost areas, like p5..p25 and p75..p95, + # then areas closer to the middle, like p25..p50 and p50..p75 + lower_name = names(ltpt)[n+1] + lower_next_name = names(ltpt)[n+2] + upper_name = names(ltpt)[perc_count-n+2] + upper_prev_name = names(ltpt)[perc_count-n+1] + alpha = 0.7 * ((n+1) / (perc_mid+1))**2 + latp = latp + geom_ribbon(data=latencydata[latencydata$testname==plot_test,],aes_string(x="n_pods",ymin=lower_name,ymax=lower_next_name,fill="testname"),alpha=alpha) + latp = latp + geom_ribbon(data=latencydata[latencydata$testname==plot_test,],aes_string(x="n_pods",ymin=upper_prev_name,ymax=upper_name,fill="testname"),alpha=alpha) + } + median_index = match("p50", names(ltpt)) + if (!is.na(median_index)) { + # Draw median line + latp = latp + geom_line(data=latencydata[latencydata$testname==plot_test,],aes_string(x="n_pods",y=names(ltpt)[median_index],color="testname")) + } + } +} + +# Table presentation. +lat_table=c() +for (testname in unique(latencydata$testname)) { + testlines=latencydata[latencydata$testname==testname,] + lat_table=rbind(lat_table,testlines[1,]) + if (length(testlines) > 3) { + # middle pod count + lat_table=rbind(lat_table,testlines[(length(testlines)-1)/2,]) + } + if (length(testlines) > 2) { + # max pod count + lat_table=rbind(lat_table,testlines[length(testlines)-1,]) + } +} +latt=ggtexttable(lat_table,rows=NULL) + +cat("\n\nLatency percentiles illustrated in the Figure below: ", paste0(ltp_perc, "\\%"), "\n\n") + +page1 = grid.arrange(latp, latt, ncol=1) + +# pagebreak, as the graphs overflow the page otherwise +cat("\n\n\\pagebreak\n") diff --git a/metrics/scaling/k8s_scale_nc.sh b/metrics/scaling/k8s_scale_nc.sh deleted file mode 100755 index f1b86b1b..00000000 --- a/metrics/scaling/k8s_scale_nc.sh +++ /dev/null @@ -1,468 +0,0 @@ -#!/bin/bash -# Copyright (c) 2019 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -set -e - -# Pull in some common, useful, items -SCRIPT_PATH=$(dirname "$(readlink -f "$0")") -source "${SCRIPT_PATH}/../lib/common.bash" -source "${SCRIPT_PATH}/common.bash" - -LABELVALUE=${LABELVALUE:-scale_nc} - -# Latency test parameters: -# number of requests to be sent to each pod -nc_reqs_per_pod=${nc_reqs_per_pod:-100} -# length of each request [bytes] -nc_req_msg_len=${nc_req_msg_len:-1000} -# port that request servers listen to in pods -nc_port=33101 -# request message -nc_req_msg=$(head -c $nc_req_msg_len /dev/zero | tr '\0' 'x') - -pod_command="[\"nc\", \"-lk\", \"-p\", \"${nc_port}\", \"-e\", \"/bin/cat\"]" - -# Set some default metrics env vars -TEST_ARGS="runtime=${RUNTIME}" -TEST_NAME="k8s scaling nc" - -# $1 is the launch time in seconds this pod/container took to start up. 
-# $2 is the number of pod/containers under test -grab_stats() { - local launch_time_ms=$1 - local n_pods=$2 - shift ; shift - local latency_percentiles=($@) # array of percentiles - local cpu_idle=() - local mem_free=() - local total_mem_used=0 - - info "And grab some stats" - - local date_json="$(cat << EOF - "date": { - "ns": $(date +%s%N), - "Date": "$(date -u +"%Y-%m-%dT%T.%3N")" - } -EOF - )" - metrics_json_add_array_fragment "$date_json" - - local pods_json="$(cat << EOF - "n_pods": { - "Result": ${n_pods}, - "Units" : "int" - } -EOF - )" - metrics_json_add_array_fragment "$pods_json" - - local launch_json="$(cat << EOF - "launch_time": { - "Result": $launch_time_ms, - "Units" : "ms" - } -EOF - )" - metrics_json_add_array_fragment "$launch_json" - - local latency_json="$(cat << EOF - "latency_time": { - "Pod_command": "${pod_command//\"/\\\"}", - "Request_length": "${nc_req_msg_len}", - "Requests_per_pod": "${nc_reqs_per_pod}", - "Sender": "serial", - "Percentiles": [$(IFS=, ; echo "${latency_percentiles[*]}")], - "Result": ${latency_percentiles[$(( ${#latency_percentiles[@]} / 2 ))]}, - "Units" : "ms" - } -EOF - )" - metrics_json_add_array_fragment "$latency_json" - - # start the node utilization array - metrics_json_start_nested_array - - # grab pods in the stats daemonset - # use 3 for the file descriptor rather than stdin otherwise the sh commands - # in the middle will read the rest of stdin - while read -u 3 name node; do - # look for taint that prevents scheduling - local noschedule=false - local t_match_values=$(kubectl get node ${node} -o json | jq 'select(.spec.taints) | .spec.taints[].effect == "NoSchedule"') - for v in $t_match_values; do - if [[ $v == true ]]; then - noschedule=true - break - fi - done - # Tell mpstat to measure over a short period, not only so we get slightly de-noised data, but also - # if you don't tell it the period, you will get the avg since boot, which is not what we want. - local cpu_idle=$(kubectl exec -ti $name -- sh -c "mpstat -u 3 1 | tail -1 | awk '{print \$11}'" | sed 's/\r//') - local mem_free=$(kubectl exec -ti $name -- sh -c "free | tail -2 | head -1 | awk '{print \$4}'" | sed 's/\r//') - local inode_free=$(kubectl exec -ti $name -- sh -c "df -i | awk '/^overlay/ {print \$4}'" | sed 's/\r//') - - info "idle [$cpu_idle] free [$mem_free] launch [$launch_time_ms] node [$node] inodes_free [$inode_free]" - - # Annoyingly, it seems sometimes once in a while we don't get an answer! 
- # We should really retry, but for now, make the json valid at least - cpu_idle=${cpu_idle:-0} - mem_free=${mem_free:-0} - inode_free=${inode_free:-0} - - # If this is the 0 node instance, store away the base memory value - if [ $n_pods -eq 0 ]; then - node_basemem[$node]=$mem_free - node_baseinode[$node]=$inode_free - fi - - local mem_used=$((node_basemem[$node]-mem_free)) - local inode_used=$((node_baseinode[$node]-inode_free)) - # Only account for memory usage on schedulable nodes - if [ $noschedule == false ]; then - total_mem_used=$((total_mem_used+mem_used)) - fi - - local util_json="$(cat << EOF - { - "node": "${node}", - "noschedule": "${noschedule}", - "cpu_idle": { - "Result": ${cpu_idle}, - "Units" : "%" - }, - "mem_free": { - "Result": ${mem_free}, - "Units" : "kb" - }, - "mem_used": { - "Result": ${mem_used}, - "Units" : "kb" - }, - "inode_free": { - "Result": ${inode_free} - }, - "inode_used": { - "Result": ${inode_used} - } - } -EOF - )" - - metrics_json_add_nested_array_element "$util_json" - - done 3< <(kubectl get pods --selector name=stats-pods -o json | jq -r '.items[] | "\(.metadata.name) \(.spec.nodeName)"') - - metrics_json_end_nested_array "node_util" - - # start the new pods array - metrics_json_start_nested_array - - # for the first call to grab stats, there are no new pods - # so we need to fill in with NA (R specific value) in matching - # dimension to the rest of the calls to grab_stats, so $STEP items - if [[ ${#new_pods[@]} == 0 ]]; then - for i in $STEP; do - local new_pod_json="$(cat << EOF - { - "pod_name": "NA", - "node": "NA" - } -EOF - )" - metrics_json_add_nested_array_element "$new_pod_json" - done - else - local maxelem=$(( ${#new_pods[@]} - 1 )) - for index in $(seq 0 $maxelem); do - local node=$(kubectl get pod ${new_pods[$index]} -o json | jq -r '"\(.spec.nodeName)"') - local new_pod_json="$(cat << EOF - { - "pod_name": "${new_pods[$index]}", - "node": "${node}" - } -EOF - )" - metrics_json_add_nested_array_element "$new_pod_json" - done - fi - metrics_json_end_nested_array "launched_pods" - - # And store off the total memory consumed across all nodes, and the pod/Gb value - if [ $n_pods -eq 0 ]; then - local pods_per_gb=0 - else - local pods_per_gb=$(printf "%0f" $(bc -l <<< "scale=2; ($total_mem_used/1024) / $n_pods")) - fi - local mem_json="$(cat << EOF - "memory": { - "consumed": { - "Result": ${total_mem_used}, - "Units": "Kb" - }, - "pods_per_gb": { - "Result": ${pods_per_gb} - } - } -EOF - )" - metrics_json_add_array_fragment "$mem_json" - - metrics_json_close_array_element -} - -init() { - info "Initialising" - - local cmds=("bc" "jq") - check_cmds "${cmds[@]}" - - info "Checking Kubernetes accessible" - local worked=$( kubectl get nodes > /dev/null 2>&1 && echo $? || echo $? ) - if [ "$worked" != 0 ]; then - die "kubectl failed to get nodes" - fi - - info $(get_num_nodes) "Kubernetes nodes in 'Ready' state found" - # We could check we have just the one node here - right now this is a single node - # test!! - because, our stats gathering is rudimentry, as k8s does not provide - # a nice way to do it (unless you want to parse 'descibe nodes') - # Have a read of https://github.com/kubernetes/kubernetes/issues/25353 - - # FIXME - check the node(s) can run enough pods - check 'max-pods' in the - # kubelet config - from 'kubectl describe node -o json' ? 
- - framework_init - - # Launch our stats gathering pod - kubectl apply -f ${SCRIPT_PATH}/${stats_pod}.yaml - kubectl rollout status --timeout=${wait_time}s daemonset/${stats_pod} - - # FIXME - we should probably 'warm up' the cluster with the container image(s) we will - # use for testing, otherwise the download time will likely be included in the first pod - # boot time. -} - -save_config(){ - metrics_json_start_array - - local json="$(cat << EOF - { - "testname": "${TEST_NAME}", - "NUM_PODS": ${NUM_PODS}, - "STEP": ${STEP}, - "wait_time": ${wait_time}, - "delete_wait_time": ${delete_wait_time}, - "settle_time": ${settle_time} - } -EOF -)" - metrics_json_add_array_element "$json" - metrics_json_end_array "Config" -} - -run() { - info "Running test" - - trap cleanup EXIT QUIT KILL - - metrics_json_start_array - - # grab starting stats before launching workload pods - grab_stats 0 0 0 - - for reqs in $(seq ${STEP} ${STEP} ${NUM_PODS}); do - info "Testing replicas ${reqs} of ${NUM_PODS}" - # Generate the next yaml file - - local runtime_command - if [ -n "$RUNTIME" ]; then - runtime_command="s|@RUNTIMECLASS@|${RUNTIME}|g" - else - runtime_command="/@RUNTIMECLASS@/d" - fi - - local input_template - local generated_file - - if [ "$use_api" != "no" ]; then - input_template=$input_json - generated_file=$generated_json - else - input_template=$input_yaml - generated_file=$generated_yaml - fi - - sed -e "s|@REPLICAS@|${reqs}|g" \ - -e $runtime_command \ - -e "s|@DEPLOYMENT@|${deployment}|g" \ - -e "s|@LABEL@|${LABEL}|g" \ - -e "s|@LABELVALUE@|${LABELVALUE}|g" \ - -e "s|@GRACE@|${grace}|g" \ - -e "s#@PODCOMMAND@#${pod_command}#g" \ - < ${input_template} > ${generated_file} - - # get list of workload pods before launching another one - local pods_before=$(kubectl get pods --selector ${LABEL}=${LABELVALUE} -o json | jq -r '.items[] | "\(.metadata.name)"') - - info "Applying changes" - local start_time=$(date +%s%N) - if [ "$use_api" != "no" ]; then - # If this is the first launch of the deploy, we need to use a different URL form. 
- if [ $reqs == ${STEP} ]; then - curl -s ${API_ADDRESS}:${API_PORT}/apis/apps/v1/namespaces/default/deployments -XPOST -H 'Content-Type: application/json' -d@${generated_file} > /dev/null - else - curl -s ${API_ADDRESS}:${API_PORT}/apis/apps/v1/namespaces/default/deployments/${deployment} -XPATCH -H 'Content-Type:application/strategic-merge-patch+json' -d@${generated_file} > /dev/null - fi - else - kubectl apply -f ${generated_file} - fi - - #cmd="kubectl get pods | grep busybox | grep Completed" - kubectl rollout status --timeout=${wait_time}s deployment/${deployment} - local end_time=$(date +%s%N) - local total_milliseconds=$(( (end_time - start_time) / 1000000 )) - info "Took $total_milliseconds ms ($end_time - $start_time)" - - # grab list of workload pods after - local pods_after=$(kubectl get pods --selector ${LABEL}=${LABELVALUE} -o json | jq -r '.items[] | "\(.metadata.name)"') - find_unique_pods "${pods_after}" "${pods_before}" - - sleep ${settle_time} - - if [[ ${nc_reqs_per_pod} -ge 1 ]]; then - pod_ips=$(kubectl get pods --selector ${LABEL}=${LABELVALUE} -o json | jq -r '.items[].status.podIP') - if [[ ${reqs} != $(echo $pod_ips | wc -w) ]]; then - info "WARNING: pod IP count mismatch expected ${reqs} found $(echo $pod_ips | wc -w)" - fi - info "Measuring latency, sending ${nc_reqs_per_pod} messages to each of the ${reqs} pods" - local latency_failures=0 - local latency_pod_array=() - for latency_round in $(seq ${nc_reqs_per_pod}); do - for pod_ip in ${pod_ips}; do - local latency_pod_start_time=$(date +%s%N) - if [[ $(echo ${nc_req_msg} | nc ${pod_ip} ${nc_port}) != "${nc_req_msg}" ]]; then - latency_failures=$(( latency_failures + 1 )) - fi - local latency_pod_end_time=$(date +%s%N) - latency_pod_array+=($(( (latency_pod_end_time - latency_pod_start_time) / 1000000 ))) - done - done - IFS=$'\n' - local latency_pod_array_sorted=($(sort -n <<<"${latency_pod_array[*]}")) - unset IFS - local latency_pod_array_len=${#latency_pod_array[@]} - local latency_percentiles=() - latency_percentiles+=(${latency_pod_array_sorted[$(bc <<<"$latency_pod_array_len / 20")]}) - latency_percentiles+=(${latency_pod_array_sorted[$(bc <<<"$latency_pod_array_len / 4")]}) - latency_percentiles+=(${latency_pod_array_sorted[$(bc <<<"$latency_pod_array_len / 2")]}) - latency_percentiles+=(${latency_pod_array_sorted[$(bc <<<"$latency_pod_array_len / 1.25")]}) - latency_percentiles+=(${latency_pod_array_sorted[$(bc <<<"$latency_pod_array_len / 1.05")]}) - info "Latency percentiles [ms] 5-25-50-75-95 %: ${latency_percentiles[*]}" - else - local latency_avg_ms=0 - local latency_percentiles=(0 0 0 0 0) - - fi - - grab_stats $total_milliseconds $reqs ${latency_percentiles[@]} - done -} - -cleanup() { - info "Cleaning up" - - # First try to save any results we got - metrics_json_end_array "BootResults" - - kubectl delete daemonset --wait=true --timeout=${delete_wait_time}s "${stats_pod}" || true - local start_time=$(date +%s%N) - kubectl delete deployment --wait=true --timeout=${delete_wait_time}s ${deployment} || true - for x in $(seq 1 ${delete_wait_time}); do - local npods=$(kubectl get pods -l=${LABEL}=${LABELVALUE} -o=name | wc -l) - if [ $npods -eq 0 ]; then - echo "All pods have terminated at cycle $x" - local alldied=true - break; - fi - sleep 1 - done - local end_time=$(date +%s%N) - local total_milliseconds=$(( (end_time - start_time) / 1000000 )) - if [ -z "$alldied" ]; then - echo "ERROR: Not all pods died!" 
- fi - info "Delete Took $total_milliseconds ms ($end_time - $start_time)" - - local json="$(cat << EOF - "Delete": { - "Result": ${total_milliseconds}, - "Units" : "ms" - } -EOF -)" - - metrics_json_add_fragment "$json" - framework_shutdown -} - -show_vars() -{ - echo -e "\nEnvironment variables:" - echo -e "\tName (default)" - echo -e "\t\tDescription" - echo -e "\tTEST_NAME (${TEST_NAME})" - echo -e "\t\tCan be set to over-ride the default JSON results filename" - echo -e "\tNUM_PODS (${NUM_PODS})" - echo -e "\t\tNumber of pods to launch" - echo -e "\tSTEP (${STEP})" - echo -e "\t\tNumber of pods to launch per cycle" - echo -e "\twait_time (${wait_time})" - echo -e "\t\tSeconds to wait for pods to become ready" - echo -e "\tdelete_wait_time (${delete_wait_time})" - echo -e "\t\tSeconds to wait for all pods to be deleted" - echo -e "\tsettle_time (${settle_time})" - echo -e "\t\tSeconds to wait after pods ready before taking measurements" - echo -e "\tuse_api (${use_api})" - echo -e "\t\tspecify yes or no to use the API to launch pods" - echo -e "\tgrace (${grace})" - echo -e "\t\tspecify the grace period in seconds for workload pod termination" -} - -help() -{ - usage=$(cat << EOF -Usage: $0 [-h] [options] - Description: - Launch a series of workloads and take memory metric measurements after - each launch. - Options: - -h, Help page. -EOF -) - echo "$usage" - show_vars -} - -main() { - - local OPTIND - while getopts "h" opt;do - case ${opt} in - h) - help - exit 0; - ;; - esac - done - shift $((OPTIND-1)) - init - run - # cleanup will happen at exit due to the shell 'trap' we registered - # cleanup -} - -main "$@" diff --git a/metrics/scaling/k8s_scale_rapid_nc.sh b/metrics/scaling/k8s_scale_rapid_nc.sh new file mode 100755 index 00000000..6270e693 --- /dev/null +++ b/metrics/scaling/k8s_scale_rapid_nc.sh @@ -0,0 +1,337 @@ +#!/bin/bash +# Copyright (c) 2019 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -e + +# Pull in some common, useful, items +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +LABELVALUE=${LABELVALUE:-rapid_nc} +source "${SCRIPT_PATH}/../lib/common.bash" +source "${SCRIPT_PATH}/common.bash" +source "${SCRIPT_PATH}/../collectd/collectd.bash" + +SMF_USE_COLLECTD=true + +# Network latency test parameters: +# number of requests to be sent after each scaling step +nc_reqs=${nc_reqs:-1000} +# length of each request [bytes] +nc_req_msg_len=${nc_req_msg_len:-1000} +# port that request servers listen to in pods +nc_port=33101 +# request message +nc_req_msg=$(head -c $nc_req_msg_len /dev/zero | tr '\0' 'x') +nc_percentiles=(0 1 5 25 50 75 95 99 100) + +pod_command="[\"nc\", \"-lk\", \"-p\", \"${nc_port}\", \"-e\", \"/bin/sh\", \"-c\", \"/bin/echo \${EPOCHREALTIME/./}; /bin/cat; /bin/echo \${EPOCHREALTIME/./}\"]" + +# Set some default metrics env vars +TEST_ARGS="runtime=${RUNTIME}" +TEST_NAME="k8s rapid nc" + +# $1 is the launch time in seconds this pod/container took to start up. 
+# $2 is the number of pod/containers under test +grab_stats() { + local launch_time_ms=$1 + local n_pods=$2 + shift ; shift + local latency_percentiles=($@) # array of percentiles + local cpu_idle=() + local mem_free=() + local total_mem_used=0 + + info "And grab some stats" + + local date_json="$(cat << EOF + "date": { + "ns": $(date +%s%N), + "Date": "$(date -u +"%Y-%m-%dT%T.%3N")" + } +EOF + )" + metrics_json_add_array_fragment "$date_json" + + local pods_json="$(cat << EOF + "n_pods": { + "Result": ${n_pods}, + "Units" : "int" + } +EOF + )" + metrics_json_add_array_fragment "$pods_json" + + local launch_json="$(cat << EOF + "launch_time": { + "Result": $launch_time_ms, + "Units" : "ms" + } +EOF + )" + metrics_json_add_array_fragment "$launch_json" + + local latency_json="$(cat << EOF + "latency_time": { + "Percentiles": [$(IFS=, ; echo "${latency_percentiles[*]}")], + "Result": ${latency_percentiles[$(( ${#latency_percentiles[@]} / 2 ))]}, + "Units" : "ms" + } +EOF + )" + + metrics_json_add_array_fragment "$latency_json" + + info "launch [$launch_time_ms]" + + metrics_json_close_array_element +} + +init() { + framework_init +} + +save_config(){ + metrics_json_start_array + + local json="$(cat << EOF + { + "testname": "${TEST_NAME}", + "NUM_PODS": ${NUM_PODS}, + "STEP": ${STEP}, + "wait_time": ${wait_time}, + "delete_wait_time": ${delete_wait_time}, + "settle_time": ${settle_time}, + "nc_reqs": ${nc_reqs}, + "nc_req_msg_len": ${nc_req_msg_len}, + "nc_percentiles": [$(IFS=, ; echo "${nc_percentiles[*]}")] + } +EOF +)" + metrics_json_add_array_element "$json" + metrics_json_end_array "Config" +} + +run() { + info "Running test" + + trap cleanup EXIT QUIT KILL + + metrics_json_start_array + + for reqs in $(seq ${STEP} ${STEP} ${NUM_PODS}); do + info "Testing replicas ${reqs} of ${NUM_PODS}" + # Generate the next yaml file + + local runtime_command + if [ -n "$RUNTIME" ]; then + runtime_command="s|@RUNTIMECLASS@|${RUNTIME}|g" + else + runtime_command="/@RUNTIMECLASS@/d" + fi + + local input_template + local generated_file + if [ "$use_api" != "no" ]; then + input_template=$input_json + generated_file=$generated_json + else + input_template=$input_yaml + generated_file=$generated_yaml + fi + + sed -e "s|@REPLICAS@|${reqs}|g" \ + -e $runtime_command \ + -e "s|@DEPLOYMENT@|${deployment}|g" \ + -e "s|@LABEL@|${LABEL}|g" \ + -e "s|@LABELVALUE@|${LABELVALUE}|g" \ + -e "s|@GRACE@|${grace}|g" \ + -e "s#@PODCOMMAND@#${pod_command}#g" \ + < ${input_template} > ${generated_file} + + info "Applying changes" + local start_time=$(date +%s%N) + if [ "$use_api" != "no" ]; then + # If this is the first launch of the deploy, we need to use a different URL form. 
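+			# A POST to .../deployments creates the deployment on the first pass;
+			# later passes PATCH .../deployments/${deployment} with a
+			# strategic-merge-patch document to raise the replica count.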
+ if [ $reqs == ${STEP} ]; then + curl -s ${API_ADDRESS}:${API_PORT}/apis/apps/v1/namespaces/default/deployments -XPOST -H 'Content-Type: application/json' -d@${generated_file} > /dev/null + else + curl -s ${API_ADDRESS}:${API_PORT}/apis/apps/v1/namespaces/default/deployments/${deployment} -XPATCH -H 'Content-Type:application/strategic-merge-patch+json' -d@${generated_file} > /dev/null + fi + else + kubectl apply -f ${generated_file} + fi + + kubectl rollout status --timeout=${wait_time}s deployment/${deployment} + local end_time=$(date +%s%N) + local total_milliseconds=$(( (end_time - start_time) / 1000000 )) + info "Took $total_milliseconds ms ($end_time - $start_time)" + + sleep ${settle_time} + + # Measure network latency + if [[ ${nc_reqs} -ge 1 ]]; then + mkdir -p "$RESULT_DIR" 2>/dev/null || true + local latency_raw_output="$RESULT_DIR/${TEST_NAME// /-}.tmaster_tworker_pods_req_ipaddr_lattot_latconn_latio_latdisconn_rx.raw" + local pod_ips=($(kubectl get pods --selector ${LABEL}=${LABELVALUE} -o json | jq -r '.items[].status.podIP')) + local pod_ips_len=${#pod_ips[@]} + if [[ ${reqs} != ${pod_ips_len} ]]; then + info "WARNING: pod IP count mismatch expected ${reqs} found ${pod_ips_len}" + fi + info "Measuring latency, sending ${nc_reqs} messages to ${reqs} pods (~$((nc_reqs / reqs)) messages each)" + local latency_failures=0 + local latency_pod_array=() + + # send $nc_reqs messages, go through pods + local req_index=0 + local pod_index=0 + while [[ $req_index -lt $nc_reqs ]] && [[ $pod_ips_len -gt 0 ]]; do + req_index=$(( req_index + 1 )) + pod_index=$(( pod_index + 1 )) + if [[ $pod_index -ge $pod_ips_len ]]; then + pod_index=0 + fi + local pod_ip=${pod_ips[$pod_index]} + local latency_failed=0 + local latency_pod_start_time=${EPOCHREALTIME/./} + local latency_pod_start_response_end=$(echo ${latency_pod_start_time} ${nc_req_msg} | nc ${pod_ip} ${nc_port}) + # start_response_end contents: + local latency_pod_end_time=${EPOCHREALTIME/./} + local latency_response_microseconds=$(( latency_pod_end_time - latency_pod_start_time )) + local latency_pod_response=$(echo $latency_pod_start_response_end | awk '{print $3}') + if [[ "$latency_pod_response" != "${nc_req_msg}" ]]; then + latency_failures=$(( latency_failures + 1 )) + local latency_pod_first_t=$latency_pod_end_time + local latency_pod_last_t=$latency_pod_end_time + latency_failed=1 + else + local latency_pod_first_t=$(echo $latency_pod_start_response_end | awk '{print $1}') + local latency_pod_last_t=$(echo $latency_pod_start_response_end | awk '{print $4}') + fi + local latency_pod_local_io=$(( latency_pod_last_t - latency_pod_first_t )) + local latency_pod_conn=$(( latency_pod_first_t - latency_pod_start_time )) + local latency_pod_disconn=$(( latency_pod_end_time - latency_pod_last_t )) + latency_pod_array+=($latency_response_microseconds) + echo "$latency_pod_start_time $latency_pod_first_t $reqs $req_index $pod_ip $latency_response_microseconds $latency_pod_conn $latency_pod_local_io $latency_pod_disconn $(echo $latency_pod_start_response_end | wc -c)" >> $latency_raw_output + done + IFS=$'\n' + local latency_pod_array_sorted=($(sort -n <<<"${latency_pod_array[*]}")) + unset IFS + local latency_pod_array_len=${#latency_pod_array[@]} + local latency_percentiles=() + for p in ${nc_percentiles[@]}; do + if [[ $p -lt 100 ]]; then + latency_percentiles+=(${latency_pod_array_sorted[$(bc <<<"$latency_pod_array_len * $p / 100")]}) + else + # Asking for a value that is greater than 100 % of measured values. 
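+				# (with N measurements, bc gives index N*p/100 == N at p=100, overrunning the 0..N-1 array)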
+ # This is the way to save the maximum value. + latency_percentiles+=(${latency_pod_array_sorted[$(bc <<<"$latency_pod_array_len - 1")]}) + fi + done + info "Latency percentiles [ms] ${nc_percentiles[@]} %: ${latency_percentiles[@]}" + else + local latency_avg_ms=0 + local latency_percentiles=() + for p in ${nc_percentiles[@]}; do + latency_percentiles+=(0) + done + fi + + grab_stats $total_milliseconds $reqs ${latency_percentiles[@]} + done +} + +cleanup() { + info "Cleaning up" + + # First try to save any results we got + metrics_json_end_array "BootResults" + + local start_time=$(date +%s%N) + kubectl delete deployment --wait=true --timeout=${delete_wait_time}s ${deployment} || true + for x in $(seq 1 ${delete_wait_time}); do + local npods=$(kubectl get pods -l=${LABEL}=${LABELVALUE} -o=name | wc -l) + if [ $npods -eq 0 ]; then + echo "All pods have terminated at cycle $x" + local alldied=true + break; + fi + sleep 1 + done + local end_time=$(date +%s%N) + local total_milliseconds=$(( (end_time - start_time) / 1000000 )) + if [ -z "$alldied" ]; then + echo "ERROR: Not all pods died!" + fi + info "Delete Took $total_milliseconds ms ($end_time - $start_time)" + + local json="$(cat << EOF + "Delete": { + "Result": ${total_milliseconds}, + "Units" : "ms" + } +EOF +)" + + metrics_json_add_fragment "$json" + framework_shutdown +} + +show_vars() +{ + echo -e "\nEnvironment variables:" + echo -e "\tName (default)" + echo -e "\t\tDescription" + echo -e "\tTEST_NAME (${TEST_NAME})" + echo -e "\t\tCan be set to over-ride the default JSON results filename" + echo -e "\tNUM_PODS (${NUM_PODS})" + echo -e "\t\tNumber of pods to launch" + echo -e "\tSTEP (${STEP})" + echo -e "\t\tNumber of pods to launch per cycle" + echo -e "\twait_time (${wait_time})" + echo -e "\t\tSeconds to wait for pods to become ready" + echo -e "\tdelete_wait_time (${delete_wait_time})" + echo -e "\t\tSeconds to wait for all pods to be deleted" + echo -e "\tsettle_time (${settle_time})" + echo -e "\t\tSeconds to wait after pods ready before taking measurements" + echo -e "\tuse_api (${use_api})" + echo -e "\t\tspecify yes or no to use the API to launch pods" + echo -e "\tgrace (${grace})" + echo -e "\t\tspecify the grace period in seconds for workload pod termination" +} + +help() +{ + usage=$(cat << EOF +Usage: $0 [-h] [options] + Description: + Launch a series of workloads and take memory metric measurements after + each launch. + Options: + -h, Help page. +EOF +) + echo "$usage" + show_vars +} + +main() { + + local OPTIND + while getopts "h" opt;do + case ${opt} in + h) + help + exit 0; + ;; + esac + done + shift $((OPTIND-1)) + init + run + # cleanup will happen at exit due to the shell 'trap' we registered + # cleanup +} + +main "$@"
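
The round trip measured above can also be exercised by hand against a single pod,
which shows where the connect/io/disconnect columns of the raw output file come
from. The sketch below is only an illustration of the message exchange the test
performs: the pod IP is hypothetical, and it assumes bash 5+ (for EPOCHREALTIME)
and a pod started with the pod_command defined in k8s_scale_rapid_nc.sh, i.e. one
that echoes its own timestamp, cats the request back, then echoes a second
timestamp. The connect/disconnect components compare the client's clock with the
pod's clock, so they are only meaningful when the two clocks are closely
synchronized.

# Reply format: "<pod_first_t> <client_start_t> <request_msg> <pod_last_t>",
# all timestamps in microseconds.
pod_ip=10.244.1.17      # hypothetical; see 'kubectl get pods -o wide'
nc_port=33101
nc_req_msg=$(head -c 1000 /dev/zero | tr '\0' 'x')

start_t=${EPOCHREALTIME/./}
reply=$(echo ${start_t} ${nc_req_msg} | nc ${pod_ip} ${nc_port})
end_t=${EPOCHREALTIME/./}

pod_first_t=$(echo ${reply} | awk '{print $1}')
pod_last_t=$(echo ${reply} | awk '{print $4}')

echo "total round trip: $(( end_t - start_t )) us"          # what the percentiles summarize
echo "connect:          $(( pod_first_t - start_t )) us"    # request sent -> pod starts replying
echo "pod-local io:     $(( pod_last_t - pod_first_t )) us"
echo "disconnect:       $(( end_t - pod_last_t )) us"       # pod finished -> client sees EOF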