Skip to content
This repository was archived by the owner on Aug 7, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions metrics/lib/common.bash
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ framework_init() {
k8s_api_init

# Launch our stats gathering pod
if [ -n "$SMF_USE_COLLECTD" ]; then
if [ "$SMF_USE_COLLECTD" == "true" ]; then
info "Setting up collectd"
init_stats $wait_time
fi
Expand All @@ -104,7 +104,7 @@ framework_shutdown() {
k8s_api_shutdown
cpu_load_shutdown

if [ -n "$SMF_USE_COLLECTD" ]; then
if [ "$SMF_USE_COLLECTD" == "true" ]; then
cleanup_stats
fi

Expand Down
12 changes: 12 additions & 0 deletions metrics/report/report_dockerfile/metrics_report.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,18 @@ source('collectd_scaling.R')

\pagebreak

# Pod communication latency
This [test](https://github.com/clearlinux/cloud-native-setup/blob/master/metrics/scaling/k8s_scale_rapid_nc.sh)
measures pod query--response latency when scaling up. The
time is measured from sending a message directly to a socket, that `nc`
listens to inside each pod, to reading the response from the pod.

```{r scaling_nc, echo=FALSE, fig.cap="K8S pod communication latency", results='asis'}
source('tidy_scaling_nc.R')
```

\pagebreak

# Test setup details

This table describes the test system details, as derived from the information contained
Expand Down
155 changes: 155 additions & 0 deletions metrics/report/report_dockerfile/tidy_scaling_nc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
#!/usr/bin/env Rscript
# Copyright (c) 2019 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Show pod communication latency

suppressMessages(suppressWarnings(library(ggplot2)))   # ability to plot nicely.
suppressMessages(suppressWarnings(library(ggpubr)))    # ggtexttable
suppressMessages(suppressWarnings(library(gridExtra))) # grid.arrange, used for final page layout
suppressMessages(library(jsonlite))                    # to load the data.
suppressMessages(library(scales))                      # For de-science notation of axis
library(tibble)                                        # tibbles for tidy data

# Base names (without the ".json" suffix) of the result files this
# report consumes.
testnames=c(
	"k8s-rapid-nc"
)

### For developers: uncomment following variables to run this as is in R
# resultdirs=c("PATH/TO/RES1/", ...) # keep the ending slash on result paths
# inputdir=""

# Accumulated tidy latency results: one row per pod count, per test run.
latencydata=c()

# Iterate over every set of results (test run), importing each matching
# results file we find.
for (currentdir in resultdirs) {
	for (testname in testnames) {
		resdir=paste(inputdir, currentdir, sep="")
		filepattern=paste(testname, '\\.json', sep="")

		# For every matching results file
		for (resfile in list.files(resdir, pattern=filepattern)) {
			fname=paste(inputdir, currentdir, resfile, sep="")
			if (!file.exists(fname)) {
				warning(paste("Skipping non-existent file: ", fname))
				next
			}

			# The dataset is named after the test result dirname.
			datasetname=basename(currentdir)

			# Import the JSON, then de-nest the payload, which is keyed
			# by the file name stripped of its ".json" suffix.
			fdata=fromJSON(fname)
			shortname=substr(resfile, 1, nchar(resfile)-nchar(".json"))
			fdata=fdata[[shortname]]
			testname=datasetname

			# All the data we are looking for comes in BootResults,
			# so pick it out to make referencing easier.
			br=fdata$BootResults

			########################################################
			#### Now extract latency time percentiles (ltp) ########
			########################################################
			ltp=br$latency_time$Percentiles
			# Percentile thresholds, for example [5, 25, 50, 75, 95]
			ltp_perc=fdata$Config$nc_percentiles[[1]]
			perc_count = length(ltp_perc)
			# Measured times: one matrix row per percentile threshold.
			ltp_meas=matrix(unlist(ltp), nrow=perc_count)

			# Build the latency percentiles tibble with nice "p<N>"
			# headings. Example, with percentiles [5, 50, 95]:
			#   n_pods  p5  p50  p95
			#      100   4    8   10
			#      200   5   11   15
			#      300   6   14   19
			ltpt=tibble(n_pods=br$n_pods$Result)
			for (n in seq(perc_count)) {
				ltpt[paste0("p", ltp_perc[n])] = ltp_meas[n,]
			}
			ltpt$testname=testname
			latencydata=rbind(latencydata, ltpt)
		}
	}
}

# Visualize data.
#
# Two presentation styles are used:
# - a boxplot (plus outlying percentile points) when there are only a few
#   pod-count data points, or several test runs to compare side by side;
# - stacked shaded percentile bands around a median line when a single
#   test run provides many ticks on the X axis.
if (length(latencydata[[1]]) <= 5 || length(unique(latencydata$testname)) > 1) {
	# If there are many tests to compare or only few data points, use boxplot with extra percentile points.
	latp = ggplot(data=latencydata, aes(x=n_pods)) + ylab("Latency (us)") + xlab("pods") + scale_y_continuous(labels=comma)
	perc_mid = floor((perc_count)/2)
	# Create boxplot around the middle percentile
	if (perc_count >= 3) {
		box_bottom=names(ltpt)[perc_mid+1]
		box_mid=names(ltpt)[perc_mid+2]
		box_top=names(ltpt)[perc_mid+3]
		if (perc_count >= 5) {
			whis_low=names(ltpt)[perc_mid]
			whis_high=names(ltpt)[perc_mid+4]
			latp = latp + geom_boxplot(aes_string(group="interaction(testname,n_pods)",ymin=whis_low,lower=box_bottom,middle=box_mid,upper=box_top,ymax=whis_high,fill="testname"),stat="identity")
		} else {
			latp = latp + geom_boxplot(aes_string(group="interaction(testname,n_pods)",lower=box_bottom,middle=box_mid,upper=box_top,fill="testname"),stat="identity")
		}
	}
	# Boxplot (above) covers at most 5 percentiles around the center (median).
	# Visualize the rest using a point for each percentile.
	if (perc_count > 5) {
		for (n in seq(1, (perc_count-5)/2)) {
			lower_name=names(ltpt)[n+1]
			upper_name=names(ltpt)[perc_count-n+2]
			latp = latp + geom_point(aes_string(group="interaction(testname,n_pods)",y=lower_name, color="testname"))
			latp = latp + geom_point(aes_string(group="interaction(testname,n_pods)",y=upper_name, color="testname"))
		}
	}
} else {
	# Use colored areas and median lines when there are many ticks on X axis
	latp = ggplot(data=latencydata, aes(x=n_pods)) + ylab("Latency (us)") + xlab("pods") + scale_y_continuous(labels=comma)
	perc_mid = floor((perc_count)/2)
	for (plot_test in unique(latencydata$testname)) {
		for (n in seq(perc_mid)) {
			# First fill outmost areas, like p5..p25 and p75..p95,
			# then areas closer to the middle, like p25..p50 and p50..p75
			lower_name = names(ltpt)[n+1]
			lower_next_name = names(ltpt)[n+2]
			upper_name = names(ltpt)[perc_count-n+2]
			upper_prev_name = names(ltpt)[perc_count-n+1]
			# Bands nearer the median are drawn more opaque.
			alpha = 0.7 * ((n+1) / (perc_mid+1))**2
			latp = latp + geom_ribbon(data=latencydata[latencydata$testname==plot_test,],aes_string(x="n_pods",ymin=lower_name,ymax=lower_next_name,fill="testname"),alpha=alpha)
			latp = latp + geom_ribbon(data=latencydata[latencydata$testname==plot_test,],aes_string(x="n_pods",ymin=upper_prev_name,ymax=upper_name,fill="testname"),alpha=alpha)
		}
		median_index = match("p50", names(ltpt))
		if (!is.na(median_index)) {
			# Draw median line
			latp = latp + geom_line(data=latencydata[latencydata$testname==plot_test,],aes_string(x="n_pods",y=names(ltpt)[median_index],color="testname"))
		}
	}
}

# Table presentation.
#
# Summarize each test run with up to three rows of its latency data: the
# smallest pod count (first row), the middle pod count and the largest
# pod count (last row).
#
# NOTE: row counts must come from nrow() - length() on a data frame
# returns the number of COLUMNS, which previously selected wrong rows
# (e.g. rows 1, 5, 10 for a 20 pod run).
lat_table=c()
for (testname in unique(latencydata$testname)) {
	testlines=latencydata[latencydata$testname==testname,]
	n_rows=nrow(testlines)
	# min pod count
	lat_table=rbind(lat_table,testlines[1,])
	if (n_rows > 2) {
		# middle pod count
		lat_table=rbind(lat_table,testlines[ceiling(n_rows/2),])
	}
	if (n_rows > 1) {
		# max pod count
		lat_table=rbind(lat_table,testlines[n_rows,])
	}
}
latt=ggtexttable(lat_table,rows=NULL)

# Note in the report text which percentiles the figure illustrates.
cat("\n\nLatency percentiles illustrated in the Figure below: ", paste0(ltp_perc, "\\%"), "\n\n")

# Stack the latency plot above the summary table on a single page.
page1 = grid.arrange(latp, latt, ncol=1)

# pagebreak, as the graphs overflow the page otherwise
cat("\n\n\\pagebreak\n")
Loading