diff --git a/CHANGES.next.md b/CHANGES.next.md
index 9d98c14ae5..49dff15187 100644
--- a/CHANGES.next.md
+++ b/CHANGES.next.md
@@ -1,5 +1,7 @@
 ### Breaking changes:
-
+- Added --accept_licenses flag. Users have to turn this flag on to
+  acknowledge that PKB may install software, thereby accepting license
+  agreements on the user's behalf.
 - Renamed Database-related flags from managed_db* to db* Added alias for
   backwards compatibility, might not be supported in the future release.
 - Require Python 3.9+
@@ -46,6 +48,7 @@
 - Remove flag fio_write_against_multiple_clients from FIO.
 - Drop windows coremark benchmark.
 - Remove cudnn linux package.
+- Make Ubuntu 20 the default os_type.
 
 ### New features:
 
diff --git a/LICENSE b/LICENSE
index 261eeb9e9f..c4488f1754 100644
--- a/LICENSE
+++ b/LICENSE
@@ -199,3 +199,39 @@
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
+
+---
+
+Files: data/blaze_config.j2, data/blazemark_config.j2
+#==================================================================================================
+#
+#  Configfile file for the Blaze library
+#
+#  Copyright (C) 2013 Klaus Iglberger - All Rights Reserved
+#
+#  This file is part of the Blaze library. You can redistribute it and/or modify it under
+#  the terms of the New (Revised) BSD License. Redistribution and use in source and binary
+#  forms, with or without modification, are permitted provided that the following conditions
+#  are met:
+#
+#  1. Redistributions of source code must retain the above copyright notice, this list of
+#     conditions and the following disclaimer.
+#  2. Redistributions in binary form must reproduce the above copyright notice, this list
+#     of conditions and the following disclaimer in the documentation and/or other materials
+#     provided with the distribution.
+#  3. Neither the names of the Blaze development group nor the names of its contributors
+#     may be used to endorse or promote products derived from this software without specific
+#     prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+#  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+#  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+#  SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+#  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+#  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+#  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+#  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+#  DAMAGE.
+#
+#==================================================================================================
diff --git a/README.md b/README.md
index dddf7b49e9..7fde1b252e 100644
--- a/README.md
+++ b/README.md
@@ -35,6 +35,8 @@
 as part of a benchmark run. Therefore you must accept the license of each of
 the benchmarks individually, and take responsibility for using them before you
 use the PerfKit Benchmarker.
+Moving forward, you will need to run PKB with the explicit flag --accept_licenses.
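+For example, a typical invocation might look like the following (the cloud
+and benchmark chosen here are only illustrative):
+
+```bash
+$ ./pkb.py --cloud=GCP --benchmarks=iperf --accept_licenses
+```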
+
 In its current release these are the benchmarks that are executed:
 
 -   `aerospike`:
diff --git a/perfkitbenchmarker/configs/benchmark_config_spec.py b/perfkitbenchmarker/configs/benchmark_config_spec.py
index 6d403b5644..d519b3645e 100644
--- a/perfkitbenchmarker/configs/benchmark_config_spec.py
+++ b/perfkitbenchmarker/configs/benchmark_config_spec.py
@@ -190,6 +190,14 @@ def _GetOptionDecoderConstructions(cls):
         'default': None,
         'none_ok': True
     }),
+    'dataproc_serverless_memory': (option_decoders.IntDecoder, {
+        'default': None,
+        'none_ok': True
+    }),
+    'dataproc_serverless_memory_overhead': (option_decoders.IntDecoder, {
+        'default': None,
+        'none_ok': True
+    }),
     'emr_serverless_executor_count': (option_decoders.IntDecoder, {
         'default': None,
         'none_ok': True
diff --git a/perfkitbenchmarker/configs/default_config_constants.yaml b/perfkitbenchmarker/configs/default_config_constants.yaml
index 28713624a4..7f756614ee 100644
--- a/perfkitbenchmarker/configs/default_config_constants.yaml
+++ b/perfkitbenchmarker/configs/default_config_constants.yaml
@@ -52,6 +52,10 @@ default_single_core: &default_single_core
     machine_type: cx2-2x4
     zone: us-south-1
     image: null
+  OCI:
+    machine_type: VM.Standard.A1.Flex
+    zone: us-ashburn-1
+    image: null
 
 # TODO: update the two core machines for more providers
 default_dual_core: &default_dual_core
@@ -83,6 +87,10 @@ default_dual_core: &default_dual_core
     image: null
   Kubernetes:
     image: null
+  OCI:
+    machine_type: VM.Standard.A1.Flex
+    zone: us-ashburn-1
+    image: null
 
 # TODO(user): update the disk types below as more providers are
 # updated for the disk types refactor.
@@ -138,6 +146,10 @@ default_500_gb: &default_500_gb
     disk_type: standard
     disk_size: 500
     mount_point: /scratch
+  OCI:
+    disk_type: paravirtualized
+    disk_size: 500
+    mount_point: /scratch
 
 
 # TODO(user): update the disk types below as more providers are
@@ -194,3 +206,8 @@ default_50_gb: &default_50_gb
     disk_type: standard
     disk_size: 50
     mount_point: /scratch
+  OCI:
+    disk_type: paravirtualized
+    disk_size: 50
+    mount_point: /scratch
+
diff --git a/perfkitbenchmarker/flags.py b/perfkitbenchmarker/flags.py
index d61d79dada..9ed73b2a34 100644
--- a/perfkitbenchmarker/flags.py
+++ b/perfkitbenchmarker/flags.py
@@ -36,6 +36,13 @@ def GetCurrentUser():
   except KeyError:
     return 'user_unknown'
 
+
+flags.DEFINE_boolean(
+    'accept_licenses',
+    False,
+    'Acknowledge that PKB may install software, thereby accepting license'
+    " agreements on the user's behalf.",
+)
 flags.DEFINE_list('ssh_options', [], 'Additional options to pass to ssh.')
 flags.DEFINE_boolean('use_ipv6', False, 'Whether to use ipv6 for ssh/scp.')
 flags.DEFINE_list('benchmarks', ['cluster_boot'],
diff --git a/perfkitbenchmarker/linux_benchmarks/apachebench_benchmark.py b/perfkitbenchmarker/linux_benchmarks/apachebench_benchmark.py
index f99c115cd2..a657b27d70 100644
--- a/perfkitbenchmarker/linux_benchmarks/apachebench_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/apachebench_benchmark.py
@@ -151,11 +151,11 @@ class ApacheBenchRunMode(object):
     description: Runs apachebench benchmark.
vm_groups: client: - os_type: ubuntu1804 + os_type: ubuntu2004 vm_spec: *default_single_core vm_count: 1 server: - os_type: ubuntu1804 + os_type: ubuntu2004 vm_spec: *default_single_core """ FLAGS = flags.FLAGS diff --git a/perfkitbenchmarker/linux_benchmarks/cloud_redis_memtier_benchmark.py b/perfkitbenchmarker/linux_benchmarks/cloud_redis_memtier_benchmark.py index 443cc14448..dc9f508fcf 100644 --- a/perfkitbenchmarker/linux_benchmarks/cloud_redis_memtier_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/cloud_redis_memtier_benchmark.py @@ -16,10 +16,14 @@ Spins up a cloud redis instance, runs memtier against it, then spins it down. """ +import collections from absl import flags +from absl import logging from perfkitbenchmarker import background_tasks from perfkitbenchmarker import configs +from perfkitbenchmarker import linux_virtual_machine from perfkitbenchmarker import managed_memory_store +from perfkitbenchmarker import sample from perfkitbenchmarker.linux_packages import memtier FLAGS = flags.FLAGS @@ -36,6 +40,9 @@ vm_count: 1 """ +_LinuxVm = linux_virtual_machine.BaseLinuxVirtualMachine +_ManagedRedis = managed_memory_store.BaseManagedMemoryStore + def GetConfig(user_config): config = configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME) @@ -90,6 +97,74 @@ def Prepare(benchmark_spec): memtier.Load(vm, memory_store_ip, memory_store_port, password) +def _GetConnections( + vms: list[_LinuxVm], redis_instance: _ManagedRedis +) -> list[memtier.MemtierConnection]: + """Gets a list of connections mapping client VMs to shards.""" + if len(vms) == 1: + return [ + memtier.MemtierConnection( + vms[0], + redis_instance.GetMemoryStoreIp(), + redis_instance.GetMemoryStorePort(), + ) + ] + # Spread shards by client VM (evenly distributed by zone) such that each + # client VM gets an equal number of shards in each zone. 
+ connections = [] + shards = redis_instance.GetShardEndpoints(vms[0]) + shards_by_zone = collections.defaultdict(list) + for shard in shards: + shards_by_zone[shard.zone].append(shard) + shards_by_vm = collections.defaultdict(list) + for shards_list in shards_by_zone.values(): + for shard_index, shard in enumerate(shards_list): + vm_index = shard_index % len(vms) + vm = vms[vm_index] + connections.append(memtier.MemtierConnection(vm, shard.ip, shard.port)) + shards_by_vm[vm].append(shard) + logging.info('Shards by VM: %s', shards_by_vm) + return connections + + +def _MeasureMemtierDistribution( + redis_instance: _ManagedRedis, + vms: list[_LinuxVm], +) -> list[sample.Sample]: + """Runs and reports stats across a series of memtier runs.""" + connections = _GetConnections(vms, redis_instance) + return memtier.MeasureLatencyCappedThroughputDistribution( + connections, + redis_instance.GetMemoryStoreIp(), + redis_instance.GetMemoryStorePort(), + vms, + redis_instance.node_count, + redis_instance.GetMemoryStorePassword(), + ) + + +def _Run(vms: list[_LinuxVm], redis_instance: _ManagedRedis): + """Runs memtier based on provided flags.""" + if memtier.MEMTIER_RUN_MODE.value == memtier.MemtierMode.MEASURE_CPU_LATENCY: + return memtier.RunGetLatencyAtCpu(redis_instance, vms) + if memtier.MEMTIER_LATENCY_CAPPED_THROUGHPUT.value: + if memtier.MEMTIER_DISTRIBUTION_ITERATIONS.value: + return _MeasureMemtierDistribution(redis_instance, vms) + return memtier.MeasureLatencyCappedThroughput( + vms[0], + redis_instance.node_count, + redis_instance.GetMemoryStoreIp(), + redis_instance.GetMemoryStorePort(), + redis_instance.GetMemoryStorePassword(), + ) + return memtier.RunOverAllThreadsPipelinesAndClients( + vms, + redis_instance.GetMemoryStoreIp(), + [redis_instance.GetMemoryStorePort()], + redis_instance.GetMemoryStorePassword(), + ) + + def Run(benchmark_spec): """Run benchmark and collect samples. @@ -101,30 +176,10 @@ def Run(benchmark_spec): A list of sample.Sample instances. 
""" memtier_vms = benchmark_spec.vm_groups['clients'] - samples = [] - if memtier.MEMTIER_RUN_MODE.value == memtier.MemtierMode.MEASURE_CPU_LATENCY: - samples = memtier.RunGetLatencyAtCpu( - benchmark_spec.cloud_redis_instance, memtier_vms - ) - elif memtier.MEMTIER_LATENCY_CAPPED_THROUGHPUT.value: - samples = memtier.MeasureLatencyCappedThroughput( - memtier_vms[0], - benchmark_spec.cloud_redis_instance.GetMemoryStoreIp(), - benchmark_spec.cloud_redis_instance.GetMemoryStorePort(), - benchmark_spec.cloud_redis_instance.GetMemoryStorePassword(), - ) - else: - samples = memtier.RunOverAllThreadsPipelinesAndClients( - memtier_vms, - benchmark_spec.cloud_redis_instance.GetMemoryStoreIp(), - [benchmark_spec.cloud_redis_instance.GetMemoryStorePort()], - benchmark_spec.cloud_redis_instance.GetMemoryStorePassword(), - ) - - for sample in samples: - sample.metadata.update( - benchmark_spec.cloud_redis_instance.GetResourceMetadata() - ) + redis_instance: _ManagedRedis = benchmark_spec.cloud_redis_instance + samples = _Run(memtier_vms, redis_instance) + for s in samples: + s.metadata.update(benchmark_spec.cloud_redis_instance.GetResourceMetadata()) return samples @@ -140,4 +195,6 @@ def Cleanup(benchmark_spec): def _Install(vm): + """Installs necessary client packages.""" vm.Install('memtier') + vm.Install('redis_cli') diff --git a/perfkitbenchmarker/linux_benchmarks/cluster_boot_benchmark.py b/perfkitbenchmarker/linux_benchmarks/cluster_boot_benchmark.py index 5031126761..f0c36dbfaf 100644 --- a/perfkitbenchmarker/linux_benchmarks/cluster_boot_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/cluster_boot_benchmark.py @@ -368,6 +368,10 @@ def MeasureDelete( List of Samples containing the delete times and an overall cluster delete time. """ + # Only measure VMs that have a delete time. + vms = [vm for vm in vms if vm.delete_start_time and vm.delete_end_time] + if not vms: + return [] # Collect a delete time from each VM. delete_times = [vm.delete_end_time - vm.delete_start_time for vm in vms] # Get the cluster delete time. 
diff --git a/perfkitbenchmarker/linux_benchmarks/hammerdbcli_benchmark.py b/perfkitbenchmarker/linux_benchmarks/hammerdbcli_benchmark.py index 6972b9dad1..4fa1ff25aa 100644 --- a/perfkitbenchmarker/linux_benchmarks/hammerdbcli_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/hammerdbcli_benchmark.py @@ -12,6 +12,7 @@ from perfkitbenchmarker import virtual_machine from perfkitbenchmarker.linux_packages import hammerdb +from perfkitbenchmarker.providers.gcp import gcp_alloy_db # pylint: disable=unused-import # MYSQL Config file path MYSQL_CONFIG_PATH = '/etc/mysql/mysql.conf.d/mysqld.cnf' diff --git a/perfkitbenchmarker/linux_benchmarks/lmbench_benchmark.py b/perfkitbenchmarker/linux_benchmarks/lmbench_benchmark.py index 87bb0df5a9..400019a50d 100644 --- a/perfkitbenchmarker/linux_benchmarks/lmbench_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/lmbench_benchmark.py @@ -219,9 +219,10 @@ def _AddProcessorMetricSamples(lmbench_output, processor_metric_list, metadata, regex = '%s: (.*)' % metric value_unit = regex_util.ExtractGroup(regex, lmbench_output) [value, unit] = value_unit.split(' ') - results.append( - sample.Sample('%s' % metric.replace('\\', ''), float(value), unit, - metadata)) + if unit == 'microseconds': + results.append( + sample.Sample('%s' % metric.replace('\\', ''), float(value), unit, + metadata)) def _ParseOutput(lmbench_output): diff --git a/perfkitbenchmarker/linux_benchmarks/mlperf_benchmark.py b/perfkitbenchmarker/linux_benchmarks/mlperf_benchmark.py index 5b2a326848..be8a5c9fbf 100644 --- a/perfkitbenchmarker/linux_benchmarks/mlperf_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/mlperf_benchmark.py @@ -183,6 +183,10 @@ def PrepareBenchmark(benchmark_spec, vm=None): _UpdateBenchmarkSpecWithFlags(benchmark_spec) vm = vm or benchmark_spec.vms[0] + has_gpu = nvidia_driver.CheckNvidiaGpuExists(vm) + if has_gpu: + vm.Install('cuda_toolkit') + if (bool(benchmark_spec.tpus) and nvidia_driver.CheckNvidiaGpuExists(vm)): raise errors.Config.InvalidValue( 'Invalid configuration. GPUs and TPUs can not both present in the config.' 
@@ -362,10 +366,6 @@ def PrepareRunner(benchmark_spec, vm=None): else: benchmark_spec.model_dir = '/tmp' - has_gpu = nvidia_driver.CheckNvidiaGpuExists(vm) - if has_gpu: - vm.Install('cuda_toolkit') - vm.Install('nvidia_docker') docker.AddUser(vm) vm.RemoteCommand('sudo usermod -aG docker $USER') diff --git a/perfkitbenchmarker/linux_benchmarks/netperf_benchmark.py b/perfkitbenchmarker/linux_benchmarks/netperf_benchmark.py index dee83bccca..a85e390a3b 100644 --- a/perfkitbenchmarker/linux_benchmarks/netperf_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/netperf_benchmark.py @@ -503,7 +503,18 @@ def RunNetperf(vm, benchmark_name, server_ips, num_streams): if len(parsed_output) == 1: # Only 1 netperf thread throughput_sample, latency_samples, histogram = parsed_output[0] - return samples + [throughput_sample] + latency_samples + output_samples = samples + [throughput_sample] + latency_samples + # Create formatted output for TCP stream throughput metrics + if benchmark_name.upper() == 'TCP_STREAM': + output_samples.append( + sample.Sample( + throughput_sample.metric + '_1stream', + throughput_sample.value, + throughput_sample.unit, + throughput_sample.metadata, + ) + ) + return output_samples else: # Multiple netperf threads # Unzip parsed output @@ -527,6 +538,17 @@ def RunNetperf(vm, benchmark_name, server_ips, num_streams): samples.append( sample.Sample(f'{benchmark_name}_Throughput_{stat}', float(value), throughput_unit, metadata)) + # Create formatted output, following {benchmark_name}_Throughput_Xstream(s) + # for TCP stream throughput metrics + if benchmark_name.upper() == 'TCP_STREAM': + samples.append( + sample.Sample( + f'{benchmark_name}_Throughput_{len(parsed_output)}streams', + throughput_stats['total'], + throughput_unit, + metadata, + ) + ) if enable_latency_histograms: # Combine all of the latency histogram dictionaries latency_histogram = collections.Counter() diff --git a/perfkitbenchmarker/linux_benchmarks/stress_ng_benchmark.py b/perfkitbenchmarker/linux_benchmarks/stress_ng_benchmark.py index 2f2316aeed..1b99c5b194 100644 --- a/perfkitbenchmarker/linux_benchmarks/stress_ng_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/stress_ng_benchmark.py @@ -122,12 +122,6 @@ 'stress_ng_thread_workloads', lambda workloads: workloads and set(workloads).issubset(ALL_WORKLOADS)) -ALL_VERSIONS = ['0.05.23', '0.09.25'] -flags.DEFINE_enum( - 'stress_ng_version', '0.09.25', ALL_VERSIONS, - 'Stress-ng version to use. Default is 0.09.25 which ' - 'is the default package on Ubuntu 1804.') - def _GeoMeanOverflow(iterable): """Returns the geometric mean. @@ -171,7 +165,7 @@ def Prepare(benchmark_spec): required to run the benchmark. """ vm = benchmark_spec.vms[0] - vm.Install('stress_ng') + vm.InstallPackages('stress-ng') def _ParseStressngResult(metadata, @@ -231,7 +225,6 @@ def _RunWorkload(vm, num_threads): metadata = { 'duration_sec': FLAGS.stress_ng_duration, 'threads': num_threads, - 'version': FLAGS.stress_ng_version, } samples = [] @@ -245,10 +238,7 @@ def _RunWorkload(vm, num_threads): numthreads=num_threads, duration=FLAGS.stress_ng_duration)) stdout, stderr = vm.RemoteCommand(cmd) - # TODO(user): Find the actual stress-ng version that changes output to - # stderr instead of stdout - if FLAGS.stress_ng_version > '0.05.23': - stdout = stderr + stdout = stderr stressng_sample = _ParseStressngResult(metadata, stdout) if stressng_sample: samples.append(stressng_sample) @@ -318,4 +308,4 @@ def Cleanup(benchmark_spec): required to run the benchmark. 
""" vm = benchmark_spec.vms[0] - vm.Uninstall('stress_ng') + vm.Uninstall('stress-ng') diff --git a/perfkitbenchmarker/linux_benchmarks/sysbench_benchmark.py b/perfkitbenchmarker/linux_benchmarks/sysbench_benchmark.py index 593c1c716a..4ae8dcf2af 100644 --- a/perfkitbenchmarker/linux_benchmarks/sysbench_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/sysbench_benchmark.py @@ -27,6 +27,7 @@ from typing import List from absl import flags +from perfkitbenchmarker import background_tasks from perfkitbenchmarker import configs from perfkitbenchmarker import errors from perfkitbenchmarker import flag_util @@ -329,7 +330,10 @@ def _PrepareSysbench(client_vm, benchmark_spec): # Some databases install these query tools during _PostCreate, which is # skipped if the database is user managed / restored. if db.user_managed or db.restored: - db.client_vm_query_tools.InstallPackages() + background_tasks.RunThreaded( + lambda client_query_tools: client_query_tools.InstallPackages, + db.client_vms_query_tools, + ) if _SKIP_LOAD_STAGE.value or db.restored: logging.info('Skipping the load stage') diff --git a/perfkitbenchmarker/linux_packages/cuda_toolkit.py b/perfkitbenchmarker/linux_packages/cuda_toolkit.py index edd3c3eb99..a13f4837ca 100644 --- a/perfkitbenchmarker/linux_packages/cuda_toolkit.py +++ b/perfkitbenchmarker/linux_packages/cuda_toolkit.py @@ -70,8 +70,8 @@ CUDA_PIN = 'https://developer.download.nvidia.com/compute/cuda/repos/{os}/{cpu_arch}/cuda-{os}.pin' -CUDA_12_1_TOOLKIT = 'https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda-repo-{os}-12-1-local_12.1.0-530.30.02-1_{cpu_arch}.deb' -CUDA_12_0_TOOLKIT = 'https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda-repo-{os}-12-0-local_12.0.0-525.60.13-1_{cpu_arch}.deb' +CUDA_12_1_TOOLKIT = 'https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda-repo-{os}-12-1-local_12.1.1-530.30.02-1_{cpu_arch}.deb' +CUDA_12_0_TOOLKIT = 'https://developer.download.nvidia.com/compute/cuda/12.0.1/local_installers/cuda-repo-{os}-12-0-local_12.0.1-525.85.12-1_{cpu_arch}.deb' CUDA_11_8_TOOLKIT = 'https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda-repo-{os}-11-8-local_11.8.0-520.61.05-1_{cpu_arch}.deb' CUDA_11_7_TOOLKIT = 'https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda-repo-{os}-11-7-local_11.7.1-515.65.01-1_{cpu_arch}.deb' CUDA_11_6_TOOLKIT = 'https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda-repo-{os}-11-6-local_11.6.2-510.47.03-1_{cpu_arch}.deb' @@ -295,6 +295,13 @@ def _InstallCuda10Point2(vm): 'cuda-libraries-dev-10-2') +def _DownloadCuda(vm, toolkit_fmt): + toolkit = toolkit_fmt.format(os=_CudaOs(vm.OS_TYPE), cpu_arch=_GetCpuArch(vm)) + basename = posixpath.basename(toolkit) + vm.RemoteCommand(f'wget --tries=3 {toolkit}') + vm.RemoteCommand(f'sudo apt -o DPkg::Lock::Timeout=60 install ./{basename}') + + def _InstallCuda12Generic(vm, toolkit_fmt, version_dash): """Installs CUDA Toolkit 12.x from NVIDIA. 
@@ -303,8 +310,6 @@ def _InstallCuda12Generic(vm, toolkit_fmt, version_dash): toolkit_fmt: format string to use for the toolkit name version_dash: Version (ie 12-1) to install """ - toolkit = toolkit_fmt.format(os=_CudaOs(vm.OS_TYPE), cpu_arch=_GetCpuArch(vm)) - basename = posixpath.basename(toolkit) vm.RemoteCommand( 'wget -q' f' {CUDA_PIN.format(os=_CudaOs(vm.OS_TYPE), cpu_arch=GetCpuArchPath(vm))}' @@ -313,8 +318,7 @@ def _InstallCuda12Generic(vm, toolkit_fmt, version_dash): f'sudo mv cuda-{_CudaOs(vm.OS_TYPE)}.pin ' '/etc/apt/preferences.d/cuda-repository-pin-600' ) - vm.RemoteCommand(f'wget -q {toolkit}') - vm.RemoteCommand(f'sudo dpkg -i {basename}') + _DownloadCuda(vm, toolkit_fmt) EnrollSigningKey(vm) vm.AptUpdate() vm.InstallPackages( @@ -333,15 +337,12 @@ def _InstallCuda11Generic(vm, toolkit_fmt, version_dash): toolkit_fmt: format string to use for the toolkit name version_dash: Version (ie 11-1) to install """ - toolkit = toolkit_fmt.format(os=_CudaOs(vm.OS_TYPE), cpu_arch=_GetCpuArch(vm)) - basename = posixpath.basename(toolkit) vm.RemoteCommand( f'wget -q {CUDA_PIN.format(os=_CudaOs(vm.OS_TYPE), cpu_arch=GetCpuArchPath(vm))}' ) vm.RemoteCommand(f'sudo mv cuda-{_CudaOs(vm.OS_TYPE)}.pin ' '/etc/apt/preferences.d/cuda-repository-pin-600') - vm.RemoteCommand(f'wget -q {toolkit}') - vm.RemoteCommand(f'sudo dpkg -i {basename}') + _DownloadCuda(vm, toolkit_fmt) EnrollSigningKey(vm) vm.AptUpdate() vm.InstallPackages(f'cuda-toolkit-{version_dash} ' diff --git a/perfkitbenchmarker/linux_packages/dpdk.py b/perfkitbenchmarker/linux_packages/dpdk.py index 28fcd73a4d..29a988debc 100644 --- a/perfkitbenchmarker/linux_packages/dpdk.py +++ b/perfkitbenchmarker/linux_packages/dpdk.py @@ -113,7 +113,9 @@ def _InstallDPDK(vm): ) # Build and Install - vm.RobustRemoteCommand('cd dpdk && sudo meson setup -Dexamples=all build') + vm.RobustRemoteCommand( + 'cd dpdk && sudo meson setup -Dexamples=l3fwd,l2fwd build' + ) vm.RobustRemoteCommand( 'cd dpdk && sudo ninja install -C build && sudo ldconfig' ) diff --git a/perfkitbenchmarker/linux_packages/linux_boot.py b/perfkitbenchmarker/linux_packages/linux_boot.py index 96da98efe7..e7afadbc62 100644 --- a/perfkitbenchmarker/linux_packages/linux_boot.py +++ b/perfkitbenchmarker/linux_packages/linux_boot.py @@ -324,6 +324,8 @@ def _ParseSeconds(formatted_time: str) -> float: secs += float(part[0 : len(part) - 3]) * 60 elif part.endswith('ms'): secs += float(part[0 : len(part) - 2]) / 1000 + elif part.endswith('us'): + secs += float(part[0 : len(part) - 2]) / 1000 / 1000 elif part.endswith('s'): secs += float(part[0 : len(part) - 1]) else: diff --git a/perfkitbenchmarker/linux_packages/lmbench.py b/perfkitbenchmarker/linux_packages/lmbench.py index 1b1b90a718..d91a167582 100644 --- a/perfkitbenchmarker/linux_packages/lmbench.py +++ b/perfkitbenchmarker/linux_packages/lmbench.py @@ -19,7 +19,7 @@ LMBENCH_DIR = posixpath.join(linux_packages.INSTALL_DIR, 'lmbench') GIT = 'https://github.com/intel/lmbench.git' -COMMIT = '4e4efa113b244b70a1faafd13744578b4edeaeb3' +COMMIT = '701c6c35b0270d4634fb1dc5272721340322b8ed' def _Install(vm): diff --git a/perfkitbenchmarker/linux_packages/memtier.py b/perfkitbenchmarker/linux_packages/memtier.py index 4c5f6b157a..121e4bf532 100644 --- a/perfkitbenchmarker/linux_packages/memtier.py +++ b/perfkitbenchmarker/linux_packages/memtier.py @@ -13,26 +13,32 @@ # limitations under the License. 
"""Module containing memtier installation, utilization and cleanup functions.""" +import abc import collections import copy import dataclasses import json -import logging import math import os import pathlib import random import re +import statistics import time from typing import Any, Dict, List, Optional, Text, Tuple, Union from absl import flags +from absl import logging +import matplotlib.pyplot as plt +import numpy as np from perfkitbenchmarker import background_tasks from perfkitbenchmarker import errors from perfkitbenchmarker import flag_util from perfkitbenchmarker import linux_packages from perfkitbenchmarker import sample +from perfkitbenchmarker import virtual_machine from perfkitbenchmarker import vm_util +import seaborn as sns GIT_REPO = 'https://github.com/RedisLabs/memtier_benchmark' GIT_TAG = '1.4.0' @@ -55,9 +61,7 @@ # upper limit to pipelines when binary searching for latency-capped throughput. # arbitrarily chosen for large latency. MAX_PIPELINES_COUNT = 5000 -# upper limit to clients when binary searching for latency-capped throughput -# arbitrarily chosen for large latency. -MAX_CLIENTS_COUNT = 1000 +MAX_CLIENTS_COUNT = 30 MemtierHistogram = List[Dict[str, Union[float, int]]] @@ -157,6 +161,28 @@ class MemtierMode(object): ' memtier_latency_cap. Defaults to False. ' ), ) +MEMTIER_DISTRIBUTION_ITERATIONS = flags.DEFINE_integer( + 'memtier_distribution_iterations', + None, + ( + 'If set, measures the distribution of latency capped throughput across' + ' multiple iterations. Will run a set number of iterations for the' + ' benchmark test and calculate mean/stddev for metrics. Note that this' + ' is different from memtier_run_count which is a passthrough to the' + ' actual memtier benchmark tool which reports different aggregate' + ' stats.' + ), +) +MEMTIER_DISTRIBUTION_BINARY_SEARCH = flags.DEFINE_bool( + 'memtier_distribution_binary_search', + True, + ( + 'If true, uses a binary search to measure the optimal client and thread' + ' count needed for max throughput under latency cap. Else, uses' + ' --memtier_clients, --memtier_threads, and --memtier_pipelines for the' + ' iterations.' + ), +) MEMTIER_LATENCY_CAP = flags.DEFINE_float( 'memtier_latency_cap', 1.0, @@ -227,6 +253,7 @@ class MemtierMode(object): ' uniform.' 
), ) +MEMTIER_TLS = flags.DEFINE_bool('memtier_tls', False, 'Whether to enable TLS.') class BuildFailureError(Exception): @@ -302,6 +329,8 @@ def BuildMemtierCommand( outfile: Optional[pathlib.PosixPath] = None, password: Optional[str] = None, cluster_mode: Optional[bool] = None, + shard_addresses: Optional[str] = None, + tls: Optional[bool] = None, json_out_file: Optional[pathlib.PosixPath] = None, ) -> str: """Returns command arguments used to run memtier.""" @@ -325,11 +354,20 @@ def BuildMemtierCommand( 'out-file': outfile, 'json-out-file': json_out_file, 'print-percentile': '50,90,95,99,99.5,99.9,99.95,99.99', + 'shard-addresses': shard_addresses, } # Arguments passed without a parameter - no_param_args = {'random-data': random_data, 'cluster-mode': cluster_mode} + no_param_args = { + 'random-data': random_data, + 'cluster-mode': cluster_mode, + 'tls': tls, + 'tls-skip-verify': tls, + } # Build the command - cmd = ['memtier_benchmark'] + cmd = [] + if cluster_mode: + cmd += ['ulimit -n 32758 &&'] + cmd += ['memtier_benchmark'] for arg, value in args.items(): if value is not None: cmd.extend([f'--{arg}', str(value)]) @@ -365,6 +403,7 @@ def Load( requests='allkeys', cluster_mode=MEMTIER_CLUSTER_MODE.value, password=server_password, + tls=MEMTIER_TLS.value, ) _IssueRetryableCommand(client_vm, cmd) @@ -456,120 +495,389 @@ def RunOverAllThreadsPipelinesAndClients( class MemtierBinarySearchParameters: """Parameters to aid binary search of memtier.""" - lower_bound: float - upper_bound: float - pipelines: int - threads: int - clients: int + lower_bound: float = 0 + upper_bound: float = math.inf + pipelines: int = 1 + threads: int = 1 + clients: int = 1 -def MeasureLatencyCappedThroughput( - client_vm, +@dataclasses.dataclass(frozen=True) +class MemtierConnection: + """Parameters mapping client to server endpoint.""" + + client_vm: virtual_machine.BaseVirtualMachine + address: str + port: int + + +def _RunParallelConnections( + connections: list[MemtierConnection], server_ip: str, server_port: int, + threads: int, + clients: int, + pipelines: int, password: Optional[str] = None, -) -> List[sample.Sample]: - """Runs memtier to find the maximum throughput under a latency cap.""" - samples = [] +) -> list['MemtierResult']: + """Runs memtier in parallel with the given connections.""" + run_args = [] + base_args = { + 'server_ip': server_ip, + 'server_port': server_port, + 'threads': threads, + 'clients': clients, + 'pipeline': pipelines, + 'password': password, + } + + connections_by_vm = collections.defaultdict(list) + for conn in connections: + connections_by_vm[conn.client_vm].append(conn) + + # Currently more than one client VM will cause shards to be distributed + # evenly between them. This behavior could be customized later with a flag. 
+  if len(connections_by_vm) > 1:
+    for vm, conns in connections_by_vm.items():
+      shard_addresses = ','.join(
+          f'{conn.address}:{conn.port}' for conn in conns
+      )
+      args = copy.deepcopy(base_args)
+      args.update({
+          'vm': vm,
+          'shard_addresses': shard_addresses,
+      })
+      run_args.append(((), args))
+  else:
+    for connection in connections:
+      args = copy.deepcopy(base_args)
+      args.update({
+          'vm': connection.client_vm,
+      })
+      run_args.append(((), args))
+  logging.info('Connections: %s', connections_by_vm)
+  logging.info('Running with args: %s', run_args)
+  return background_tasks.RunThreaded(_Run, run_args)
+
+
+class _LoadModifier(abc.ABC):
+  """Base class for load modification in binary search."""
+
+  @abc.abstractmethod
+  def GetInitialParameters(self) -> MemtierBinarySearchParameters:
+    """Returns the initial parameters used in the binary search."""
+
+  @abc.abstractmethod
+  def ModifyLoad(
+      self, parameters: MemtierBinarySearchParameters, latency: float
+  ) -> MemtierBinarySearchParameters:
+    """Returns new search parameters."""
 
-  for modify_load_func in [_ModifyPipelines, _ModifyClients]:
-    parameters = MemtierBinarySearchParameters(
-        lower_bound=0, upper_bound=math.inf, pipelines=1, threads=1, clients=1
+
+class _PipelineModifier(_LoadModifier):
+  """Modifies pipelines in single-client binary search."""
+
+  def GetInitialParameters(self) -> MemtierBinarySearchParameters:
+    return MemtierBinarySearchParameters(
+        upper_bound=MAX_PIPELINES_COUNT, pipelines=MAX_PIPELINES_COUNT // 2
+    )
+
+  def ModifyLoad(
+      self, parameters: MemtierBinarySearchParameters, latency: float
+  ) -> MemtierBinarySearchParameters:
+    if latency <= MEMTIER_LATENCY_CAP.value:
+      lower_bound = parameters.pipelines
+      upper_bound = min(parameters.upper_bound, MAX_PIPELINES_COUNT)
+    else:
+      lower_bound = parameters.lower_bound
+      upper_bound = parameters.pipelines
+
+    pipelines = lower_bound + math.ceil((upper_bound - lower_bound) / 2)
+    return MemtierBinarySearchParameters(
+        lower_bound=lower_bound,
+        upper_bound=upper_bound,
+        pipelines=pipelines,
+        threads=1,
+        clients=1,
+    )
+
+
+def _FindFactor(number: int, max_threads: int, max_clients: int) -> int:
+  """Find a factor of the given number (or close to it if it's prime)."""
+  for i in reversed(range(1, max_threads + 1)):
+    if number % i == 0 and number // i <= max_clients:
+      return i
+  return _FindFactor(number - 1, max_threads, max_clients)
+
+
+@dataclasses.dataclass
+class _ClientModifier(_LoadModifier):
+  """Modifies clients in single-pipeline binary search."""
+
+  max_clients: int
+  max_threads: int
+
+  def GetInitialParameters(self) -> MemtierBinarySearchParameters:
+    return MemtierBinarySearchParameters(
+        upper_bound=self.max_clients * self.max_threads,
+        threads=max(self.max_threads // 2, 1),
+        clients=self.max_clients,
+    )
+
+  def ModifyLoad(
+      self, parameters: MemtierBinarySearchParameters, latency: float
+  ) -> MemtierBinarySearchParameters:
+    if latency <= MEMTIER_LATENCY_CAP.value:
+      lower_bound = parameters.clients * parameters.threads + 1
+      upper_bound = min(
+          parameters.upper_bound, self.max_clients * self.max_threads
+      )
+    else:
+      lower_bound = parameters.lower_bound
+      upper_bound = parameters.clients * parameters.threads - 1
+
+    total_clients = lower_bound + math.ceil((upper_bound - lower_bound) / 2)
+    threads = _FindFactor(total_clients, self.max_threads, self.max_clients)
+    clients = total_clients // threads
+    return MemtierBinarySearchParameters(
+        lower_bound=lower_bound,
+        upper_bound=upper_bound,
+        pipelines=1,
+        threads=threads,
+
clients=clients, ) + + +def _CombineResults(results: list['MemtierResult']) -> 'MemtierResult': + """Combines multiple MemtierResults into a single aggregate.""" + ops_per_sec = sum([result.ops_per_sec for result in results]) + kb_per_sec = sum([result.kb_per_sec for result in results]) + latency_ms = sum([result.latency_ms for result in results]) / len(results) + latency_dic = collections.defaultdict(int) + for result in results: + for k, v in result.latency_dic.items(): + latency_dic[k] += v + for k in latency_dic: + latency_dic[k] /= len(results) + return MemtierResult( + ops_per_sec=ops_per_sec, + kb_per_sec=kb_per_sec, + latency_ms=latency_ms, + latency_dic=latency_dic, + metadata=results[0].metadata, + parameters=results[0].parameters, + ) + + +def _BinarySearchForLatencyCappedThroughput( + connections: list[MemtierConnection], + load_modifiers: list[_LoadModifier], + server_ip: str, + server_port: int, + password: Optional[str] = None, +) -> list['MemtierResult']: + """Runs memtier to find the maximum throughput under a latency cap.""" + results = [] + for modifier in load_modifiers: + parameters = modifier.GetInitialParameters() current_max_result = MemtierResult( - 0, 0, 0, {'50': 0, '90': 0, '95': 0, '99': 0, '99.5': 0, '99.9': 0, - '99.950': 0, '99.990': 0}, [], [], [], [], {}, {}) - current_metadata = None + latency_dic={ + '50': 0, + '90': 0, + '95': 0, + '99': 0, + '99.5': 0, + '99.9': 0, + '99.950': 0, + '99.990': 0, + }, + ) while parameters.lower_bound < (parameters.upper_bound - 1): - result = _Run( - vm=client_vm, - server_ip=server_ip, - server_port=server_port, - threads=parameters.threads, - pipeline=parameters.pipelines, - clients=parameters.clients, - password=password, + parallel_results = _RunParallelConnections( + connections, + server_ip, + server_port, + parameters.threads, + parameters.clients, + parameters.pipelines, + password, ) + result = _CombineResults(parallel_results) logging.info( ( - 'Binary search for latency capped throughput.\n' - '\tMemtier ops throughput: %s' - '\tmemtier 95th percentile latency: %s' - '\tlower bound: %s' - '\tupper bound: %s' + 'Binary search for latency capped throughput.' 
+ '\nMemtier ops throughput: %s qps' + '\nmemtier 95th percentile latency: %s ms' + '\n%s' ), result.ops_per_sec, result.latency_dic['95'], - parameters.lower_bound, - parameters.upper_bound, + parameters, ) if ( result.ops_per_sec > current_max_result.ops_per_sec and result.latency_dic['95'] <= MEMTIER_LATENCY_CAP.value ): current_max_result = result - current_metadata = GetMetadata( - clients=parameters.clients, - threads=parameters.threads, - pipeline=parameters.pipelines, + current_max_result.parameters = parameters + current_max_result.metadata.update( + GetMetadata( + clients=parameters.clients, + threads=parameters.threads, + pipeline=parameters.pipelines, + ) ) # 95 percentile used to decide latency cap - parameters = modify_load_func(parameters, result.latency_dic['95']) - samples.extend(current_max_result.GetSamples(current_metadata)) + parameters = modifier.ModifyLoad(parameters, result.latency_dic['95']) + results.append(current_max_result) + logging.info( + 'Found optimal parameters %s for throughput %s and p95 latency %s', + current_max_result.parameters, + current_max_result.ops_per_sec, + current_max_result.latency_dic['95'], + ) + return results + + +def MeasureLatencyCappedThroughput( + client_vm: virtual_machine.VirtualMachine, + server_shard_count: int, + server_ip: str, + server_port: int, + password: Optional[str] = None, +) -> List[sample.Sample]: + """Runs memtier to find the maximum throughput under a latency cap.""" + max_threads = client_vm.NumCpusForBenchmark(report_only_physical_cpus=True) + max_clients = MAX_CLIENTS_COUNT // server_shard_count + samples = [] + for result in _BinarySearchForLatencyCappedThroughput( + [MemtierConnection(client_vm, server_ip, server_port)], + [_PipelineModifier(), _ClientModifier(max_clients, max_threads)], + server_ip, + server_port, + password, + ): + samples.extend(result.GetSamples()) return samples -def _ModifyPipelines( - current_parameters: 'MemtierBinarySearchParameters', latency: float -) -> 'MemtierBinarySearchParameters': - """Modify pipelines count for next iteration of binary search.""" - if latency <= MEMTIER_LATENCY_CAP.value: - lower_bound = current_parameters.pipelines - upper_bound = min(current_parameters.upper_bound, MAX_PIPELINES_COUNT) - else: - lower_bound = current_parameters.lower_bound - upper_bound = current_parameters.pipelines - - pipelines = lower_bound + math.ceil((upper_bound - lower_bound) / 2) - return MemtierBinarySearchParameters( - lower_bound=lower_bound, - upper_bound=upper_bound, - pipelines=pipelines, - threads=1, - clients=1, - ) +def _CalculateMode(values: list[float]) -> float: + """Calculates the mode of a distribution using kernel density estimation.""" + plt.clf() + ax = sns.histplot(values, kde=True) + kdeline = ax.lines[0] + xs = kdeline.get_xdata() + ys = kdeline.get_ydata() + mode_idx = np.argmax(ys) + mode = xs[mode_idx] + return mode -def _ModifyClients( - current_parameters: 'MemtierBinarySearchParameters', latency: float -) -> 'MemtierBinarySearchParameters': - """Modify clients count for next iteration of binary search.""" - if latency <= MEMTIER_LATENCY_CAP.value: - lower_bound = current_parameters.clients * current_parameters.threads - upper_bound = min(current_parameters.upper_bound, MAX_CLIENTS_COUNT) - else: - lower_bound = current_parameters.lower_bound - upper_bound = current_parameters.clients * current_parameters.threads - - total_clients = lower_bound + math.ceil((upper_bound - lower_bound) / 2) - threads = _FindFactor(total_clients) - clients = total_clients // 
threads - return MemtierBinarySearchParameters( - lower_bound=lower_bound, - upper_bound=upper_bound, - pipelines=1, - threads=threads, - clients=clients, +def MeasureLatencyCappedThroughputDistribution( + connections: list[MemtierConnection], + server_ip: str, + server_port: int, + client_vms: list[virtual_machine.VirtualMachine], + server_shard_count: int, + password: Optional[str] = None, +) -> list[sample.Sample]: + """Measures distribution of throughput across several iterations. + + In particular, this function will first find the optimal number of threads and + clients per thread, and then run the test with those parameters for the + specified number of iterations. The reported samples will include mean and + stdev of QPS and latency across the series of runs. + + Args: + connections: list of connections from client to server. + server_ip: Ip address of the server. + server_port: Port of the server. + client_vms: A list of client vms. + server_shard_count: Number of shards in the redis cluster. + password: Password of the server. + + Returns: + A list of throughput and latency samples. + """ + parameters_for_test = MemtierBinarySearchParameters( + pipelines=FLAGS.memtier_pipeline[0], + clients=FLAGS.memtier_clients[0], + threads=FLAGS.memtier_threads[0], ) + if MEMTIER_DISTRIBUTION_BINARY_SEARCH.value: + max_threads = client_vms[0].NumCpusForBenchmark( + report_only_physical_cpus=True + ) + shards_per_client = server_shard_count / len(client_vms) + max_clients = int(MAX_CLIENTS_COUNT // shards_per_client) + result = _BinarySearchForLatencyCappedThroughput( + connections, + [_ClientModifier(max_clients, max_threads)], + server_ip, + server_port, + password, + )[0] + parameters_for_test = result.parameters + logging.info( + 'Starting test iterations with parameters %s', parameters_for_test + ) + results = [] + for _ in range(MEMTIER_DISTRIBUTION_ITERATIONS.value): + results_for_run = _RunParallelConnections( + connections, + server_ip, + server_port, + parameters_for_test.threads, + parameters_for_test.clients, + parameters_for_test.pipelines, + password, + ) + results.extend(results_for_run) -def _FindFactor(number): - """Find any factor of the given number. 
Returns 1 for primes.""" - i = round(math.sqrt(number)) - while i > 0: - if number % i == 0: - return i - i -= 1 + samples = [] + metrics = { + 'ops_per_sec': 'ops/s', + 'kb_per_sec': 'KB/s', + 'latency_ms': 'ms', + '90': 'ms', + '95': 'ms', + '99': 'ms', + } + metadata = { + 'distribution_iterations': MEMTIER_DISTRIBUTION_ITERATIONS.value, + 'threads': parameters_for_test.threads, + 'clients': parameters_for_test.clients, + 'pipelines': parameters_for_test.pipelines, + } + for metric, units in metrics.items(): + is_latency = metric.replace('.', '', 1).isdigit() + values = ( + [result.latency_dic[metric] for result in results] + if is_latency + else [getattr(result, metric) for result in results] + ) + if is_latency: + metric = f'p{metric} latency' + samples.extend([ + sample.Sample( + f'Mean {metric}', statistics.mean(values), units, metadata + ), + sample.Sample( + f'Stdev {metric}', + statistics.stdev(values), + units, + metadata, + ), + sample.Sample( + f'Mode {metric}', + _CalculateMode(values), + units, + metadata, + ), + ]) + + return samples def RunGetLatencyAtCpu(cloud_instance, client_vms): @@ -722,6 +1030,7 @@ def _Run( clients: int, password: Optional[str] = None, unique_id: Optional[str] = None, + shard_addresses: Optional[str] = None, ) -> 'MemtierResult': """Runs the memtier benchmark on the vm.""" logging.info( @@ -780,7 +1089,9 @@ def _Run( password=password, outfile=memtier_results_file, cluster_mode=MEMTIER_CLUSTER_MODE.value, + shard_addresses=shard_addresses, json_out_file=json_results_file, + tls=MEMTIER_TLS.value, ) _IssueRetryableCommand(vm, cmd) @@ -812,6 +1123,7 @@ def _Run( with open(output_path, 'r') as output: summary_data = output.read() + logging.info(summary_data) return MemtierResult.Parse(summary_data, time_series_json) @@ -846,16 +1158,25 @@ def GetMetadata(clients: int, threads: int, pipeline: int) -> Dict[str, Any]: class MemtierResult: """Class that represents memtier results.""" - ops_per_sec: float - kb_per_sec: float - latency_ms: float - latency_dic: Dict[str, float] - get_latency_histogram: MemtierHistogram - set_latency_histogram: MemtierHistogram - timestamps: List[int] - ops_series: List[int] - latency_series: Dict[str, List[int]] - runtime_info: Dict[Text, Text] + ops_per_sec: float = 0.0 + kb_per_sec: float = 0.0 + + latency_ms: float = 0.0 + latency_dic: Dict[str, float] = dataclasses.field(default_factory=dict) + get_latency_histogram: MemtierHistogram = dataclasses.field( + default_factory=list + ) + set_latency_histogram: MemtierHistogram = dataclasses.field( + default_factory=list + ) + + timestamps: List[int] = dataclasses.field(default_factory=list) + ops_series: List[int] = dataclasses.field(default_factory=list) + latency_series: Dict[str, List[int]] = dataclasses.field(default_factory=dict) + + runtime_info: Dict[Text, Text] = dataclasses.field(default_factory=dict) + metadata: Dict[str, Any] = dataclasses.field(default_factory=dict) + parameters: MemtierBinarySearchParameters = MemtierBinarySearchParameters() @classmethod def Parse( @@ -920,21 +1241,27 @@ def Parse( runtime_info=runtime_info, ) - def GetSamples(self, metadata: Dict[str, Any]) -> List[sample.Sample]: + def GetSamples( + self, metadata: Optional[Dict[str, Any]] = None + ) -> List[sample.Sample]: """Return this result as a list of samples.""" - metadata['avg_latency'] = self.latency_ms + if metadata: + self.metadata.update(copy.deepcopy(metadata)) + self.metadata['avg_latency'] = self.latency_ms for key, value in self.latency_dic.items(): - metadata[f'p{key}_latency'] 
= value + self.metadata[f'p{key}_latency'] = value samples = [ - sample.Sample('Ops Throughput', self.ops_per_sec, 'ops/s', metadata), - sample.Sample('KB Throughput', self.kb_per_sec, 'KB/s', metadata), - sample.Sample('Latency', self.latency_ms, 'ms', metadata), + sample.Sample( + 'Ops Throughput', self.ops_per_sec, 'ops/s', self.metadata + ), + sample.Sample('KB Throughput', self.kb_per_sec, 'KB/s', self.metadata), + sample.Sample('Latency', self.latency_ms, 'ms', self.metadata), ] for name, histogram in [ ('get', self.get_latency_histogram), ('set', self.set_latency_histogram), ]: - hist_meta = copy.deepcopy(metadata) + hist_meta = copy.deepcopy(self.metadata) hist_meta.update({'histogram': json.dumps(histogram)}) samples.append( sample.Sample(f'{name} latency histogram', 0, '', hist_meta) @@ -1092,6 +1419,30 @@ def AggregateMemtierResults( additional_metadata=metadata, ) ) + individual_latencies = collections.defaultdict(list) + for metric, latency_at_timestamp in latency_series.items(): + for client_latency in latency_at_timestamp: + for client, latency in enumerate(client_latency): + if len(individual_latencies[metric]) <= client: + individual_latencies[metric].append([]) + individual_latencies[metric][client].append(latency) + + for metric, client_latencies in individual_latencies.items(): + for client, latencies in enumerate(client_latencies): + additional_metadata = {} + additional_metadata.update(metadata) + additional_metadata['client'] = client + additional_metadata[sample.DISABLE_CONSOLE_LOG] = True + samples.append( + sample.CreateTimeSeriesSample( + latencies, + timestamps[0 : len(latencies)], + f'{metric}_time_series', + 'ms', + 1, + additional_metadata=additional_metadata, + ) + ) return samples @@ -1190,7 +1541,11 @@ def _ParseLine( if not re.match(pattern, line): return last_total - _, msec, percent = line.split() + # Skip cases where we have an incomplete line (not enough values to unpack). + try: + _, msec, percent = line.split() + except ValueError: + return last_total counts = _ConvertPercentToAbsolute(approx_total, float(percent)) bucket_counts = int(round(counts - last_total)) if bucket_counts > 0: diff --git a/perfkitbenchmarker/linux_packages/nccl.py b/perfkitbenchmarker/linux_packages/nccl.py index 7ccce1bd88..1c4ee5710e 100644 --- a/perfkitbenchmarker/linux_packages/nccl.py +++ b/perfkitbenchmarker/linux_packages/nccl.py @@ -20,8 +20,10 @@ from perfkitbenchmarker.linux_packages import cuda_toolkit flags.DEFINE_string( - 'nccl_version', 'v2.12.12-1', 'NCCL version to install. ' - 'Input "None" to bypass installation.') + 'nccl_version', + 'v2.18.1-1', + 'NCCL version to install. 
Input "None" to bypass installation.', +) flags.DEFINE_string('nccl_net_plugin', None, 'NCCL network plugin name') flags.DEFINE_string('nccl_mpi', '/usr/bin/mpirun', 'MPI binary path') flags.DEFINE_string('nccl_mpi_home', '/usr/lib/x86_64-linux-gnu/openmpi', diff --git a/perfkitbenchmarker/linux_packages/nvidia_docker.py b/perfkitbenchmarker/linux_packages/nvidia_docker.py index 723d868b53..07001dd22e 100644 --- a/perfkitbenchmarker/linux_packages/nvidia_docker.py +++ b/perfkitbenchmarker/linux_packages/nvidia_docker.py @@ -22,8 +22,17 @@ 'The version of nvidia docker to install.') +def CheckNvidiaDockerExists(vm): + resp, _ = vm.RemoteHostCommand( + 'command -v nvidia-docker', ignore_failure=True + ) + return bool(resp.rstrip()) + + def AptInstall(vm): """Installs the nvidia-docker package on the VM.""" + if CheckNvidiaDockerExists(vm): + return vm.Install('docker') vm.RemoteCommand('curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey ' '| sudo apt-key add -') @@ -39,6 +48,8 @@ def AptInstall(vm): def YumInstall(vm): """Installs the nvidia-docker package on the VM.""" + if CheckNvidiaDockerExists(vm): + return vm.Install('docker') vm.RemoteCommand('curl -s -L https://nvidia.github.io/' 'nvidia-container-runtime/' diff --git a/perfkitbenchmarker/linux_packages/nvidia_driver.py b/perfkitbenchmarker/linux_packages/nvidia_driver.py index cf66c40c35..13c2bd15e9 100644 --- a/perfkitbenchmarker/linux_packages/nvidia_driver.py +++ b/perfkitbenchmarker/linux_packages/nvidia_driver.py @@ -32,7 +32,7 @@ NVIDIA_TESLA_P100 = 'p100' NVIDIA_TESLA_V100 = 'v100' NVIDIA_TESLA_T4 = 't4' -NVIDIA_TESLA_L4 = 'l4' +NVIDIA_L4 = 'l4' NVIDIA_TESLA_A100 = 'a100' NVIDIA_TESLA_A10 = 'a10' @@ -201,7 +201,7 @@ def GetGpuType(vm): elif 'T4' in gpu_types[0]: return NVIDIA_TESLA_T4 elif 'L4' in gpu_types[0]: - return NVIDIA_TESLA_L4 + return NVIDIA_L4 elif 'A100' in gpu_types[0]: return NVIDIA_TESLA_A100 elif 'A10' in gpu_types[0]: @@ -474,8 +474,11 @@ def Install(vm): vm.Install('wget') tokens = re.split('/', location) filename = tokens[-1] - vm.RemoteCommand('wget {location} && chmod 755 {filename} ' - .format(location=location, filename=filename)) + vm.RemoteCommand( + 'wget --tries=3 {location} && chmod 755 {filename} '.format( + location=location, filename=filename + ) + ) vm.RemoteCommand( 'sudo ./{filename} -q -x-module-path={x_module_path} ' '--ui=none -x-library-path={x_library_path}'.format( diff --git a/perfkitbenchmarker/linux_packages/redis_cli.py b/perfkitbenchmarker/linux_packages/redis_cli.py new file mode 100644 index 0000000000..08a439b6a9 --- /dev/null +++ b/perfkitbenchmarker/linux_packages/redis_cli.py @@ -0,0 +1,41 @@ +# Copyright 2023 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Module containing redis cli installation and cleanup functions.""" + + +def _Install(vm) -> None: + """Installs the redis package on the VM.""" + vm.RemoteCommand( + 'curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o' + ' /usr/share/keyrings/redis-archive-keyring.gpg' + ) + vm.RemoteCommand( + 'echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg]' + ' https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee' + ' /etc/apt/sources.list.d/redis.list' + ) + vm.RemoteCommand('sudo apt-get update') + vm.RemoteCommand('sudo apt-get install -y redis') + + +def AptInstall(vm) -> None: + """Installs the redis package on the VM.""" + _Install(vm) + + +def YumInstall(vm) -> None: + """Installs the redis package on the VM.""" + del vm # unused + raise NotImplementedError() + diff --git a/perfkitbenchmarker/linux_packages/stress_ng.py b/perfkitbenchmarker/linux_packages/stress_ng.py deleted file mode 100644 index b827521c1b..0000000000 --- a/perfkitbenchmarker/linux_packages/stress_ng.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright 2021 PerfKitBenchmarker Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Module containing stress-ng installation and cleanup functions.""" - -from absl import flags - -FLAGS = flags.FLAGS - -GIT_REPO = 'https://github.com/ColinIanKing/stress-ng' -GIT_TAG_MAP = { - '0.05.23': '54722768329c9f8184c1c98db63435f201377df1', # ubuntu1604 - '0.09.25': '2db2812edf99ec80c08edf98ee88806a3662031c', # ubuntu1804 -} -STRESS_NG_DIR = '~/stress_ng' - - -def AptInstall(vm): - """Installs stress-ng.""" - vm.InstallPackages( - 'build-essential libaio-dev libapparmor-dev libattr1-dev libbsd-dev ' - 'libcap-dev libgcrypt11-dev libkeyutils-dev libsctp-dev zlib1g-dev' - ) - vm.RemoteCommand('git clone {0} {1}'.format(GIT_REPO, STRESS_NG_DIR)) - vm.RemoteCommand('cd {0} && git checkout {1}'.format( - STRESS_NG_DIR, GIT_TAG_MAP[FLAGS.stress_ng_version])) - vm.RemoteCommand('cd {0} && make && sudo make install'.format(STRESS_NG_DIR)) - - -def AptUninstall(vm): - """Uninstalls stress-ng.""" - vm.RemoteCommand('cd {0} && sudo make uninstall'.format(STRESS_NG_DIR)) diff --git a/perfkitbenchmarker/linux_packages/ycsb.py b/perfkitbenchmarker/linux_packages/ycsb.py index cef793a632..5988894a49 100644 --- a/perfkitbenchmarker/linux_packages/ycsb.py +++ b/perfkitbenchmarker/linux_packages/ycsb.py @@ -35,18 +35,10 @@ Each workload runs for at most 30 minutes. 
""" -import bisect -import collections -from collections.abc import Iterable, Mapping, Sequence +from collections.abc import Mapping, Sequence import copy -import csv -import dataclasses import io -import itertools -import json import logging -import math -import operator import os import posixpath import re @@ -62,190 +54,250 @@ from perfkitbenchmarker import virtual_machine from perfkitbenchmarker import vm_util from perfkitbenchmarker.linux_packages import maven +from perfkitbenchmarker.linux_packages import ycsb_stats FLAGS = flags.FLAGS -YCSB_URL_TEMPLATE = ('https://github.com/brianfrankcooper/YCSB/releases/' - 'download/{0}/ycsb-{0}.tar.gz') +YCSB_URL_TEMPLATE = ( + 'https://github.com/brianfrankcooper/YCSB/releases/' + 'download/{0}/ycsb-{0}.tar.gz' +) YCSB_DIR = posixpath.join(linux_packages.INSTALL_DIR, 'ycsb') YCSB_EXE = posixpath.join(YCSB_DIR, 'bin', 'ycsb') HDRHISTOGRAM_DIR = posixpath.join(linux_packages.INSTALL_DIR, 'hdrhistogram') -HDRHISTOGRAM_TAR_URL = ('https://github.com/HdrHistogram/HdrHistogram/archive/' - 'HdrHistogram-2.1.10.tar.gz') -HDRHISTOGRAM_GROUPS = ['READ', 'UPDATE'] - -_DEFAULT_PERCENTILES = 50, 75, 90, 95, 99, 99.9 - -HISTOGRAM = 'histogram' -HDRHISTOGRAM = 'hdrhistogram' -TIMESERIES = 'timeseries' -YCSB_MEASUREMENT_TYPES = [HISTOGRAM, HDRHISTOGRAM, TIMESERIES] - -# Binary operators to aggregate reported statistics. -# Statistics with operator 'None' will be dropped. -AGGREGATE_OPERATORS = { - 'Operations': operator.add, - 'RunTime(ms)': max, - 'Return=0': operator.add, - 'Return=-1': operator.add, - 'Return=-2': operator.add, - 'Return=-3': operator.add, - 'Return=OK': operator.add, - 'Return=ERROR': operator.add, - 'Return=NOT_FOUND': operator.add, - 'LatencyVariance(ms)': None, - 'AverageLatency(ms)': None, # Requires both average and # of ops. - 'Throughput(ops/sec)': operator.add, - '95thPercentileLatency(ms)': None, # Calculated across clients. - '99thPercentileLatency(ms)': None, # Calculated across clients. - 'MinLatency(ms)': min, - 'MaxLatency(ms)': max -} - -flags.DEFINE_string('ycsb_version', '0.17.0', - 'YCSB version to use. Defaults to version 0.17.0.') +HDRHISTOGRAM_TAR_URL = ( + 'https://github.com/HdrHistogram/HdrHistogram/archive/' + 'HdrHistogram-2.1.10.tar.gz' +) + +flags.DEFINE_string( + 'ycsb_version', '0.17.0', 'YCSB version to use. Defaults to version 0.17.0.' +) flags.DEFINE_string( - 'ycsb_tar_url', None, 'URL to a YCSB tarball to use ' - 'instead of the releases located on github.') -flags.DEFINE_enum('ycsb_measurement_type', HISTOGRAM, YCSB_MEASUREMENT_TYPES, - 'Measurement type to use for ycsb. Defaults to histogram.') -flags.DEFINE_enum('ycsb_measurement_interval', 'op', ['op', 'intended', 'both'], - 'Measurement interval to use for ycsb. Defaults to op.') + 'ycsb_tar_url', + None, + 'URL to a YCSB tarball to use instead of the releases located on github.', +) +flags.DEFINE_enum( + 'ycsb_measurement_type', + ycsb_stats.HISTOGRAM, + ycsb_stats.YCSB_MEASUREMENT_TYPES, + 'Measurement type to use for ycsb. Defaults to histogram.', +) +flags.DEFINE_enum( + 'ycsb_measurement_interval', + 'op', + ['op', 'intended', 'both'], + 'Measurement interval to use for ycsb. 
Defaults to op.',
+)
 flags.DEFINE_boolean(
-    'ycsb_histogram', False, 'Include individual '
+    'ycsb_histogram',
+    False,
+    'Include individual '
     'histogram results from YCSB (will increase sample '
-    'count).')
-flags.DEFINE_boolean('ycsb_load_samples', True, 'Include samples '
-                     'from pre-populating database.')
+    'count).',
+)
+flags.DEFINE_boolean(
+    'ycsb_load_samples', True, 'Include samples from pre-populating database.'
+)
 flags.DEFINE_boolean(
-    'ycsb_skip_load_stage', False, 'If True, skip the data '
+    'ycsb_skip_load_stage',
+    False,
+    'If True, skip the data '
     'loading stage. It can be used when the database target '
-    'already exists with pre-populated data.')
+    'already exists with pre-populated data.',
+)
 flags.DEFINE_boolean(
-    'ycsb_skip_run_stage', False, 'If True, skip the workload '
+    'ycsb_skip_run_stage',
+    False,
+    'If True, skip the workload '
     'running stage. It can be used when you want to '
-    'pre-populate a database target.')
+    'pre-populate a database target.',
+)
 flags.DEFINE_boolean(
-    'ycsb_include_individual_results', False,
-    'Include results from each client VM, rather than just '
-    'combined results.')
+    'ycsb_include_individual_results',
+    False,
+    'Include results from each client VM, rather than just combined results.',
+)
 flags.DEFINE_boolean(
-    'ycsb_reload_database', True, 'Reload database, otherwise skip load stage. '
+    'ycsb_reload_database',
+    True,
+    'Reload database, otherwise skip load stage. '
     'Note, this flag is only used if the database '
-    'is already loaded.')
+    'is already loaded.',
+)
 flags.DEFINE_integer('ycsb_client_vms', 1, 'Number of YCSB client VMs.')
 flags.DEFINE_list(
-    'ycsb_workload_files', ['workloada', 'workloadb'],
+    'ycsb_workload_files',
+    ['workloada', 'workloadb'],
     'Path to YCSB workload file to use during *run* '
-    'stage only. Comma-separated list')
+    'stage only. Comma-separated list.',
+)
 flags.DEFINE_list(
-    'ycsb_load_parameters', [],
+    'ycsb_load_parameters',
+    [],
     'Passed to YCSB during the load stage. Comma-separated list '
-    'of "key=value" pairs.')
+    'of "key=value" pairs.',
+)
 flags.DEFINE_list(
-    'ycsb_run_parameters', [],
+    'ycsb_run_parameters',
+    [],
     'Passed to YCSB during the run stage. Comma-separated list '
-    'of "key=value" pairs.')
+    'of "key=value" pairs.',
+)
 _THROUGHPUT_TIME_SERIES = flags.DEFINE_bool(
-    'ycsb_throughput_time_series', False,
+    'ycsb_throughput_time_series',
+    False,
     'If true, run prints status which includes a throughput time series (1s '
-    'granularity), and includes the results in the samples.')
+    'granularity), and includes the results in the samples.',
+)
 flags.DEFINE_list(
-    'ycsb_threads_per_client', ['32'], 'Number of threads per '
+    'ycsb_threads_per_client',
+    ['32'],
+    'Number of threads per '
     'loader during the benchmark run. Specify a list to vary the '
     'number of clients. For each thread count, optionally supply '
-    'target qps per client, which cause ycsb to self-throttle.')
+    'target qps per client, which causes ycsb to self-throttle.',
+)
 flags.DEFINE_integer(
-    'ycsb_preload_threads', None, 'Number of threads per '
+    'ycsb_preload_threads',
+    None,
+    'Number of threads per '
     'loader during the initial data population stage. '
-    'Default value depends on the target DB.')
+    'Default value depends on the target DB.',
+)
 flags.DEFINE_integer(
-    'ycsb_record_count', None, 'Pre-load with a total '
+    'ycsb_record_count',
+    None,
+    'Pre-load with a total '
     'dataset of records total. Overrides recordcount value in '
     'all workloads of this run. Defaults to None, where '
     'recordcount value in each workload is used. If neither '
-    'is not set, ycsb default of 0 is used.')
-flags.DEFINE_integer('ycsb_operation_count', None, 'Number of operations '
-                     '*per client VM*.')
+    'is set, the ycsb default of 0 is used.',
+)
 flags.DEFINE_integer(
-    'ycsb_timelimit', 1800, 'Maximum amount of time to run '
+    'ycsb_operation_count', None, 'Number of operations *per client VM*.'
+)
+flags.DEFINE_integer(
+    'ycsb_timelimit',
+    1800,
+    'Maximum amount of time to run '
     'each workload / client count combination in seconds. '
-    'Set to 0 for unlimited time.')
+    'Set to 0 for unlimited time.',
+)
 flags.DEFINE_integer(
-    'ycsb_field_count', 10, 'Number of fields in a record. '
-    'Defaults to 10, which is the default in ycsb v0.17.0.')
+    'ycsb_field_count',
+    10,
+    'Number of fields in a record. '
+    'Defaults to 10, which is the default in ycsb v0.17.0.',
+)
 flags.DEFINE_integer(
-    'ycsb_field_length', None, 'Size of each field. Defaults '
-    'to None which uses the ycsb default of 100.')
+    'ycsb_field_length',
+    None,
+    'Size of each field. Defaults to None, which uses the ycsb default of 100.',
+)
 flags.DEFINE_enum(
-    'ycsb_requestdistribution', None, ['uniform', 'zipfian', 'latest'],
+    'ycsb_requestdistribution',
+    None,
+    ['uniform', 'zipfian', 'latest'],
     'Type of request distribution. '
-    'This will overwrite workload file parameter')
+    'This will overwrite the workload file parameter.',
+)
 flags.DEFINE_float(
-    'ycsb_readproportion', None, 'The read proportion, '
-    'Default is 0.5 in workloada and 0.95 in YCSB.')
+    'ycsb_readproportion',
+    None,
+    'The read proportion. Default is 0.5 in workloada and 0.95 in YCSB.',
+)
 flags.DEFINE_float(
-    'ycsb_updateproportion', None, 'The update proportion, '
-    'Default is 0.5 in workloada and 0.05 in YCSB.')
+    'ycsb_updateproportion',
+    None,
+    'The update proportion. Default is 0.5 in workloada and 0.05 in YCSB.',
+)
 flags.DEFINE_float(
-    'ycsb_scanproportion', None, 'The scan proportion, '
-    'Default is 0 in workloada and 0 in YCSB.')
+    'ycsb_scanproportion',
+    None,
+    'The scan proportion. Default is 0 in workloada and 0 in YCSB.',
+)
 flags.DEFINE_boolean(
-    'ycsb_dynamic_load', False,
+    'ycsb_dynamic_load',
+    False,
     'Apply dynamic load to system under test and find out '
     'maximum sustained throughput (test length controlled by '
     'ycsb_operation_count and ycsb_timelimit) the '
-    'system capable of handling. ')
+    'system is capable of handling.',
+)
 flags.DEFINE_integer(
-    'ycsb_dynamic_load_throughput_lower_bound', None,
+    'ycsb_dynamic_load_throughput_lower_bound',
+    None,
     'Apply dynamic load to system under test. '
     'If not supplied, test will halt once reaching '
     'sustained load, otherwise, will keep running until '
-    'reaching lower bound.')
+    'reaching lower bound.',
+)
 flags.DEFINE_float(
-    'ycsb_dynamic_load_sustain_throughput_ratio', 0.95,
+    'ycsb_dynamic_load_sustain_throughput_ratio',
+    0.95,
     'To consider throughput sustainable when applying '
     'dynamic load, the actual overall throughput measured '
     'divided by target throughput applied should exceed '
     'this ratio. If not, we will lower target throughput and '
-    'retry.')
+    'retry.',
+)
 flags.DEFINE_integer(
-    'ycsb_dynamic_load_sustain_timelimit', 300,
+    'ycsb_dynamic_load_sustain_timelimit',
+    300,
     'Run duration in seconds for each throughput target '
-    'if we have already reached sustained throughput.')
-flags.DEFINE_integer('ycsb_sleep_after_load_in_sec', 0,
-                     'Sleep duration in seconds between load and run stage.')
+    'if we have already reached sustained throughput.',
+)
+flags.DEFINE_integer(
+    'ycsb_sleep_after_load_in_sec',
+    0,
+    'Sleep duration in seconds between load and run stage.',
+)
 _BURST_LOAD_MULTIPLIER = flags.DEFINE_integer(
-    'ycsb_burst_load', None,
+    'ycsb_burst_load',
+    None,
     'If set, applies burst load to the system, by running YCSB once, and then '
     'immediately running again with --ycsb_burst_load times the '
     'amount of load specified by the `target` parameter. Set to -1 for '
-    'the max throughput from the client.')
+    'the max throughput from the client.',
+)
 _INCREMENTAL_TARGET_QPS = flags.DEFINE_integer(
-    'ycsb_incremental_load', None,
+    'ycsb_incremental_load',
+    None,
     'If set, applies an incrementally increasing load until the target QPS is '
     'reached. This should be the aggregate load for all VMs. Running with '
     'this flag requires that there is not a QPS target passed in through '
-    '--ycsb_run_parameters.')
+    '--ycsb_run_parameters.',
+)
 _SHOULD_RECORD_COMMAND_LINE = flags.DEFINE_boolean(
-    'ycsb_record_command_line', True,
+    'ycsb_record_command_line',
+    True,
     'Whether to record the command line used for kicking off the runs as part '
     'of metadata. When there are many VMs, this can get long and clutter the '
-    'PKB log.')
+    'PKB log.',
+)
 _SHOULD_FAIL_ON_INCOMPLETE_LOADING = flags.DEFINE_boolean(
-    'ycsb_fail_on_incomplete_loading', False,
+    'ycsb_fail_on_incomplete_loading',
+    False,
     'Whether to fail the benchmarking if loading is not complete, '
-    'e.g., there are insert failures.')
+    'e.g., there are insert failures.',
+)
 _INCOMPLETE_LOADING_METRIC = flags.DEFINE_string(
-    'ycsb_insert_error_metric', 'insert Return=ERROR',
+    'ycsb_insert_error_metric',
+    'insert Return=ERROR',
     'Used with --ycsb_fail_on_incomplete_loading. Will fail the benchmark if '
-    'this metric\'s value is non-zero. This metric should be an indicator of '
+    "this metric's value is non-zero. This metric should be an indicator of "
    'incomplete table loading. If insertion retries are enabled via '
     'core_workload_insertion_retry_limit, then the default metric may be '
-    'non-zero even though the retried insertion eventually succeeded.')
+    'non-zero even though the retried insertion eventually succeeded.',
+)
 _ERROR_RATE_THRESHOLD = flags.DEFINE_float(
-    'ycsb_max_error_rate', 1.00, 'The maximum error rate allowed for the run. '
-    'By default, this allows any number of errors.')
+    'ycsb_max_error_rate',
+    1.00,
+    'The maximum error rate allowed for the run. '
+    'By default, this allows any number of errors.',
+)

 # Status line pattern
 _STATUS_PATTERN = r'(\d+) sec: \d+ operations; (\d+.\d+) current ops\/sec'
@@ -364,28 +416,33 @@ def CheckPrerequisites():
   # Following flags are mutually exclusive.
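  # Illustrative usage (the threads:qps form is an assumption inferred from
  # the ':' check below): a target QPS can come from at most one of
  #   --ycsb_run_parameters=target=5000
  #   --ycsb_threads_per_client=32:1000
  #   --ycsb_dynamic_load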
  run_target = 'target' in run_params
  per_thread_target = any(
-      [':' in thread_qps for thread_qps in FLAGS.ycsb_threads_per_client])
+      [':' in thread_qps for thread_qps in FLAGS.ycsb_threads_per_client]
+  )
  dynamic_load = FLAGS.ycsb_dynamic_load
  if run_target + per_thread_target + dynamic_load > 1:
    raise errors.Config.InvalidValue(
        'Setting YCSB target in ycsb_threads_per_client '
        'or ycsb_run_parameters or applying ycsb_dynamic_load_* flags'
-        ' are mutally exclusive.')
+        ' are mutually exclusive.'
+    )
  if FLAGS.ycsb_dynamic_load_throughput_lower_bound and not dynamic_load:
    raise errors.Config.InvalidValue(
-        'To apply dynamic load, set --ycsb_dynamic_load.')
+        'To apply dynamic load, set --ycsb_dynamic_load.'
+    )
  if _BURST_LOAD_MULTIPLIER.value and not run_target:
    raise errors.Config.InvalidValue(
        'Running in burst mode requires setting a target QPS using '
-        '--ycsb_run_parameters=target=qps. Got None.')
+        '--ycsb_run_parameters=target=qps. Got None.'
+    )
  if _INCREMENTAL_TARGET_QPS.value and run_target:
    raise errors.Config.InvalidValue(
        'Running in incremental mode requires setting a target QPS using '
-        '--ycsb_incremental_load=target and not --ycsb_run_parameters.')
+        '--ycsb_incremental_load=target and not --ycsb_run_parameters.'
+    )


 @vm_util.Retry(poll_interval=1)
@@ -397,8 +454,10 @@ def Install(vm):
   vm.Install('python')
   vm.InstallPackages('curl')
   ycsb_url = (
-      _ycsb_tar_url or FLAGS.ycsb_tar_url or
-      YCSB_URL_TEMPLATE.format(FLAGS.ycsb_version))
+      _ycsb_tar_url
+      or FLAGS.ycsb_tar_url
+      or YCSB_URL_TEMPLATE.format(FLAGS.ycsb_version)
+  )
   install_cmd = (
       'mkdir -p {0} && curl -L {1} | '
       'tar -C {0} --strip-components=1 -xzf - '
@@ -409,7 +468,8 @@ def Install(vm):
       # After https://github.com/brianfrankcooper/YCSB/pull/1583 is merged and
       # released, this will not be necessary.
       # TODO(user): Update minimum YCSB version and remove.
-      "--exclude='**/log4j-core-2*.jar' ")
+      "--exclude='**/log4j-core-2*.jar' "
+  )
   vm.RemoteCommand(install_cmd.format(YCSB_DIR, ycsb_url))
   if _GetVersion(FLAGS.ycsb_version) >= 11:
     vm.Install('maven')
@@ -426,557 +486,6 @@ def Install(vm):
   )


-@dataclasses.dataclass
-class _OpResult:
-  """Individual results for a single operation.
-
-  Attributes:
-    group: group name (e.g., update, insert, overall)
-    statistics: dict mapping from statistic name to value
-    data_type: Corresponds to --ycsb_measurement_type.
-    data:
-      For HISTOGRAM/HDRHISTOGRAM: list of (ms_lower_bound, count) tuples, e.g.
-      [(0, 530), (19, 1)] indicates that 530 ops took between 0ms and 1ms,
-      and 1 took between 19ms and 20ms. Empty bins are not reported.
-      For TIMESERIES: list of (time, latency us) tuples.
-  """
-  group: str = ''
-  data_type: str = ''
-  data: list[tuple[int, float]] = dataclasses.field(default_factory=list)
-  statistics: dict[str, float] = dataclasses.field(default_factory=dict)
-
-
-@dataclasses.dataclass
-class YcsbResult:
-  """Aggregate results for the YCSB run.
-
-  Attributes:
-    client: Contains YCSB version information.
-    command_line: Command line executed.
-    throughput_time_series: Time series of throughputs (interval, QPS).
-    groups: dict of operation group name to results for that operation.
-  """
-  client: str = ''
-  command_line: str = ''
-  throughput_time_series: _ThroughputTimeSeries = dataclasses.field(
-      default_factory=dict)
-  groups: dict[str, _OpResult] = dataclasses.field(default_factory=dict)
-
-
-def ParseResults(ycsb_result_string: str,
-                 data_type: str = 'histogram') -> 'YcsbResult':
-  """Parse YCSB results.
- - Example input for histogram datatype: - - YCSB Client 0.1 - Command line: -db com.yahoo.ycsb.db.HBaseClient -P /tmp/pkb/workloada - [OVERALL], RunTime(ms), 1800413.0 - [OVERALL], Throughput(ops/sec), 2740.503428935472 - [UPDATE], Operations, 2468054 - [UPDATE], AverageLatency(us), 2218.8513395574005 - [UPDATE], MinLatency(us), 554 - [UPDATE], MaxLatency(us), 352634 - [UPDATE], 95thPercentileLatency(ms), 4 - [UPDATE], 99thPercentileLatency(ms), 7 - [UPDATE], Return=0, 2468054 - [UPDATE], 0, 398998 - [UPDATE], 1, 1015682 - [UPDATE], 2, 532078 - ... - - Example input for hdrhistogram datatype: - - YCSB Client 0.17.0 - Command line: -db com.yahoo.ycsb.db.RedisClient -P /opt/pkb/workloadb - [OVERALL], RunTime(ms), 29770.0 - [OVERALL], Throughput(ops/sec), 33590.86328518643 - [UPDATE], Operations, 49856.0 - [UPDATE], AverageLatency(us), 1478.0115532734276 - [UPDATE], MinLatency(us), 312.0 - [UPDATE], MaxLatency(us), 24623.0 - [UPDATE], 95thPercentileLatency(us), 3501.0 - [UPDATE], 99thPercentileLatency(us), 6747.0 - [UPDATE], Return=OK, 49856 - ... - - Example input for ycsb version 0.17.0+: - - ... - Command line: -db com.yahoo.ycsb.db.HBaseClient10 ... -load - YCSB Client 0.17.0 - - Loading workload... - Starting test. - ... - [OVERALL], RunTime(ms), 11411 - [OVERALL], Throughput(ops/sec), 8763.473841030585 - [INSERT], Operations, 100000 - [INSERT], AverageLatency(us), 74.92 - [INSERT], MinLatency(us), 5 - [INSERT], MaxLatency(us), 98495 - [INSERT], 95thPercentileLatency(us), 42 - [INSERT], 99thPercentileLatency(us), 1411 - [INSERT], Return=OK, 100000 - ... - - Example input for timeseries datatype: - - ... - [OVERALL], RunTime(ms), 240007.0 - [OVERALL], Throughput(ops/sec), 10664.605615669543 - ... - [READ], Operations, 1279253 - [READ], AverageLatency(us), 3002.7057071587874 - [READ], MinLatency(us), 63 - [READ], MaxLatency(us), 93584 - [READ], Return=OK, 1279281 - [READ], 0, 528.6142757498257 - [READ], 500, 360.95347448674966 - [READ], 1000, 667.7379547689283 - [READ], 1500, 731.5389357265888 - [READ], 2000, 778.7992281717318 - ... - - Args: - ycsb_result_string: str. Text output from YCSB. - data_type: Either 'histogram' or 'timeseries' or 'hdrhistogram'. 'histogram' - and 'hdrhistogram' datasets are in the same format, with the difference - being lacking the (millisec, count) histogram component. Hence are parsed - similarly. - - Returns: - A YcsbResult object that contains the results from parsing YCSB output. - Raises: - IOError: If the results contained unexpected lines. - """ - if ('redis.clients.jedis.exceptions.JedisConnectionException' - in ycsb_result_string): - # This error is cause by ycsb using an old version of redis client 2.9.0 - # https://github.com/xetorthio/jedis/issues/1977 - raise errors.Benchmarks.KnownIntermittentError( - 'errors.Benchmarks.KnownIntermittentError') - - lines = [] - client_string = 'YCSB' - command_line = 'unknown' - throughput_time_series = {} - fp = io.StringIO(ycsb_result_string) - result_string = next(fp).strip() - - def IsHeadOfResults(line): - return line.startswith('[OVERALL]') - - while not IsHeadOfResults(result_string): - if result_string.startswith('YCSB Client 0.'): - client_string = result_string - if result_string.startswith('Command line:'): - command_line = result_string - # Look for status lines which include throughput on a 1-sec basis. - match = re.search(_STATUS_PATTERN, result_string) - if match is not None: - timestamp, qps = int(match.group(1)), float(match.group(2)) - # Repeats in the printed status are erroneous, ignore. 
- if timestamp not in throughput_time_series: - throughput_time_series[timestamp] = qps - try: - result_string = next(fp).strip() - except StopIteration: - raise IOError( - f'Could not parse YCSB output: {ycsb_result_string}') from None - - if result_string.startswith('[OVERALL]'): # YCSB > 0.7.0. - lines.append(result_string) - else: - # Received unexpected header - raise IOError(f'Unexpected header: {client_string}') - - # Some databases print additional output to stdout. - # YCSB results start with []; - # filter to just those lines. - def LineFilter(line): - return re.search(r'^\[[A-Z]+\]', line) is not None - - lines = itertools.chain(lines, filter(LineFilter, fp)) - - r = csv.reader(lines) - - by_operation = itertools.groupby(r, operator.itemgetter(0)) - - result = YcsbResult(client=client_string, - command_line=command_line, - throughput_time_series=throughput_time_series) - - for operation, lines in by_operation: - operation = operation[1:-1].lower() - - if operation == 'cleanup': - continue - - op_result = _OpResult(group=operation, - data_type=data_type) - latency_unit = 'ms' - for _, name, val in lines: - name = name.strip() - val = val.strip() - # Drop ">" from ">1000" - if name.startswith('>'): - name = name[1:] - val = float(val) if '.' in val or 'nan' in val.lower() else int(val) - if name.isdigit(): - if val: - if data_type == TIMESERIES and latency_unit == 'us': - val /= 1000.0 - op_result.data.append((int(name), val)) - else: - if '(us)' in name: - name = name.replace('(us)', '(ms)') - val /= 1000.0 - latency_unit = 'us' - op_result.statistics[name] = val - - result.groups[operation] = op_result - _ValidateErrorRate(result) - return result - - -def _ValidateErrorRate(result: YcsbResult) -> None: - """Raises an error if results contains entries with too high error rate. - - Computes the error rate for each operation, example output looks like: - - [INSERT], Operations, 100 - [INSERT], AverageLatency(us), 74.92 - [INSERT], MinLatency(us), 5 - [INSERT], MaxLatency(us), 98495 - [INSERT], 95thPercentileLatency(us), 42 - [INSERT], 99thPercentileLatency(us), 1411 - [INSERT], Return=OK, 90 - [INSERT], Return=ERROR, 10 - - This function will then compute 10/100 = 0.1 error rate. - - Args: - result: The result of running ParseResults() - - Raises: - errors.Benchmarks.RunError: If the computed error rate is higher than the - threshold. - """ - for operation in result.groups.values(): - name, stats = operation.group, operation.statistics - # The operation count can be 0 - count = stats.get('Operations', 0) - if count == 0: - continue - # These keys may be missing from the output. - error_rate = stats.get('Return=ERROR', 0) / count - if error_rate > _ERROR_RATE_THRESHOLD.value: - raise errors.Benchmarks.RunError( - f'YCSB had a {error_rate} error rate for {name}, higher than ' - f'threshold {_ERROR_RATE_THRESHOLD.value}') - - -def ParseHdrLogFile(logfile: str) -> list[_HdrHistogramTuple]: - """Parse a hdrhistogram log file into a list of (percentile, latency, count). - - Example decrypted hdrhistogram logfile (value measures latency in microsec): - - #[StartTime: 1523565997 (seconds since epoch), Thu Apr 12 20:46:37 UTC 2018] - Value Percentile TotalCount 1/(1-Percentile) - - 314.000 0.000000000000 2 1.00 - 853.000 0.100000000000 49955 1.11 - 949.000 0.200000000000 100351 1.25 - 1033.000 0.300000000000 150110 1.43 - ... 
- 134271.000 0.999998664856 1000008 748982.86 - 134271.000 0.999998855591 1000008 873813.33 - 201983.000 0.999999046326 1000009 1048576.00 - #[Mean = 1287.159, StdDeviation = 667.560] - #[Max = 201983.000, Total count = 1000009] - #[Buckets = 8, SubBuckets = 2048] - - Example of output: - [(0, 0.314, 2), (10, 0.853, 49953), (20, 0.949, 50396), ...] - - Args: - logfile: Hdrhistogram log file. - - Returns: - List of (percentile, value, count) tuples - """ - result = [] - last_percent_value = -1 - prev_total_count = 0 - for row in logfile.split('\n'): - if re.match(r'( *)(\d|\.)( *)', row): - row_vals = row.split() - # convert percentile to 100 based and round up to 3 decimal places - percentile = math.floor(float(row_vals[1]) * 100000) / 1000.0 - current_total_count = int(row_vals[2]) - if (percentile > last_percent_value and - current_total_count > prev_total_count): - # convert latency to millisec based and percentile to 100 based. - latency = float(row_vals[0]) / 1000 - count = current_total_count - prev_total_count - result.append((percentile, latency, count)) - last_percent_value = percentile - prev_total_count = current_total_count - return result - - -def ParseHdrLogs( - hdrlogs: Mapping[str, str]) -> dict[str, list[_HdrHistogramTuple]]: - """Parse a dict of group to hdr logs into a dict of group to histogram tuples. - - Args: - hdrlogs: Dict of group (read or update) to hdr logs for that group. - - Returns: - Dict of group to histogram tuples of reportable percentile values. - """ - parsed_hdr_histograms = {} - for group, logfile in hdrlogs.items(): - values = ParseHdrLogFile(logfile) - parsed_hdr_histograms[group] = values - return parsed_hdr_histograms - - -def _CumulativeSum(xs): - total = 0 - for x in xs: - total += x - yield total - - -def _WeightedQuantile(x, weights, p): - """Weighted quantile measurement for an ordered list. - - This method interpolates to the higher value when the quantile is not a direct - member of the list. This works well for YCSB, since latencies are floored. - - Args: - x: List of values. - weights: List of numeric weights. - p: float. Desired quantile in the interval [0, 1]. - - Returns: - float. - - Raises: - ValueError: When 'x' and 'weights' are not the same length, or 'p' is not in - the interval [0, 1]. - """ - if len(x) != len(weights): - raise ValueError('Lengths do not match: {0} != {1}'.format( - len(x), len(weights))) - if p < 0 or p > 1: - raise ValueError('Invalid quantile: {0}'.format(p)) - n = sum(weights) - target = n * float(p) - cumulative = list(_CumulativeSum(weights)) - - # Find the first cumulative weight >= target - i = bisect.bisect_left(cumulative, target) - if i == len(x): - return x[-1] - else: - return x[i] - - -def _PercentilesFromHistogram(ycsb_histogram, percentiles=_DEFAULT_PERCENTILES): - """Calculate percentiles for from a YCSB histogram. - - Args: - ycsb_histogram: List of (time_ms, frequency) tuples. - percentiles: iterable of floats, in the interval [0, 100]. - - Returns: - dict, mapping from percentile to value. - Raises: - ValueError: If one or more percentiles are outside [0, 100]. 
- """ - result = collections.OrderedDict() - histogram = sorted(ycsb_histogram) - for percentile in percentiles: - if percentile < 0 or percentile > 100: - raise ValueError('Invalid percentile: {0}'.format(percentile)) - if math.modf(percentile)[0] < 1e-7: - percentile = int(percentile) - label = 'p{0}'.format(percentile) - latencies, freqs = list(zip(*histogram)) - time_ms = _WeightedQuantile(latencies, freqs, percentile * 0.01) - result[label] = time_ms - return result - - -def _CombineResults(result_list: Iterable[YcsbResult], - measurement_type: str, - combined_hdr: Mapping[str, list[_HdrHistogramTuple]]): - """Combine results from multiple YCSB clients. - - Reduces a list of YCSB results (the output of ParseResults) - into a single result. Histogram bin counts, operation counts, and throughput - are summed; RunTime is replaced by the maximum runtime of any result. - - Args: - result_list: Iterable of ParseResults outputs. - measurement_type: Measurement type used. If measurement type is histogram, - histogram bins are summed across results. If measurement type is - hdrhistogram, an aggregated hdrhistogram (combined_hdr) is expected. - combined_hdr: Dict of already aggregated histogram. - - Returns: - A dictionary, as returned by ParseResults. - """ - - def DropUnaggregated(result: YcsbResult) -> None: - """Remove statistics which 'operators' specify should not be combined.""" - drop_keys = {k for k, v in AGGREGATE_OPERATORS.items() if v is None} - for group in result.groups.values(): - for k in drop_keys: - group.statistics.pop(k, None) - - def CombineHistograms(hist1, hist2): - h1 = dict(hist1) - h2 = dict(hist2) - keys = sorted(frozenset(h1) | frozenset(h2)) - result = [] - for k in keys: - result.append((k, h1.get(k, 0) + h2.get(k, 0))) - return result - - combined_weights = {} - - def _CombineLatencyTimeSeries( - combined_series: list[tuple[int, float]], - individual_series: list[tuple[int, float]]) -> list[tuple[int, float]]: - """Combines two timeseries of average latencies. - - Args: - combined_series: A list representing the timeseries with which the - individual series is being merged. - individual_series: A list representing the timeseries being merged with - the combined series. - - Returns: - A list representing the new combined series. - - Note that this assumes that each individual timeseries spent an equal - amount of time executing requests for each timeslice. This should hold for - runs without -target where each client has an equal number of threads, but - may not hold otherwise. - """ - combined_series = dict(combined_series) - individual_series = dict(individual_series) - timestamps = set(combined_series) | set(individual_series) - - result = [] - for timestamp in sorted(timestamps): - if timestamp not in individual_series: - continue - if timestamp not in combined_weights: - combined_weights[timestamp] = 1.0 - if timestamp not in combined_series: - result.append((timestamp, individual_series[timestamp])) - continue - - # This computes a new combined average latency by dividing the sum of - # request latencies by the sum of request counts for the time period. - # The sum of latencies for an individual series is assumed to be "1", - # so the sum of latencies for the combined series is the total number of - # series i.e. "combined_weight". - # The request count for an individual series is 1 / average latency. - # This means the request count for the combined series is - # combined_weight * 1 / average latency. 
- combined_weight = combined_weights[timestamp] - average_latency = (combined_weight + 1.0) / ( - (combined_weight / combined_series[timestamp]) + - (1.0 / individual_series[timestamp])) - result.append((timestamp, average_latency)) - combined_weights[timestamp] += 1.0 - return result - - def _CombineThroughputTimeSeries( - series1: _ThroughputTimeSeries, - series2: _ThroughputTimeSeries) -> _ThroughputTimeSeries: - """Returns a combined dict of [timestamp, total QPS] from the two series.""" - timestamps1 = set(series1) - timestamps2 = set(series2) - all_timestamps = timestamps1 | timestamps2 - diff_timestamps = timestamps1 ^ timestamps2 - if diff_timestamps: - # This case is rare but does happen occassionally, so log a warning - # instead of raising an exception. - logging.warning( - 'Expected combined timestamps to be the same, got different ' - 'timestamps: %s', diff_timestamps) - result = {} - for timestamp in all_timestamps: - result[timestamp] = ( - series1.get(timestamp, 0) + - series2.get(timestamp, 0)) - return result - - result_list = list(result_list) - result = copy.deepcopy(result_list[0]) - DropUnaggregated(result) - - for indiv in result_list[1:]: - for group_name, group in indiv.groups.items(): - if group_name not in result.groups: - logging.warning( - 'Found result group "%s" in individual YCSB result, ' - 'but not in accumulator.', group_name) - result.groups[group_name] = copy.deepcopy(group) - continue - - # Combine reported statistics. - # If no combining operator is defined, the statistic is skipped. - # Otherwise, the aggregated value is either: - # * The value in 'indiv', if the statistic is not present in 'result' or - # * AGGREGATE_OPERATORS[statistic](result_value, indiv_value) - for k, v in group.statistics.items(): - if k not in AGGREGATE_OPERATORS: - logging.warning('No operator for "%s". Skipping aggregation.', k) - continue - elif AGGREGATE_OPERATORS[k] is None: # Drop - result.groups[group_name].statistics.pop(k, None) - continue - elif k not in result.groups[group_name].statistics: - logging.warning( - 'Found statistic "%s.%s" in individual YCSB result, ' - 'but not in accumulator.', group_name, k) - result.groups[group_name].statistics[k] = copy.deepcopy(v) - continue - - op = AGGREGATE_OPERATORS[k] - result.groups[group_name].statistics[k] = ( - op(result.groups[group_name].statistics[k], v)) - - if measurement_type == HISTOGRAM: - result.groups[group_name].data = CombineHistograms( - result.groups[group_name].data, group.data) - elif measurement_type == TIMESERIES: - result.groups[group_name].data = _CombineLatencyTimeSeries( - result.groups[group_name].data, group.data) - result.client = ' '.join((result.client, indiv.client)) - result.command_line = ';'.join( - (result.command_line, indiv.command_line)) - - if _THROUGHPUT_TIME_SERIES.value: - result.throughput_time_series = _CombineThroughputTimeSeries( - result.throughput_time_series, indiv.throughput_time_series) - - if measurement_type == HDRHISTOGRAM: - for group_name in combined_hdr: - if group_name in result.groups: - result.groups[group_name].data = combined_hdr[group_name] - - return result - - def ParseWorkload(contents): """Parse a YCSB workload file. 
@@ -995,8 +504,11 @@ def ParseWorkload(contents): fp = io.StringIO(contents) result = {} for line in fp: - if (line.strip() and not line.lstrip().startswith('#') and - not line.lstrip().startswith('!')): + if ( + line.strip() + and not line.lstrip().startswith('#') + and not line.lstrip().startswith('!') + ): k, v = re.split(r'\s*[:=]\s*', line, maxsplit=1) result[k] = v.strip() return result @@ -1010,93 +522,6 @@ def PushWorkload(vm, workload_file, remote_path): vm.PushFile(workload_file, remote_path) -def _CreateSamples(ycsb_result: YcsbResult, - include_histogram: bool = False, - **kwargs) -> list[sample.Sample]: - """Create PKB samples from a YCSB result. - - Args: - ycsb_result: dict. Result of ParseResults. - include_histogram: bool. If True, include records for each histogram bin. - Note that this will increase the output volume significantly. - **kwargs: Base metadata for each sample. - - Yields: - List of sample.Sample objects. - """ - command_line = ycsb_result.command_line - stage = 'load' if command_line.endswith('-load') else 'run' - base_metadata = { - 'stage': stage, - 'ycsb_tar_url': _ycsb_tar_url, - 'ycsb_version': FLAGS.ycsb_version - } - if _SHOULD_RECORD_COMMAND_LINE.value: - base_metadata['command_line'] = command_line - base_metadata.update(kwargs) - - throughput_time_series = ycsb_result.throughput_time_series - if throughput_time_series: - yield sample.Sample( - 'Throughput Time Series', 0, '', - {'throughput_time_series': sorted(throughput_time_series.items())}) - - for group_name, group in ycsb_result.groups.items(): - meta = base_metadata.copy() - meta['operation'] = group_name - for statistic, value in group.statistics.items(): - if value is None: - continue - - unit = '' - m = re.match(r'^(.*) *\((us|ms|ops/sec)\)$', statistic) - if m: - statistic = m.group(1) - unit = m.group(2) - yield sample.Sample(' '.join([group_name, statistic]), value, unit, meta) - - if group.data and group.data_type == HISTOGRAM: - percentiles = _PercentilesFromHistogram(group.data) - for label, value in percentiles.items(): - yield sample.Sample(' '.join([group_name, label, 'latency']), value, - 'ms', meta) - if include_histogram: - for time_ms, count in group.data: - yield sample.Sample( - '{0}_latency_histogram_{1}_ms'.format(group_name, time_ms), count, - 'count', meta) - - if group.data and group.data_type == HDRHISTOGRAM: - # Strip percentile from the three-element tuples. - histogram = [value_count[-2:] for value_count in group.data] - percentiles = _PercentilesFromHistogram(histogram) - for label, value in percentiles.items(): - yield sample.Sample(' '.join([group_name, label, 'latency']), value, - 'ms', meta) - if include_histogram: - histogram = [] - for _, value, bucket_count in group.data: - histogram.append({ - 'microsec_latency': int(value * 1000), - 'count': bucket_count - }) - hist_meta = meta.copy() - hist_meta.update({'histogram': json.dumps(histogram)}) - yield sample.Sample('{0} latency histogram'.format(group_name), 0, '', - hist_meta) - - if group.data and group.data_type == TIMESERIES: - for sample_time, average_latency in group.data: - timeseries_meta = meta.copy() - timeseries_meta['sample_time'] = sample_time - yield sample.Sample( - ' '.join([group_name, 'AverageLatency (timeseries)']), - average_latency, 'ms', timeseries_meta) - yield sample.Sample('Average Latency Time Series', 0, '', { - 'latency_time_series': group.data - }) - - class YCSBExecutor: """Load data and run benchmarks using YCSB. 
@@ -1190,7 +615,9 @@ def _Load(self, vm, **kwargs): kwargs[param] = value command = self._BuildCommand('load', **kwargs) stdout, stderr = vm.RobustRemoteCommand(command) - return ParseResults(str(stderr + stdout), self.measurement_type) + return ycsb_stats.ParseResults( + str(stderr + stdout), self.measurement_type, _ERROR_RATE_THRESHOLD.value + ) def _LoadThreaded(self, vms, workload_file, **kwargs): """Runs "Load" in parallel for each VM in VMs. @@ -1222,7 +649,8 @@ def _LoadThreaded(self, vms, workload_file, **kwargs): stage='load', clients=len(vms) * kwargs['threads'], threads_per_client_vm=kwargs['threads'], - workload_name=os.path.basename(workload_file)) + workload_name=os.path.basename(workload_file), + ) self.workload_meta = workload_meta record_count = int(workload_meta.get('recordcount', '1000')) n_per_client = int(record_count) // len(vms) @@ -1231,8 +659,9 @@ def _LoadThreaded(self, vms, workload_file, **kwargs): for i in range(len(vms)) ] - remote_path = posixpath.join(linux_packages.INSTALL_DIR, - os.path.basename(workload_file)) + remote_path = posixpath.join( + linux_packages.INSTALL_DIR, os.path.basename(workload_file) + ) args = [((vm, workload_file, remote_path), {}) for vm in dict.fromkeys(vms)] background_tasks.RunThreaded(PushWorkload, args) @@ -1255,31 +684,42 @@ def _Load(loader_index): event='load', start_timestamp=start, end_timestamp=time.time(), - metadata=copy.deepcopy(kwargs)) + metadata=copy.deepcopy(kwargs), + ) if len(results) != len(vms): - raise IOError('Missing results: only {0}/{1} reported\n{2}'.format( - len(results), len(vms), results)) + raise IOError( + 'Missing results: only {0}/{1} reported\n{2}'.format( + len(results), len(vms), results + ) + ) samples = [] if FLAGS.ycsb_include_individual_results and len(results) > 1: for i, result in enumerate(results): samples.extend( - _CreateSamples( - result, + ycsb_stats.CreateSamples( + ycsb_result=result, + ycsb_version=FLAGS.ycsb_version, + include_command_line=_SHOULD_RECORD_COMMAND_LINE.value, result_type='individual', result_index=i, - include_histogram=FLAGS.ycsb_histogram, - **workload_meta)) + **workload_meta, + ) + ) # hdr histograms not collected upon load, only upon run - combined = _CombineResults(results, self.measurement_type, {}) + combined = ycsb_stats.CombineResults(results, self.measurement_type, {}) samples.extend( - _CreateSamples( - combined, - result_type='combined', + ycsb_stats.CreateSamples( + ycsb_result=combined, + ycsb_version=FLAGS.ycsb_version, include_histogram=FLAGS.ycsb_histogram, - **workload_meta)) + include_command_line=_SHOULD_RECORD_COMMAND_LINE.value, + result_type='combined', + **workload_meta, + ) + ) return samples @@ -1296,7 +736,9 @@ def _Run(self, vm, **kwargs): if hdr_files_dir: vm.RemoteCommand('mkdir -p {0}'.format(hdr_files_dir)) stdout, stderr = vm.RobustRemoteCommand(command) - return ParseResults(str(stderr + stdout), self.measurement_type) + return ycsb_stats.ParseResults( + str(stderr + stdout), self.measurement_type, _ERROR_RATE_THRESHOLD.value + ) def _RunThreaded(self, vms, **kwargs): """Run a single workload using `vms`.""" @@ -1337,8 +779,11 @@ def _Run(loader_index): background_tasks.RunThreaded(_Run, list(range(len(vms)))) if len(results) != len(vms): - raise IOError('Missing results: only {0}/{1} reported\n{2}'.format( - len(results), len(vms), results)) + raise IOError( + 'Missing results: only {0}/{1} reported\n{2}'.format( + len(results), len(vms), results + ) + ) return results @@ -1364,9 +809,11 @@ def _GetRunLoadTarget(self, 
current_load, is_sustained=False): lower_bound = FLAGS.ycsb_dynamic_load_throughput_lower_bound step = (1 - FLAGS.ycsb_dynamic_load_sustain_throughput_ratio) * 2 - if (not bool(lower_bound) and - is_sustained) or (lower_bound and - current_load < lower_bound) or (current_load is None): + if ( + (not bool(lower_bound) and is_sustained) + or (lower_bound and current_load < lower_bound) + or (current_load is None) + ): return None elif is_sustained: return current_load * (1 - step) @@ -1402,7 +849,7 @@ def RunStaircaseLoads(self, vms, workloads, **kwargs): if FLAGS.ycsb_timelimit: parameters['maxexecutiontime'] = FLAGS.ycsb_timelimit hdr_files_dir = posixpath.join(self.hdr_dir, str(workload_index)) - if FLAGS.ycsb_measurement_type == HDRHISTOGRAM: + if FLAGS.ycsb_measurement_type == ycsb_stats.HDRHISTOGRAM: parameters['hdrhistogram.fileoutput'] = True parameters['hdrhistogram.output.path'] = hdr_files_dir if FLAGS.ycsb_requestdistribution: @@ -1414,8 +861,9 @@ def RunStaircaseLoads(self, vms, workloads, **kwargs): if FLAGS.ycsb_scanproportion is not None: parameters['scanproportion'] = FLAGS.ycsb_scanproportion parameters.update(kwargs) - remote_path = posixpath.join(linux_packages.INSTALL_DIR, - os.path.basename(workload_file)) + remote_path = posixpath.join( + linux_packages.INSTALL_DIR, os.path.basename(workload_file) + ) with open(workload_file) as fp: workload_meta = ParseWorkload(fp.read()) @@ -1423,7 +871,8 @@ def RunStaircaseLoads(self, vms, workloads, **kwargs): workload_meta.update( workload_name=os.path.basename(workload_file), workload_index=workload_index, - stage='run') + stage='run', + ) args = [ ((vm, workload_file, remote_path), {}) for vm in dict.fromkeys(vms) @@ -1436,16 +885,16 @@ def RunStaircaseLoads(self, vms, workloads, **kwargs): # if no target is passed via flags. for client_count, target_qps_per_vm in _GetThreadsQpsPerLoaderList(): - def _DoRunStairCaseLoad(client_count, - target_qps_per_vm, - workload_meta, - is_sustained=False): + def _DoRunStairCaseLoad( + client_count, target_qps_per_vm, workload_meta, is_sustained=False + ): parameters['threads'] = client_count if target_qps_per_vm: parameters['target'] = int(target_qps_per_vm * len(vms)) if is_sustained: parameters['maxexecutiontime'] = ( - FLAGS.ycsb_dynamic_load_sustain_timelimit) + FLAGS.ycsb_dynamic_load_sustain_timelimit + ) start = time.time() results = self._RunThreaded(vms, **parameters) events.record_event.send( @@ -1453,12 +902,14 @@ def _DoRunStairCaseLoad(client_count, event='run', start_timestamp=start, end_timestamp=time.time(), - metadata=copy.deepcopy(parameters)) + metadata=copy.deepcopy(parameters), + ) client_meta = workload_meta.copy() client_meta.update(parameters) client_meta.update( clients=len(vms) * client_count, - threads_per_client_vm=client_count) + threads_per_client_vm=client_count, + ) # Values passed in via this flag do not get recorded in metadata. # The target passed in is applied to each client VM, so multiply by # len(vms). 
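        # Worked example (illustrative): with 4 client VMs and a per-VM target
        # of 250 QPS, parameters['target'] becomes int(250 * 4) = 1000, the
        # aggregate target across all clients.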
@@ -1471,27 +922,39 @@ def _DoRunStairCaseLoad(client_count, if FLAGS.ycsb_include_individual_results and len(results) > 1: for i, result in enumerate(results): all_results.extend( - _CreateSamples( - result, + ycsb_stats.CreateSamples( + ycsb_result=result, + ycsb_version=FLAGS.ycsb_version, + include_histogram=FLAGS.ycsb_histogram, + include_command_line=_SHOULD_RECORD_COMMAND_LINE.value, result_type='individual', result_index=i, - include_histogram=FLAGS.ycsb_histogram, - **client_meta)) - - if self.measurement_type == HDRHISTOGRAM: - combined_log = self.CombineHdrHistogramLogFiles( - parameters['hdrhistogram.output.path'], vms) - parsed_hdr = ParseHdrLogs(combined_log) - combined = _CombineResults(results, self.measurement_type, - parsed_hdr) + **client_meta, + ) + ) + + if self.measurement_type == ycsb_stats.HDRHISTOGRAM: + combined_log = ycsb_stats.CombineHdrHistogramLogFiles( + self.hdr_dir, parameters['hdrhistogram.output.path'], vms + ) + parsed_hdr = ycsb_stats.ParseHdrLogs(combined_log) + combined = ycsb_stats.CombineResults( + results, self.measurement_type, parsed_hdr + ) else: - combined = _CombineResults(results, self.measurement_type, {}) + combined = ycsb_stats.CombineResults( + results, self.measurement_type, {} + ) run_samples = list( - _CreateSamples( - combined, - result_type='combined', + ycsb_stats.CreateSamples( + ycsb_result=combined, + ycsb_version=FLAGS.ycsb_version, + include_command_line=_SHOULD_RECORD_COMMAND_LINE.value, include_histogram=FLAGS.ycsb_histogram, - **client_meta)) + result_type='combined', + **client_meta, + ) + ) overall_throughput = 0 for s in run_samples: @@ -1500,7 +963,8 @@ def _DoRunStairCaseLoad(client_count, return overall_throughput, run_samples target_throughput, run_samples = _DoRunStairCaseLoad( - client_count, target_qps_per_vm, workload_meta) + client_count, target_qps_per_vm, workload_meta + ) # Uses 5 * unthrottled throughput as starting point. target_throughput *= 5 @@ -1508,71 +972,25 @@ def _DoRunStairCaseLoad(client_count, is_sustained = False while FLAGS.ycsb_dynamic_load: actual_throughput, run_samples = _DoRunStairCaseLoad( - client_count, target_throughput // len(vms), workload_meta, - is_sustained) + client_count, + target_throughput // len(vms), + workload_meta, + is_sustained, + ) is_sustained = FLAGS.ycsb_dynamic_load_sustain_throughput_ratio < ( - actual_throughput / target_throughput) + actual_throughput / target_throughput + ) for s in run_samples: s.metadata['sustained'] = is_sustained all_results.extend(run_samples) - target_throughput = self._GetRunLoadTarget(actual_throughput, - is_sustained) + target_throughput = self._GetRunLoadTarget( + actual_throughput, is_sustained + ) if target_throughput is None: break return all_results - def CombineHdrHistogramLogFiles(self, - hdr_files_dir: str, - vms: Iterable[virtual_machine.VirtualMachine] - ) -> dict[str, str]: - """Combine multiple hdr histograms by group type. - - Combine multiple hdr histograms in hdr log files format into 1 human - readable hdr histogram log file. - This is done by - 1) copying hdrhistogram log files to a single file on a worker vm; - 2) aggregating file containing multiple %-tile histogram into - a single %-tile histogram using HistogramLogProcessor from the - hdrhistogram package that is installed on the vms. Refer to https:// - github.com/HdrHistogram/HdrHistogram/blob/master/HistogramLogProcessor - - Args: - hdr_files_dir: directory on the remote vms where hdr files are stored. 
- vms: remote vms - - Returns: - dict of hdrhistograms keyed by group type - """ - vms = list(vms) - hdrhistograms = {} - for grouptype in HDRHISTOGRAM_GROUPS: - - def _GetHdrHistogramLog(vm, group=grouptype): - filename = f'{hdr_files_dir}{group}.hdr' - return vm.RemoteCommand(f'touch {filename} && tail -1 {filename}')[0] - - results = background_tasks.RunThreaded(_GetHdrHistogramLog, vms) - - # It's possible that there is no result for certain group, e.g., read - # only, update only. - if not all(results): - continue - - worker_vm = vms[0] - for hdr in results[1:]: - worker_vm.RemoteCommand( - 'sudo chmod 755 {1}{2}.hdr && echo "{0}" >> {1}{2}.hdr'.format( - hdr[:-1], hdr_files_dir, grouptype)) - hdrhistogram, _ = worker_vm.RemoteCommand( - 'cd {0} && ./HistogramLogProcessor -i {1}{2}.hdr' - ' -outputValueUnitRatio 1'.format( - self.hdr_dir, hdr_files_dir, grouptype - ) - ) - hdrhistograms[grouptype.lower()] = hdrhistogram - return hdrhistograms - def Load(self, vms, workloads=None, load_kwargs=None): """Load data using YCSB.""" if FLAGS.ycsb_skip_load_stage: @@ -1590,16 +1008,21 @@ def _HasInsertFailures(result_samples): if FLAGS.ycsb_reload_database or not self.loaded: load_samples += list( - self._LoadThreaded(vms, workloads[0], **(load_kwargs or {}))) - if (_SHOULD_FAIL_ON_INCOMPLETE_LOADING.value and - _HasInsertFailures(load_samples)): + self._LoadThreaded(vms, workloads[0], **(load_kwargs or {})) + ) + if _SHOULD_FAIL_ON_INCOMPLETE_LOADING.value and _HasInsertFailures( + load_samples + ): raise errors.Benchmarks.RunError( - 'There are insert failures, so the table loading is incomplete') + 'There are insert failures, so the table loading is incomplete' + ) self.loaded = True if FLAGS.ycsb_sleep_after_load_in_sec > 0: - logging.info('Sleeping %s seconds after load stage.', - FLAGS.ycsb_sleep_after_load_in_sec) + logging.info( + 'Sleeping %s seconds after load stage.', + FLAGS.ycsb_sleep_after_load_in_sec, + ) time.sleep(FLAGS.ycsb_sleep_after_load_in_sec) if FLAGS.ycsb_load_samples: return load_samples @@ -1620,11 +1043,14 @@ def Run(self, vms, workloads=None, run_kwargs=None) -> list[sample.Sample]: samples = self._RunIncrementalMode(vms, workloads, run_kwargs) else: samples = list(self.RunStaircaseLoads(vms, workloads, **run_kwargs)) - if (FLAGS.ycsb_sleep_after_load_in_sec > 0 and - not FLAGS.ycsb_skip_load_stage): + if ( + FLAGS.ycsb_sleep_after_load_in_sec > 0 + and not FLAGS.ycsb_skip_load_stage + ): for s in samples: - s.metadata[ - 'sleep_after_load_in_sec'] = FLAGS.ycsb_sleep_after_load_in_sec + s.metadata['sleep_after_load_in_sec'] = ( + FLAGS.ycsb_sleep_after_load_in_sec + ) return samples def _SetRunParameters(self, params: Mapping[str, Any]) -> None: @@ -1632,8 +1058,7 @@ def _SetRunParameters(self, params: Mapping[str, Any]) -> None: # Ideally YCSB should be refactored to include a function that just takes # commands for a run, but that will be a large refactor. 
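    # Illustrative round-trip: params={'target': 2000, 'threads': 32} is
    # re-parsed below as the flag list ['target=2000', 'threads=32'].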
FLAGS['ycsb_run_parameters'].unparse() - FLAGS['ycsb_run_parameters'].parse( - [f'{k}={v}' for k, v in params.items()]) + FLAGS['ycsb_run_parameters'].parse([f'{k}={v}' for k, v in params.items()]) def _RunBurstMode(self, vms, workloads, run_kwargs=None): """Runs YCSB in burst mode, where the second run has increased QPS.""" @@ -1736,7 +1161,8 @@ def LoadAndRun(self, vms, workloads=None, load_kwargs=None, run_kwargs=None): load_samples = [] if not FLAGS.ycsb_skip_load_stage: load_samples = self.Load( - vms, workloads=workloads, load_kwargs=load_kwargs) + vms, workloads=workloads, load_kwargs=load_kwargs + ) run_samples = [] if not FLAGS.ycsb_skip_run_stage: run_samples = self.Run(vms, workloads=workloads, run_kwargs=run_kwargs) diff --git a/perfkitbenchmarker/linux_packages/ycsb_stats.py b/perfkitbenchmarker/linux_packages/ycsb_stats.py new file mode 100644 index 0000000000..d48f679d55 --- /dev/null +++ b/perfkitbenchmarker/linux_packages/ycsb_stats.py @@ -0,0 +1,843 @@ +# Copyright 2023 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parsing results from YCSB output into samples.""" + +import bisect +import collections +from collections.abc import Iterable, Mapping +import copy +import csv +import dataclasses +import io +import itertools +import json +import logging +import math +import operator +import posixpath +import re +from absl import flags +from perfkitbenchmarker import background_tasks +from perfkitbenchmarker import errors +from perfkitbenchmarker import linux_packages +from perfkitbenchmarker import sample +from perfkitbenchmarker import virtual_machine + +FLAGS = flags.FLAGS + +YCSB_URL_TEMPLATE = ( + 'https://github.com/brianfrankcooper/YCSB/releases/' + 'download/{0}/ycsb-{0}.tar.gz' +) +YCSB_DIR = posixpath.join(linux_packages.INSTALL_DIR, 'ycsb') +YCSB_EXE = posixpath.join(YCSB_DIR, 'bin', 'ycsb') +HDRHISTOGRAM_DIR = posixpath.join(linux_packages.INSTALL_DIR, 'hdrhistogram') +HDRHISTOGRAM_TAR_URL = ( + 'https://github.com/HdrHistogram/HdrHistogram/archive/' + 'HdrHistogram-2.1.10.tar.gz' +) +HDRHISTOGRAM_GROUPS = ['READ', 'UPDATE'] + +_DEFAULT_PERCENTILES = 50, 75, 90, 95, 99, 99.9 + +HISTOGRAM = 'histogram' +HDRHISTOGRAM = 'hdrhistogram' +TIMESERIES = 'timeseries' +YCSB_MEASUREMENT_TYPES = [HISTOGRAM, HDRHISTOGRAM, TIMESERIES] + +# Binary operators to aggregate reported statistics. +# Statistics with operator 'None' will be dropped. +AGGREGATE_OPERATORS = { + 'Operations': operator.add, + 'RunTime(ms)': max, + 'Return=0': operator.add, + 'Return=-1': operator.add, + 'Return=-2': operator.add, + 'Return=-3': operator.add, + 'Return=OK': operator.add, + 'Return=ERROR': operator.add, + 'Return=NOT_FOUND': operator.add, + 'LatencyVariance(ms)': None, + 'AverageLatency(ms)': None, # Requires both average and # of ops. + 'Throughput(ops/sec)': operator.add, + '95thPercentileLatency(ms)': None, # Calculated across clients. + '99thPercentileLatency(ms)': None, # Calculated across clients. 
+    'MinLatency(ms)': min,
+    'MaxLatency(ms)': max,
+}
+
+# Status line pattern
+_STATUS_PATTERN = r'(\d+) sec: \d+ operations; (\d+.\d+) current ops\/sec'
+# Status interval default is 10 sec, change to 1 sec.
+_STATUS_INTERVAL_SEC = 1
+
+# Default loading thread count for non-batching backends.
+DEFAULT_PRELOAD_THREADS = 32
+
+# Custom YCSB tar url. If not set, the official YCSB release will be used.
+_ycsb_tar_url = None
+
+# Parameters for incremental workload. Can be made into flags in the future.
+_INCREMENTAL_STARTING_QPS = 500
+_INCREMENTAL_TIMELIMIT_SEC = 60 * 5
+
+_ThroughputTimeSeries = dict[int, float]
+# Tuple of (percentile, latency, count)
+_HdrHistogramTuple = tuple[float, float, int]
+
+
+@dataclasses.dataclass
+class _OpResult:
+  """Individual results for a single operation.
+
+  Attributes:
+    group: group name (e.g., update, insert, overall)
+    statistics: dict mapping from statistic name to value
+    data_type: Corresponds to --ycsb_measurement_type.
+    data: For HISTOGRAM/HDRHISTOGRAM: list of (ms_lower_bound, count) tuples,
+      e.g. [(0, 530), (19, 1)] indicates that 530 ops took between 0ms and 1ms,
+      and 1 took between 19ms and 20ms. Empty bins are not reported. For
+      TIMESERIES: list of (time, latency us) tuples.
+  """
+
+  group: str = ''
+  data_type: str = ''
+  data: list[tuple[int, float]] = dataclasses.field(default_factory=list)
+  statistics: dict[str, float] = dataclasses.field(default_factory=dict)
+
+
+@dataclasses.dataclass
+class YcsbResult:
+  """Aggregate results for the YCSB run.
+
+  Attributes:
+    client: Contains YCSB version information.
+    command_line: Command line executed.
+    throughput_time_series: Time series of throughputs (interval, QPS).
+    groups: dict of operation group name to results for that operation.
+  """
+
+  client: str = ''
+  command_line: str = ''
+  throughput_time_series: _ThroughputTimeSeries = dataclasses.field(
+      default_factory=dict
+  )
+  groups: dict[str, _OpResult] = dataclasses.field(default_factory=dict)
+
+
+def _ValidateErrorRate(result: YcsbResult, threshold: float) -> None:
+  """Raises an error if the result contains entries with too high an error rate.
+
+  Computes the error rate for each operation; example output looks like:
+
+  [INSERT], Operations, 100
+  [INSERT], AverageLatency(us), 74.92
+  [INSERT], MinLatency(us), 5
+  [INSERT], MaxLatency(us), 98495
+  [INSERT], 95thPercentileLatency(us), 42
+  [INSERT], 99thPercentileLatency(us), 1411
+  [INSERT], Return=OK, 90
+  [INSERT], Return=ERROR, 10
+
+  This function will then compute 10/100 = 0.1 error rate.
+
+  Args:
+    result: The result of running ParseResults().
+    threshold: The error rate above which an exception is thrown. 1.0 means no
+      exception will be thrown; 0.0 means an exception is thrown for any error.
+
+  Raises:
+    errors.Benchmarks.RunError: If the computed error rate is higher than the
+      threshold.
+  """
+  for operation in result.groups.values():
+    name, stats = operation.group, operation.statistics
+    # The operation count can be 0.
+    count = stats.get('Operations', 0)
+    if count == 0:
+      continue
+    # These keys may be missing from the output.
+    error_rate = stats.get('Return=ERROR', 0) / count
+    if error_rate > threshold:
+      raise errors.Benchmarks.RunError(
+          f'YCSB had a {error_rate} error rate for {name}, higher than '
+          f'threshold {threshold}'
+      )
+
+
+def ParseResults(
+    ycsb_result_string: str,
+    data_type: str = 'histogram',
+    error_rate_threshold: float = 1.0,
+) -> 'YcsbResult':
+  """Parse YCSB results.
+
+  Example input for histogram datatype:
+
+    YCSB Client 0.1
+    Command line: -db com.yahoo.ycsb.db.HBaseClient -P /tmp/pkb/workloada
+    [OVERALL], RunTime(ms), 1800413.0
+    [OVERALL], Throughput(ops/sec), 2740.503428935472
+    [UPDATE], Operations, 2468054
+    [UPDATE], AverageLatency(us), 2218.8513395574005
+    [UPDATE], MinLatency(us), 554
+    [UPDATE], MaxLatency(us), 352634
+    [UPDATE], 95thPercentileLatency(ms), 4
+    [UPDATE], 99thPercentileLatency(ms), 7
+    [UPDATE], Return=0, 2468054
+    [UPDATE], 0, 398998
+    [UPDATE], 1, 1015682
+    [UPDATE], 2, 532078
+    ...
+
+  Example input for hdrhistogram datatype:
+
+    YCSB Client 0.17.0
+    Command line: -db com.yahoo.ycsb.db.RedisClient -P /opt/pkb/workloadb
+    [OVERALL], RunTime(ms), 29770.0
+    [OVERALL], Throughput(ops/sec), 33590.86328518643
+    [UPDATE], Operations, 49856.0
+    [UPDATE], AverageLatency(us), 1478.0115532734276
+    [UPDATE], MinLatency(us), 312.0
+    [UPDATE], MaxLatency(us), 24623.0
+    [UPDATE], 95thPercentileLatency(us), 3501.0
+    [UPDATE], 99thPercentileLatency(us), 6747.0
+    [UPDATE], Return=OK, 49856
+    ...
+
+  Example input for ycsb version 0.17.0+:
+
+    ...
+    Command line: -db com.yahoo.ycsb.db.HBaseClient10 ... -load
+    YCSB Client 0.17.0
+
+    Loading workload...
+    Starting test.
+    ...
+    [OVERALL], RunTime(ms), 11411
+    [OVERALL], Throughput(ops/sec), 8763.473841030585
+    [INSERT], Operations, 100000
+    [INSERT], AverageLatency(us), 74.92
+    [INSERT], MinLatency(us), 5
+    [INSERT], MaxLatency(us), 98495
+    [INSERT], 95thPercentileLatency(us), 42
+    [INSERT], 99thPercentileLatency(us), 1411
+    [INSERT], Return=OK, 100000
+    ...
+
+  Example input for timeseries datatype:
+
+    ...
+    [OVERALL], RunTime(ms), 240007.0
+    [OVERALL], Throughput(ops/sec), 10664.605615669543
+    ...
+    [READ], Operations, 1279253
+    [READ], AverageLatency(us), 3002.7057071587874
+    [READ], MinLatency(us), 63
+    [READ], MaxLatency(us), 93584
+    [READ], Return=OK, 1279281
+    [READ], 0, 528.6142757498257
+    [READ], 500, 360.95347448674966
+    [READ], 1000, 667.7379547689283
+    [READ], 1500, 731.5389357265888
+    [READ], 2000, 778.7992281717318
+    ...
+
+  Args:
+    ycsb_result_string: str. Text output from YCSB.
+    data_type: Either 'histogram', 'timeseries', or 'hdrhistogram'. 'histogram'
+      and 'hdrhistogram' datasets are in the same format, with the difference
+      being that 'hdrhistogram' lacks the (millisec, count) histogram
+      component; hence they are parsed similarly.
+    error_rate_threshold: Error statistics in the output should not exceed this
+      ratio.
+
+  Returns:
+    A YcsbResult object that contains the results from parsing YCSB output.
+
+  Raises:
+    IOError: If the results contained unexpected lines.
+  """
+  if (
+      'redis.clients.jedis.exceptions.JedisConnectionException'
+      in ycsb_result_string
+  ):
+    # This error is caused by ycsb using an old version of redis client 2.9.0
+    # https://github.com/xetorthio/jedis/issues/1977
+    raise errors.Benchmarks.KnownIntermittentError(
+        'errors.Benchmarks.KnownIntermittentError'
+    )
+
+  lines = []
+  client_string = 'YCSB'
+  command_line = 'unknown'
+  throughput_time_series = {}
+  fp = io.StringIO(ycsb_result_string)
+  result_string = next(fp).strip()
+
+  def IsHeadOfResults(line):
+    return line.startswith('[OVERALL]')
+
+  while not IsHeadOfResults(result_string):
+    if result_string.startswith('YCSB Client 0.'):
+      client_string = result_string
+    if result_string.startswith('Command line:'):
+      command_line = result_string
+    # Look for status lines which include throughput on a 1-sec basis.
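+    # Illustrative match (sample values, not from a real run): the status line
+    # '10 sec: 5000 operations; 501.2 current ops/sec' yields
+    # timestamp=10 and qps=501.2 below.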
+    match = re.search(_STATUS_PATTERN, result_string)
+    if match is not None:
+      timestamp, qps = int(match.group(1)), float(match.group(2))
+      # Repeats in the printed status are erroneous, ignore.
+      if timestamp not in throughput_time_series:
+        throughput_time_series[timestamp] = qps
+    try:
+      result_string = next(fp).strip()
+    except StopIteration:
+      raise IOError(
+          f'Could not parse YCSB output: {ycsb_result_string}'
+      ) from None
+
+  if result_string.startswith('[OVERALL]'):  # YCSB > 0.7.0.
+    lines.append(result_string)
+  else:
+    # Received unexpected header
+    raise IOError(f'Unexpected header: {client_string}')
+
+  # Some databases print additional output to stdout.
+  # YCSB results start with [];
+  # filter to just those lines.
+  def LineFilter(line):
+    return re.search(r'^\[[A-Z]+\]', line) is not None
+
+  lines = itertools.chain(lines, filter(LineFilter, fp))
+
+  r = csv.reader(lines)
+
+  by_operation = itertools.groupby(r, operator.itemgetter(0))
+
+  result = YcsbResult(
+      client=client_string,
+      command_line=command_line,
+      throughput_time_series=throughput_time_series,
+  )
+
+  for operation, lines in by_operation:
+    operation = operation[1:-1].lower()
+
+    if operation == 'cleanup':
+      continue
+
+    op_result = _OpResult(group=operation, data_type=data_type)
+    latency_unit = 'ms'
+    for _, name, val in lines:
+      name = name.strip()
+      val = val.strip()
+      # Drop ">" from ">1000".
+      if name.startswith('>'):
+        name = name[1:]
+      val = float(val) if '.' in val or 'nan' in val.lower() else int(val)
+      if name.isdigit():
+        if val:
+          if data_type == TIMESERIES and latency_unit == 'us':
+            val /= 1000.0
+          op_result.data.append((int(name), val))
+      else:
+        if '(us)' in name:
+          name = name.replace('(us)', '(ms)')
+          val /= 1000.0
+          latency_unit = 'us'
+        op_result.statistics[name] = val
+
+    result.groups[operation] = op_result
+  _ValidateErrorRate(result, error_rate_threshold)
+  return result
+
+
+def ParseHdrLogFile(logfile: str) -> list[_HdrHistogramTuple]:
+  """Parse an hdrhistogram log file into a list of (percentile, latency, count).
+
+  Example decoded hdrhistogram logfile (value measures latency in microsec):
+
+  #[StartTime: 1523565997 (seconds since epoch), Thu Apr 12 20:46:37 UTC 2018]
+       Value     Percentile TotalCount 1/(1-Percentile)
+
+     314.000 0.000000000000          2           1.00
+     853.000 0.100000000000      49955           1.11
+     949.000 0.200000000000     100351           1.25
+    1033.000 0.300000000000     150110           1.43
+    ...
+  134271.000 0.999998664856    1000008      748982.86
+  134271.000 0.999998855591    1000008      873813.33
+  201983.000 0.999999046326    1000009     1048576.00
+  #[Mean = 1287.159, StdDeviation = 667.560]
+  #[Max = 201983.000, Total count = 1000009]
+  #[Buckets = 8, SubBuckets = 2048]
+
+  Example of output:
+    [(0, 0.314, 2), (10, 0.853, 49953), (20, 0.949, 50396), ...]
+
+  Args:
+    logfile: Hdrhistogram log file.
+
+  Returns:
+    List of (percentile, value, count) tuples.
+  """
+  result = []
+  last_percent_value = -1
+  prev_total_count = 0
+  for row in logfile.split('\n'):
+    if re.match(r'( *)(\d|\.)( *)', row):
+      row_vals = row.split()
+      # Convert percentile to 100-based and round down to 3 decimal places.
+      percentile = math.floor(float(row_vals[1]) * 100000) / 1000.0
+      current_total_count = int(row_vals[2])
+      if (
+          percentile > last_percent_value
+          and current_total_count > prev_total_count
+      ):
+        # Convert latency to millisec-based and percentile to 100-based.
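+        # Illustrative trace using the docstring example: the row
+        # '853.000 0.100000000000 49955 1.11' gives percentile=10.0,
+        # latency=0.853 ms, and count=49955 - 2 = 49953.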
+        latency = float(row_vals[0]) / 1000
+        count = current_total_count - prev_total_count
+        result.append((percentile, latency, count))
+        last_percent_value = percentile
+        prev_total_count = current_total_count
+  return result
+
+
+def ParseHdrLogs(
+    hdrlogs: Mapping[str, str]
+) -> dict[str, list[_HdrHistogramTuple]]:
+  """Parse a dict of group to hdr logs into a dict of group to histogram tuples.
+
+  Args:
+    hdrlogs: Dict of group (read or update) to hdr logs for that group.
+
+  Returns:
+    Dict of group to histogram tuples of reportable percentile values.
+  """
+  parsed_hdr_histograms = {}
+  for group, logfile in hdrlogs.items():
+    values = ParseHdrLogFile(logfile)
+    parsed_hdr_histograms[group] = values
+  return parsed_hdr_histograms
+
+
+def _CumulativeSum(xs):
+  total = 0
+  for x in xs:
+    total += x
+    yield total
+
+
+def _WeightedQuantile(x, weights, p):
+  """Weighted quantile measurement for an ordered list.
+
+  This method interpolates to the higher value when the quantile is not a
+  direct member of the list. This works well for YCSB, since latencies are
+  floored.
+
+  Args:
+    x: List of values.
+    weights: List of numeric weights.
+    p: float. Desired quantile in the interval [0, 1].
+
+  Returns:
+    float.
+
+  Raises:
+    ValueError: When 'x' and 'weights' are not the same length, or 'p' is not
+      in the interval [0, 1].
+  """
+  if len(x) != len(weights):
+    raise ValueError(
+        'Lengths do not match: {0} != {1}'.format(len(x), len(weights))
+    )
+  if p < 0 or p > 1:
+    raise ValueError('Invalid quantile: {0}'.format(p))
+  n = sum(weights)
+  target = n * float(p)
+  cumulative = list(_CumulativeSum(weights))
+
+  # Find the first cumulative weight >= target.
+  i = bisect.bisect_left(cumulative, target)
+  if i == len(x):
+    return x[-1]
+  else:
+    return x[i]
+
+
+def _PercentilesFromHistogram(ycsb_histogram, percentiles=_DEFAULT_PERCENTILES):
+  """Calculate percentiles from a YCSB histogram.
+
+  Args:
+    ycsb_histogram: List of (time_ms, frequency) tuples.
+    percentiles: iterable of floats, in the interval [0, 100].
+
+  Returns:
+    dict, mapping from percentile to value.
+
+  Raises:
+    ValueError: If one or more percentiles are outside [0, 100].
+  """
+  result = collections.OrderedDict()
+  histogram = sorted(ycsb_histogram)
+  for percentile in percentiles:
+    if percentile < 0 or percentile > 100:
+      raise ValueError('Invalid percentile: {0}'.format(percentile))
+    if math.modf(percentile)[0] < 1e-7:
+      percentile = int(percentile)
+    label = 'p{0}'.format(percentile)
+    latencies, freqs = list(zip(*histogram))
+    time_ms = _WeightedQuantile(latencies, freqs, percentile * 0.01)
+    result[label] = time_ms
+  return result
+
+
+def CombineResults(
+    result_list: Iterable[YcsbResult],
+    measurement_type: str,
+    combined_hdr: Mapping[str, list[_HdrHistogramTuple]],
+):
+  """Combine results from multiple YCSB clients.
+
+  Reduces a list of YCSB results (the output of ParseResults)
+  into a single result. Histogram bin counts, operation counts, and throughput
+  are summed; RunTime is replaced by the maximum runtime of any result.
+
+  Args:
+    result_list: Iterable of ParseResults outputs.
+    measurement_type: Measurement type used. If measurement type is histogram,
+      histogram bins are summed across results. If measurement type is
+      hdrhistogram, an aggregated hdrhistogram (combined_hdr) is expected.
+    combined_hdr: Dict of already aggregated histograms.
+
+  Returns:
+    A YcsbResult, as returned by ParseResults.
+ """ + + def DropUnaggregated(result: YcsbResult) -> None: + """Remove statistics which 'operators' specify should not be combined.""" + drop_keys = {k for k, v in AGGREGATE_OPERATORS.items() if v is None} + for group in result.groups.values(): + for k in drop_keys: + group.statistics.pop(k, None) + + def CombineHistograms(hist1, hist2): + h1 = dict(hist1) + h2 = dict(hist2) + keys = sorted(frozenset(h1) | frozenset(h2)) + result = [] + for k in keys: + result.append((k, h1.get(k, 0) + h2.get(k, 0))) + return result + + combined_weights = {} + + def _CombineLatencyTimeSeries( + combined_series: list[tuple[int, float]], + individual_series: list[tuple[int, float]], + ) -> list[tuple[int, float]]: + """Combines two timeseries of average latencies. + + Args: + combined_series: A list representing the timeseries with which the + individual series is being merged. + individual_series: A list representing the timeseries being merged with + the combined series. + + Returns: + A list representing the new combined series. + + Note that this assumes that each individual timeseries spent an equal + amount of time executing requests for each timeslice. This should hold for + runs without -target where each client has an equal number of threads, but + may not hold otherwise. + """ + combined_series = dict(combined_series) + individual_series = dict(individual_series) + timestamps = set(combined_series) | set(individual_series) + + result = [] + for timestamp in sorted(timestamps): + if timestamp not in individual_series: + continue + if timestamp not in combined_weights: + combined_weights[timestamp] = 1.0 + if timestamp not in combined_series: + result.append((timestamp, individual_series[timestamp])) + continue + + # This computes a new combined average latency by dividing the sum of + # request latencies by the sum of request counts for the time period. + # The sum of latencies for an individual series is assumed to be "1", + # so the sum of latencies for the combined series is the total number of + # series i.e. "combined_weight". + # The request count for an individual series is 1 / average latency. + # This means the request count for the combined series is + # combined_weight * 1 / average latency. + combined_weight = combined_weights[timestamp] + average_latency = (combined_weight + 1.0) / ( + (combined_weight / combined_series[timestamp]) + + (1.0 / individual_series[timestamp]) + ) + result.append((timestamp, average_latency)) + combined_weights[timestamp] += 1.0 + return result + + def _CombineThroughputTimeSeries( + series1: _ThroughputTimeSeries, series2: _ThroughputTimeSeries + ) -> _ThroughputTimeSeries: + """Returns a combined dict of [timestamp, total QPS] from the two series.""" + timestamps1 = set(series1) + timestamps2 = set(series2) + all_timestamps = timestamps1 | timestamps2 + diff_timestamps = timestamps1 ^ timestamps2 + if diff_timestamps: + # This case is rare but does happen occassionally, so log a warning + # instead of raising an exception. 
+ logging.warning( + 'Expected combined timestamps to be the same, got different ' + 'timestamps: %s', + diff_timestamps, + ) + result = {} + for timestamp in all_timestamps: + result[timestamp] = series1.get(timestamp, 0) + series2.get(timestamp, 0) + return result + + result_list = list(result_list) + result = copy.deepcopy(result_list[0]) + DropUnaggregated(result) + + for indiv in result_list[1:]: + for group_name, group in indiv.groups.items(): + if group_name not in result.groups: + logging.warning( + 'Found result group "%s" in individual YCSB result, ' + 'but not in accumulator.', + group_name, + ) + result.groups[group_name] = copy.deepcopy(group) + continue + + # Combine reported statistics. + # If no combining operator is defined, the statistic is skipped. + # Otherwise, the aggregated value is either: + # * The value in 'indiv', if the statistic is not present in 'result' or + # * AGGREGATE_OPERATORS[statistic](result_value, indiv_value) + for k, v in group.statistics.items(): + if k not in AGGREGATE_OPERATORS: + logging.warning('No operator for "%s". Skipping aggregation.', k) + continue + elif AGGREGATE_OPERATORS[k] is None: # Drop + result.groups[group_name].statistics.pop(k, None) + continue + elif k not in result.groups[group_name].statistics: + logging.warning( + 'Found statistic "%s.%s" in individual YCSB result, ' + 'but not in accumulator.', + group_name, + k, + ) + result.groups[group_name].statistics[k] = copy.deepcopy(v) + continue + + op = AGGREGATE_OPERATORS[k] + result.groups[group_name].statistics[k] = op( + result.groups[group_name].statistics[k], v + ) + + if measurement_type == HISTOGRAM: + result.groups[group_name].data = CombineHistograms( + result.groups[group_name].data, group.data + ) + elif measurement_type == TIMESERIES: + result.groups[group_name].data = _CombineLatencyTimeSeries( + result.groups[group_name].data, group.data + ) + result.client = ' '.join((result.client, indiv.client)) + result.command_line = ';'.join((result.command_line, indiv.command_line)) + + # if _THROUGHPUT_TIME_SERIES.value: + result.throughput_time_series = _CombineThroughputTimeSeries( + result.throughput_time_series, indiv.throughput_time_series + ) + + if measurement_type == HDRHISTOGRAM: + for group_name in combined_hdr: + if group_name in result.groups: + result.groups[group_name].data = combined_hdr[group_name] + + return result + + +def CombineHdrHistogramLogFiles( + hdr_install_dir: str, + hdr_files_dir: str, + vms: Iterable[virtual_machine.VirtualMachine], +) -> dict[str, str]: + """Combine multiple hdr histograms by group type. + + Combine multiple hdr histograms in hdr log files format into 1 human + readable hdr histogram log file. + This is done by + 1) copying hdrhistogram log files to a single file on a worker vm; + 2) aggregating file containing multiple %-tile histogram into + a single %-tile histogram using HistogramLogProcessor from the + hdrhistogram package that is installed on the vms. Refer to https:// + github.com/HdrHistogram/HdrHistogram/blob/master/HistogramLogProcessor + + Args: + hdr_install_dir: directory where HistogramLogProcessor is located. + hdr_files_dir: directory on the remote vms where hdr files are stored. 
+ vms: remote vms + + Returns: + dict of hdrhistograms keyed by group type + """ + vms = list(vms) + hdrhistograms = {} + for grouptype in HDRHISTOGRAM_GROUPS: + + def _GetHdrHistogramLog(vm, group=grouptype): + filename = f'{hdr_files_dir}{group}.hdr' + return vm.RemoteCommand(f'touch {filename} && tail -1 {filename}')[0] + + results = background_tasks.RunThreaded(_GetHdrHistogramLog, vms) + + # It's possible that there is no result for certain group, e.g., read + # only, update only. + if not all(results): + continue + + worker_vm = vms[0] + for hdr in results[1:]: + worker_vm.RemoteCommand( + 'sudo chmod 755 {1}{2}.hdr && echo "{0}" >> {1}{2}.hdr'.format( + hdr[:-1], hdr_files_dir, grouptype + ) + ) + hdrhistogram, _ = worker_vm.RemoteCommand( + 'cd {0} && ./HistogramLogProcessor -i {1}{2}.hdr' + ' -outputValueUnitRatio 1'.format( + hdr_install_dir, hdr_files_dir, grouptype + ) + ) + hdrhistograms[grouptype.lower()] = hdrhistogram + return hdrhistograms + + +def CreateSamples( + ycsb_result: YcsbResult, + ycsb_version: str, + include_histogram: bool = False, + include_command_line=True, + **kwargs, +) -> list[sample.Sample]: + """Create PKB samples from a YCSB result. + + Args: + ycsb_result: Result of ParseResults. + ycsb_version: The version of YCSB used to run the tests. + include_histogram: If True, include records for each histogram bin. Note + that this will increase the output volume significantly. + include_command_line: If True, include command line in metadata. Note that + this makes sample output much longer if there are multiple client VMs. + **kwargs: Base metadata for each sample. + + Yields: + List of sample.Sample objects. + """ + command_line = ycsb_result.command_line + stage = 'load' if command_line.endswith('-load') else 'run' + base_metadata = { + 'stage': stage, + 'ycsb_tar_url': _ycsb_tar_url, + 'ycsb_version': ycsb_version, + } + if include_command_line: + base_metadata['command_line'] = command_line + base_metadata.update(kwargs) + + throughput_time_series = ycsb_result.throughput_time_series + if throughput_time_series: + yield sample.Sample( + 'Throughput Time Series', + 0, + '', + {'throughput_time_series': sorted(throughput_time_series.items())}, + ) + + for group_name, group in ycsb_result.groups.items(): + meta = base_metadata.copy() + meta['operation'] = group_name + for statistic, value in group.statistics.items(): + if value is None: + continue + + unit = '' + m = re.match(r'^(.*) *\((us|ms|ops/sec)\)$', statistic) + if m: + statistic = m.group(1) + unit = m.group(2) + yield sample.Sample(' '.join([group_name, statistic]), value, unit, meta) + + if group.data and group.data_type == HISTOGRAM: + percentiles = _PercentilesFromHistogram(group.data) + for label, value in percentiles.items(): + yield sample.Sample( + ' '.join([group_name, label, 'latency']), value, 'ms', meta + ) + if include_histogram: + for time_ms, count in group.data: + yield sample.Sample( + '{0}_latency_histogram_{1}_ms'.format(group_name, time_ms), + count, + 'count', + meta, + ) + + if group.data and group.data_type == HDRHISTOGRAM: + # Strip percentile from the three-element tuples. 
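+      # Editor's note (illustrative): a tuple like (99.9, 1.2, 500) becomes
+      # (1.2, 500), i.e. a (latency_ms, count) pair in the shape that
+      # _PercentilesFromHistogram expects.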
+      histogram = [value_count[-2:] for value_count in group.data]
+      percentiles = _PercentilesFromHistogram(histogram)
+      for label, value in percentiles.items():
+        yield sample.Sample(
+            ' '.join([group_name, label, 'latency']), value, 'ms', meta
+        )
+      if include_histogram:
+        histogram = []
+        for _, value, bucket_count in group.data:
+          histogram.append(
+              {'microsec_latency': int(value * 1000), 'count': bucket_count}
+          )
+        hist_meta = meta.copy()
+        hist_meta.update({'histogram': json.dumps(histogram)})
+        yield sample.Sample(
+            '{0} latency histogram'.format(group_name), 0, '', hist_meta
+        )
+
+    if group.data and group.data_type == TIMESERIES:
+      for sample_time, average_latency in group.data:
+        timeseries_meta = meta.copy()
+        timeseries_meta['sample_time'] = sample_time
+        yield sample.Sample(
+            ' '.join([group_name, 'AverageLatency (timeseries)']),
+            average_latency,
+            'ms',
+            timeseries_meta,
+        )
+      yield sample.Sample(
+          'Average Latency Time Series',
+          0,
+          '',
+          {'latency_time_series': group.data},
+      )
diff --git a/perfkitbenchmarker/linux_virtual_machine.py b/perfkitbenchmarker/linux_virtual_machine.py
index 3274d50f8c..ed73ccb16b 100644
--- a/perfkitbenchmarker/linux_virtual_machine.py
+++ b/perfkitbenchmarker/linux_virtual_machine.py
@@ -60,6 +60,7 @@
 OS_PRETTY_NAME_REGEXP = r'PRETTY_NAME="(.*)"'
 _EPEL_URL = 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-{}.noarch.rpm'
+_ORACLE_EPEL_URL = 'oracle-epel-release-el{}'
 CLEAR_BUILD_REGEXP = r'Installed version:\s*(.*)\s*'
 UPDATE_RETRIES = 5
 DEFAULT_SSH_PORT = 22
@@ -206,7 +207,7 @@ class CpuVulnerabilities:
-  """The 3 different vulnerablity statuses from vm.cpu_vulernabilities.
+  """The 3 different vulnerability statuses from vm.cpu_vulernabilities.
 
   Example input:
   /sys/devices/system/cpu/vulnerabilities/itlb_multihit:KVM: Vulnerable
@@ -1861,7 +1862,7 @@ class BaseRhelMixin(BaseLinuxMixin):
   """Class holding RHEL/CentOS specific VM methods and attributes."""
 
   # In all RHEL 8+ based distros yum is an alias to dnf.
-  # dnf is backwards compatibile with yum, but has some additional capabilities
+  # dnf is backwards compatible with yum, but has some additional capabilities
   # For CentOS and RHEL 7 we override this to yum and do not pass dnf-only flags
   # The commands are similar enough that forking whole methods seemed unnecessary.
   # This can be removed when CentOS and RHEL 7 are no longer supported by PKB.
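Editor's note: the Oracle Linux support added in the next hunk follows the existing RHEL-mixin pattern. A minimal sketch of that pattern, with a hypothetical `Example8Mixin` (it assumes the surrounding module's `BaseRhelMixin` and reuses an existing os_type constant purely for illustration):

```python
from perfkitbenchmarker import os_types


class Example8Mixin(BaseRhelMixin):  # BaseRhelMixin is defined in this module
  """Class holding a hypothetical RHEL-8-derivative's methods and attributes."""

  OS_TYPE = os_types.RHEL8  # a real mixin would register its own constant
  PYTHON_2_PACKAGE = None  # RHEL 8+ derivatives ship no python2 package

  def SetupPackageManager(self):
    """Install EPEL (or the distro's EPEL-equivalent release package)."""
    self.RemoteCommand('sudo dnf install -y epel-release')
```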
@@ -2057,6 +2058,23 @@ def SetupPackageManager(self): # https://docs.fedoraproject.org/en-US/epel/#_rhel_9 self.RemoteCommand(f'sudo dnf install -y {_EPEL_URL.format(9)}') +class Oracle8Mixin(BaseRhelMixin): + """Class holding Oracle Linux 8 specific VM methods and attributes.""" + OS_TYPE = os_types.ORACLE8 + PYTHON_2_PACKAGE = None + + def SetupPackageManager(self): + """Install EPEL.""" + self.RemoteCommand(f'sudo dnf install -y {_ORACLE_EPEL_URL.format(8)}') + +class Oracle9Mixin(BaseRhelMixin): + """Class holding Oracle Linux 9 specific VM methods and attributes.""" + OS_TYPE = os_types.ORACLE9 + PYTHON_2_PACKAGE = None + + def SetupPackageManager(self): + """Install EPEL.""" + self.RemoteCommand(f'sudo dnf install -y {_ORACLE_EPEL_URL.format(9)}') class Fedora36Mixin(BaseRhelMixin): """Class holding Fedora36 specific methods and attributes.""" @@ -2387,11 +2405,13 @@ class Ubuntu1604Mixin(BaseUbuntuMixin, virtual_machine.DeprecatedOsMixin): ALTERNATIVE_OS = os_types.UBUNTU1804 -class Ubuntu1804Mixin(BaseUbuntuMixin): +class Ubuntu1804Mixin(BaseUbuntuMixin, virtual_machine.DeprecatedOsMixin): """Class holding Ubuntu1804 specific VM methods and attributes.""" OS_TYPE = os_types.UBUNTU1804 # https://packages.ubuntu.com/bionic/python PYTHON_2_PACKAGE = 'python' + END_OF_LIFE = '2023-05-31' + ALTERNATIVE_OS = os_types.UBUNTU2004 def UpdateEnvironmentPath(self): """Add /snap/bin to default search path for Ubuntu1804. @@ -2409,15 +2429,22 @@ class Ubuntu1804EfaMixin(Ubuntu1804Mixin): OS_TYPE = os_types.UBUNTU1804_EFA -# Inherit Ubuntu 18's idiosyncracies. -# Note https://bugs.launchpad.net/snappy/+bug/1659719 is also marked not fix in -# focal. -class Ubuntu2004Mixin(Ubuntu1804Mixin): +class Ubuntu2004Mixin(BaseUbuntuMixin): """Class holding Ubuntu2004 specific VM methods and attributes.""" OS_TYPE = os_types.UBUNTU2004 # https://packages.ubuntu.com/focal/python2 PYTHON_2_PACKAGE = 'python2' + def UpdateEnvironmentPath(self): + """Add /snap/bin to default search path for Ubuntu2004. + + See https://bugs.launchpad.net/snappy/+bug/1659719. + """ + self.RemoteCommand( + r'sudo sed -i "1 i\export PATH=$PATH:/snap/bin" ~/.bashrc') + self.RemoteCommand( + r'sudo sed -i "1 i\export PATH=$PATH:/snap/bin" /etc/bash.bashrc') + class Ubuntu2004EfaMixin(Ubuntu2004Mixin): """Class holding EFA specific VM methods and attributes.""" diff --git a/perfkitbenchmarker/managed_memory_store.py b/perfkitbenchmarker/managed_memory_store.py index 0798c2c913..44dc0f7420 100644 --- a/perfkitbenchmarker/managed_memory_store.py +++ b/perfkitbenchmarker/managed_memory_store.py @@ -14,15 +14,20 @@ """Module containing class for cloud managed memory stores.""" import abc -import logging +import dataclasses +import re from typing import Optional from absl import flags +from absl import logging from perfkitbenchmarker import resource +from perfkitbenchmarker import virtual_machine # List of memory store types REDIS = 'REDIS' MEMCACHED = 'MEMCACHED' +_REDIS_SHARDS_REGEX = r'(?s)slots\n(\d+)\n(\d+).+?port\n(\d+)\nip\n(\S+)' + FLAGS = flags.FLAGS @@ -71,6 +76,19 @@ class Failover(object): False, 'If True, provisions a cluster instead of a standalone instance.', ) +_NODE_COUNT = flags.DEFINE_integer( + 'managed_memory_store_node_count', + 1, + ( + 'Number of cache nodes (shards) to use. Only used if ' + 'managed_memory_store_cluster is True.' 
+ ), +) +_ZONES = flags.DEFINE_list( + 'cloud_redis_zones', + [], + 'If using cluster mode, the zones to distribute shards between.', +) flags.DEFINE_string( 'cloud_redis_region', 'us-central1', @@ -79,6 +97,9 @@ class Failover(object): 'Defaults to the GCP region of us-central1.' ), ) +_TLS = flags.DEFINE_bool( + 'cloud_redis_tls', False, 'Whether to enable TLS on the instance.' +) MEMCACHED_NODE_COUNT = 1 @@ -124,6 +145,23 @@ def ParseReadableVersion(version: str) -> str: return '.'.join(version.split('.', 2)[:2]) +@dataclasses.dataclass +class RedisShard: + """An object representing a Redis shard. + + Attributes: + slots: formatted like 2731-5461 + ip: address of the redis shard + port: port of the redis shard + zone: location where the shard is located + """ + + slots: str + ip: str + port: int + zone: Optional[str] = None + + class BaseManagedMemoryStore(resource.BaseResource): """Object representing a cloud managed memory store.""" @@ -143,8 +181,13 @@ def __init__(self, spec): self._port: int = None self._password: str = None self._clustered: bool = _MANAGED_MEMORY_STORE_CLUSTER.value + self.node_count = _NODE_COUNT.value if self._clustered else 1 + self.zones = _ZONES.value if self._clustered else [] + self.enable_tls = _TLS.value self.metadata['clustered'] = self._clustered + self.metadata['node_count'] = self.node_count + self.metadata['enable_tls'] = self.enable_tls def GetMemoryStoreIp(self) -> str: """Returns the Ip address of the managed memory store.""" @@ -158,6 +201,30 @@ def GetMemoryStorePort(self) -> int: self._PopulateEndpoint() return self._port + def GetShardEndpoints( + self, client: virtual_machine.BaseVirtualMachine + ) -> list[RedisShard]: + """Returns shard endpoints for the cluster. + + The format of the `cluster shards` command can be found here: + https://redis.io/commands/cluster-shards/. + + Args: + client: VM that has access to the redis cluster. + + Returns: + A list of redis shards. 
+ """ + result, _ = client.RemoteCommand( + f'redis-cli -h {self.GetMemoryStoreIp()} -p' + f' {self.GetMemoryStorePort()} cluster shards' + ) + shards = re.findall(_REDIS_SHARDS_REGEX, result) + return [ + RedisShard(slots=f'{slot_begin}-{slot_end}', ip=ip, port=int(port)) + for slot_begin, slot_end, port, ip in shards + ] + @abc.abstractmethod def _PopulateEndpoint(self) -> None: """Populates the endpoint information for the managed memory store.""" diff --git a/perfkitbenchmarker/network.py b/perfkitbenchmarker/network.py index 331828b3df..3a1a380124 100644 --- a/perfkitbenchmarker/network.py +++ b/perfkitbenchmarker/network.py @@ -105,6 +105,7 @@ def __init__(self, zone=None, cidr=None, machine_type=None): self.zone = zone self.cidr = cidr self.machine_type = machine_type + self.subnet_name = None def __repr__(self): return '%s(%r)' % (self.__class__, self.__dict__) diff --git a/perfkitbenchmarker/os_types.py b/perfkitbenchmarker/os_types.py index 6da6bfaa9f..568a55fafe 100644 --- a/perfkitbenchmarker/os_types.py +++ b/perfkitbenchmarker/os_types.py @@ -38,6 +38,8 @@ FEDORA36 = 'fedora36' FEDORA37 = 'fedora37' JUJU = 'juju' +ORACLE8 = 'oracle8' +ORACLE9 = 'oracle9' RHEL7 = 'rhel7' RHEL8 = 'rhel8' RHEL9 = 'rhel9' @@ -48,7 +50,7 @@ UBUNTU_CONTAINER = 'ubuntu_container' UBUNTU1604 = 'ubuntu1604' # deprecated UBUNTU1604_CUDA9 = 'ubuntu1604_cuda9' -UBUNTU1804 = 'ubuntu1804' +UBUNTU1804 = 'ubuntu1804' # deprecated UBUNTU1804_EFA = 'ubuntu1804_efa' UBUNTU2004 = 'ubuntu2004' UBUNTU2004_EFA = 'ubuntu2004_efa' @@ -104,6 +106,8 @@ FEDORA36, FEDORA37, JUJU, + ORACLE8, + ORACLE9, RHEL7, RHEL8, RHEL9, @@ -114,7 +118,7 @@ UBUNTU_CONTAINER, UBUNTU1604, # deprecated UBUNTU1604_CUDA9, - UBUNTU1804, + UBUNTU1804, # deprecated UBUNTU1804_EFA, UBUNTU2004, UBUNTU2004_EFA, @@ -153,6 +157,6 @@ BASE_OS_TYPES = [CLEAR, CORE_OS, DEBIAN, RHEL, WINDOWS] # May change from time to time. -DEFAULT = UBUNTU1804 +DEFAULT = UBUNTU2004 flags.DEFINE_enum('os_type', DEFAULT, ALL, 'The VM\'s OS type.') diff --git a/perfkitbenchmarker/pkb.py b/perfkitbenchmarker/pkb.py index fb70b8d863..4f29413ab5 100644 --- a/perfkitbenchmarker/pkb.py +++ b/perfkitbenchmarker/pkb.py @@ -1481,6 +1481,13 @@ def Main(): if FLAGS.helpmatchmd: _PrintHelpMD(FLAGS.helpmatchmd) return 0 + + if not FLAGS.accept_licenses: + logging.warning( + 'Please run with the --accept_licenses flag to ' + 'acknowledge PKB may install software on your behalf.' 
+ ) + CheckVersionFlag() SetUpPKB() return RunBenchmarks() diff --git a/perfkitbenchmarker/provider_info.py b/perfkitbenchmarker/provider_info.py index ab727874d4..cc0635366f 100644 --- a/perfkitbenchmarker/provider_info.py +++ b/perfkitbenchmarker/provider_info.py @@ -36,9 +36,10 @@ # Though Docker is not a cloud provider, it's inclusion is useful # for performing on premise to cloud benchmarks DOCKER = 'Docker' +OCI = 'OCI' VALID_CLOUDS = (GCP, AZURE, AWS, IBMCLOUD, DIGITALOCEAN, KUBERNETES, OPENSTACK, - RACKSPACE, CLOUDSTACK, ALICLOUD, MESOS, PROFITBRICKS, DOCKER) + RACKSPACE, CLOUDSTACK, ALICLOUD, MESOS, PROFITBRICKS, DOCKER, OCI) _PROVIDER_INFO_REGISTRY = {} diff --git a/perfkitbenchmarker/providers/aws/aws_elasticache_redis.py b/perfkitbenchmarker/providers/aws/aws_elasticache_redis.py index f229552499..96f16bfd59 100644 --- a/perfkitbenchmarker/providers/aws/aws_elasticache_redis.py +++ b/perfkitbenchmarker/providers/aws/aws_elasticache_redis.py @@ -22,6 +22,7 @@ from perfkitbenchmarker import errors from perfkitbenchmarker import managed_memory_store from perfkitbenchmarker import provider_info +from perfkitbenchmarker import virtual_machine from perfkitbenchmarker import vm_util from perfkitbenchmarker.providers.aws import aws_network from perfkitbenchmarker.providers.aws import util @@ -47,14 +48,13 @@ def __init__(self, spec): self.subnet_group_name = 'subnet-%s' % self.name self.version = REDIS_VERSION_MAPPING[spec.config.cloud_redis.redis_version] self.node_type = FLAGS.elasticache_node_type - self.node_count = ( - FLAGS.elasticache_node_count if self._clustered else None - ) self.redis_region = FLAGS.cloud_redis_region self.failover_zone = FLAGS.elasticache_failover_zone self.failover_subnet = None self.failover_style = FLAGS.redis_failover_style + self.subnets = [] + @staticmethod def CheckPrerequisites(benchmark_config): if ( @@ -131,6 +131,16 @@ def _CreateDependencies(self): self.failover_subnet.Create() cmd += [self.failover_subnet.id] + # Subnets determine where shards can be placed. 
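+    # Editor's note (illustrative): with managed_memory_store_cluster=True and
+    # --cloud_redis_zones=us-east-1a,us-east-1b, the loop below carves one new
+    # subnet per zone out of the client VM's VPC so ElastiCache can spread
+    # shards across those zones.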
+ regional_network = self.spec.vms[0].network.regional_network + vpc_id = regional_network.vpc.id + for zone in self.zones: + cidr = regional_network.vpc.NextSubnetCidrBlock() + subnet = aws_network.AwsSubnet(zone, vpc_id, cidr_block=cidr) + subnet.Create() + cmd += [subnet.id] + self.subnets.append(subnet) + vm_util.IssueCommand(cmd) def _DeleteDependencies(self): @@ -147,6 +157,9 @@ def _DeleteDependencies(self): if self.failover_subnet: self.failover_subnet.Delete() + for subnet in self.subnets: + subnet.Delete() + def _Create(self): """Creates the cluster.""" cmd = [ @@ -186,6 +199,13 @@ def _Create(self): else: cmd += ['--num-node-groups', str(self.node_count)] + if self.enable_tls: + cmd += [ + '--transit-encryption-enabled', + '--transit-encryption-mode', + 'required', + ] + cmd += ['--tags'] cmd += util.MakeFormattedDefaultTags() _, stderr, _ = vm_util.IssueCommand(cmd, raise_on_failure=False) @@ -267,3 +287,22 @@ def _PopulateEndpoint(self): primary_endpoint = cluster_info['NodeGroups'][0]['PrimaryEndpoint'] self._ip = primary_endpoint['Address'] self._port = primary_endpoint['Port'] + + def GetShardEndpoints( + self, client: virtual_machine.BaseVirtualMachine + ) -> list[managed_memory_store.RedisShard]: + """See base class.""" + shards = super().GetShardEndpoints(client) + shards_by_slots = {shard.slots: shard for shard in shards} + + cluster_info = self.DescribeInstance() + # See data/elasticache_describe_cluster.txt for an example + node_groups = cluster_info['NodeGroups'] + zones_by_slots = { + node['Slots']: node['NodeGroupMembers'][0]['PreferredAvailabilityZone'] + for node in node_groups + } + for slot in zones_by_slots: + shards_by_slots[slot].zone = zones_by_slots[slot] + + return list(shards_by_slots.values()) diff --git a/perfkitbenchmarker/providers/aws/aws_elasticached_memcached.py b/perfkitbenchmarker/providers/aws/aws_elasticached_memcached.py index d10634246e..ac54c2e6e0 100644 --- a/perfkitbenchmarker/providers/aws/aws_elasticached_memcached.py +++ b/perfkitbenchmarker/providers/aws/aws_elasticached_memcached.py @@ -22,6 +22,7 @@ from perfkitbenchmarker import managed_memory_store from perfkitbenchmarker import provider_info from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import flags as aws_flags from perfkitbenchmarker.providers.aws import util @@ -40,7 +41,7 @@ def __init__(self, spec): self.subnet_group_name = 'subnet-%s' % self.name self.zone = self.spec.vms[0].zone self.region = util.GetRegionFromZone(self.zone) - self.node_type = FLAGS.cache_node_type + self.node_type = aws_flags.ELASTICACHE_NODE_TYPE.value self.version = FLAGS.managed_memory_store_version @staticmethod diff --git a/perfkitbenchmarker/providers/aws/aws_relational_db.py b/perfkitbenchmarker/providers/aws/aws_relational_db.py index b289f8e648..a7cd17149e 100644 --- a/perfkitbenchmarker/providers/aws/aws_relational_db.py +++ b/perfkitbenchmarker/providers/aws/aws_relational_db.py @@ -320,6 +320,10 @@ def _IsInstanceReady(self, instance_id, timeout=IS_READY_TIMEOUT): if waiting_param: logging.info('Applying parameter') + if state == 'insufficient-capacity': + raise errors.Benchmarks.InsufficientCapacityCloudFailure( + 'Insufficient capacity to provision this db.' 
+ ) if state == 'available' and not pending_values and not waiting_param: break diff --git a/perfkitbenchmarker/providers/aws/flags.py b/perfkitbenchmarker/providers/aws/flags.py index 414d8aefc3..96e73ae301 100644 --- a/perfkitbenchmarker/providers/aws/flags.py +++ b/perfkitbenchmarker/providers/aws/flags.py @@ -56,19 +56,11 @@ 'IMAGE_NAME_REGEX.') flags.DEFINE_string('aws_preprovisioned_data_bucket', None, 'AWS bucket where pre-provisioned data has been copied.') -flags.DEFINE_string( +ELASTICACHE_NODE_TYPE = flags.DEFINE_string( 'elasticache_node_type', 'cache.m4.large', 'The AWS cache node type to use for elasticache clusters.', ) -flags.DEFINE_integer( - 'elasticache_node_count', - 1, - ( - 'Number of cache nodes (shards) to use. Only used if ' - 'managed_memory_store_cluster is True.' - ), -) flags.DEFINE_string( 'elasticache_failover_zone', None, 'AWS elasticache failover zone' ) diff --git a/perfkitbenchmarker/providers/azure/azure_flexible_server.py b/perfkitbenchmarker/providers/azure/azure_flexible_server.py index 759d09061b..768c04b824 100644 --- a/perfkitbenchmarker/providers/azure/azure_flexible_server.py +++ b/perfkitbenchmarker/providers/azure/azure_flexible_server.py @@ -31,7 +31,7 @@ FLAGS = flags.FLAGS DISABLE_HA = 'Disabled' -ENABLE_HA = 'Enabled' +ENABLE_HA = 'SameZone' DEFAULT_MYSQL_VERSION = '8.0' DEFAULT_POSTGRES_VERSION = '13' diff --git a/perfkitbenchmarker/providers/gcp/flags.py b/perfkitbenchmarker/providers/gcp/flags.py index fa99cc2dc0..61d236ca38 100644 --- a/perfkitbenchmarker/providers/gcp/flags.py +++ b/perfkitbenchmarker/providers/gcp/flags.py @@ -133,7 +133,11 @@ flags.DEFINE_boolean('gke_enable_alpha', False, 'Whether to enable alpha kubernetes clusters.') flags.DEFINE_boolean('gke_enable_gvnic', True, - 'Whether to use google vitrual interface on GKE nodes.') + 'Whether to use google virtual network interface on GKE ' + 'nodes.') +GKE_NCCL_FAST_SOCKET = flags.DEFINE_boolean( + 'gke_enable_nccl_fast_socket', False, + 'Whether to enable NCCL fast socket on GKE.') flags.DEFINE_string('gcp_dataproc_subnet', None, 'Specifies the subnet that the cluster will be part of.') flags.DEFINE_multi_string('gcp_dataproc_property', [], diff --git a/perfkitbenchmarker/providers/gcp/gce_disk.py b/perfkitbenchmarker/providers/gcp/gce_disk.py index 6336e78280..15484e3b22 100644 --- a/perfkitbenchmarker/providers/gcp/gce_disk.py +++ b/perfkitbenchmarker/providers/gcp/gce_disk.py @@ -26,6 +26,7 @@ from perfkitbenchmarker import resource from perfkitbenchmarker import vm_util from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.providers.gcp import flags as gcp_flags from perfkitbenchmarker.providers.gcp import util FLAGS = flags.FLAGS @@ -92,7 +93,7 @@ def PdDriveIsNvme(vm): # such as confidential VMs on Milan. # this is not robust, but can get refactored when # there is more clarity on what groups of VMs are NVMe. - if family in ['n2d', 'c2d'] and 'confidential' in vm.OS_TYPE: + if gcp_flags.GCE_CONFIDENTIAL_COMPUTE.value: return True return False @@ -296,10 +297,7 @@ def Detach(self): def GetDevicePath(self): """Returns the path to the device inside the VM.""" - if self.disk_type == disk.LOCAL and self.interface == NVME: - return '/dev/%s' % self.name - else: - if self.disk_type in GCE_REMOTE_DISK_TYPES and self.interface == NVME: - return self.name - # by default, returns this name id. 
- return '/dev/disk/by-id/google-%s' % self.name + if self.disk_type in GCE_REMOTE_DISK_TYPES and self.interface == NVME: + return self.name + # by default, returns this name id. + return f'/dev/disk/by-id/google-{self.name}' diff --git a/perfkitbenchmarker/providers/gcp/gce_network.py b/perfkitbenchmarker/providers/gcp/gce_network.py index 8a5aef5488..524d54bf08 100644 --- a/perfkitbenchmarker/providers/gcp/gce_network.py +++ b/perfkitbenchmarker/providers/gcp/gce_network.py @@ -45,7 +45,8 @@ ALLOW_ALL = 'tcp:1-65535,udp:1-65535,icmp' _PLACEMENT_GROUP_PREFIXES = frozenset( - ['c2', 'c3', 'n2', 'n2d', 'c2d', 'a2']) + ['c2', 'c3', 'n2', 'n2d', 'c2d', 'c3d', 'a2', 'g2'] +) class GceVpnGateway(network.BaseVpnGateway): diff --git a/perfkitbenchmarker/providers/gcp/gce_virtual_machine.py b/perfkitbenchmarker/providers/gcp/gce_virtual_machine.py index 1bdae41993..742259e89d 100644 --- a/perfkitbenchmarker/providers/gcp/gce_virtual_machine.py +++ b/perfkitbenchmarker/providers/gcp/gce_virtual_machine.py @@ -82,7 +82,7 @@ # Gcloud operations are complete when their 'status' is 'DONE'. OPERATION_DONE = 'DONE' -# 2h timeout for LM notificaiton +# 2h timeout for LM notification LM_NOTIFICATION_TIMEOUT_SECONDS = 60 * 60 * 2 NVME = 'NVME' @@ -94,7 +94,8 @@ 'Instance failed to start due to preemption.' ) _GCE_VM_CREATE_TIMEOUT = 1200 -_GCE_NVIDIA_GPU_PREFIX = 'nvidia-tesla-' +_GCE_NVIDIA_GPU_PREFIX = 'nvidia-' +_GCE_NVIDIA_TESLA_GPU_PREFIX = 'nvidia-tesla-' _SHUTDOWN_SCRIPT = 'su "{user}" -c "echo | gsutil cp - {preempt_marker}"' METADATA_PREEMPT_URI = ( 'http://metadata.google.internal/computeMetadata/v1/instance/preempted' @@ -134,8 +135,8 @@ class GceVmSpec(virtual_machine.BaseVmSpec): preemptible: boolean. True if the VM should be preemptible, False otherwise. project: string or None. The project to create the VM in. image_family: string or None. The image family used to locate the image. - image_project: string or None. The image project used to locate the specifed - image. + image_project: string or None. The image project used to locate the + specified image. boot_disk_size: None or int. The size of the boot disk in GB. boot_disk_type: string or None. The type of the boot disk. """ @@ -176,24 +177,6 @@ def __init__(self, *args, **kwargs): self.cpus = None self.memory = None - # The A2 machine family, unlike other GCP offerings has a preset number of - # GPUs, so we set them directly from the machine_type - # https://cloud.google.com/blog/products/compute/announcing-google-cloud-a2-vm-family-based-on-nvidia-a100-gpu - if self.machine_type and self.machine_type.startswith('a2-'): - a2_lookup = { - 'a2-highgpu-1g': 1, - 'a2-highgpu-2g': 2, - 'a2-highgpu-4g': 4, - 'a2-highgpu-8g': 8, - 'a2-megagpu-16g': 16, - 'a2-ultragpu-1g': 1, - 'a2-ultragpu-2g': 2, - 'a2-ultragpu-4g': 4, - 'a2-ultragpu-8g': 8, - } - self.gpu_count = a2_lookup[self.machine_type] - self.gpu_type = virtual_machine.GPU_A100 - @classmethod def _ApplyFlags(cls, config_values, flag_values): """Modifies config options based on runtime flag values. @@ -419,9 +402,13 @@ def GenerateAcceleratorSpecString(accelerator_type, accelerator_count): String to be used by gcloud to attach accelerators to a VM. Must be prepended by the flag '--accelerator'. 
""" - gce_accelerator_type = ( - FLAGS.gce_accelerator_type_override - or _GCE_NVIDIA_GPU_PREFIX + accelerator_type + gce_accelerator_type = FLAGS.gce_accelerator_type_override or ( + ( + _GCE_NVIDIA_TESLA_GPU_PREFIX + if accelerator_type in virtual_machine.TESLA_GPU_TYPES + else _GCE_NVIDIA_GPU_PREFIX + ) + + accelerator_type ) return 'type={0},count={1}'.format(gce_accelerator_type, accelerator_count) @@ -501,6 +488,26 @@ def __init__(self, vm_spec): self.gce_tags = vm_spec.gce_tags self.gce_network_tier = FLAGS.gce_network_tier self.gce_nic_type = FLAGS.gce_nic_type + + # The A2 machine family, unlike other GCP offerings has a preset number of + # GPUs, so we set them directly from the machine_type + # https://cloud.google.com/blog/products/compute/announcing-google-cloud-a2-vm-family-based-on-nvidia-a100-gpu + # machine_type is always defined when running, but not in unit tests. + if self.machine_type and self.machine_type.startswith('a2-'): + a2_lookup = { + 'a2-highgpu-1g': 1, + 'a2-highgpu-2g': 2, + 'a2-highgpu-4g': 4, + 'a2-highgpu-8g': 8, + 'a2-megagpu-16g': 16, + 'a2-ultragpu-1g': 1, + 'a2-ultragpu-2g': 2, + 'a2-ultragpu-4g': 4, + 'a2-ultragpu-8g': 8, + } + self.gpu_count = a2_lookup[self.machine_type] + self.gpu_type = virtual_machine.GPU_A100 + if not self.SupportGVNIC(): logging.warning('Changing gce_nic_type to VIRTIO_NET') self.gce_nic_type = 'VIRTIO_NET' @@ -1068,11 +1075,7 @@ def CreateScratchDisk(self, disk_spec_id, disk_spec): name = 'local-ssd-%d' % self.local_disk_counter disk_number = self.local_disk_counter + 1 elif self.ssd_interface == NVME: - # Device can either be /dev/nvme0n1 or /dev/nvme1n1. Find out which. - name, _ = self.RemoteCommand( - 'find /dev/nvme*n%d' % (self.local_disk_counter + 1) - ) - name = name.strip().split('/')[-1] + name = f'local-nvme-ssd-{self.local_disk_counter}' disk_number = self.local_disk_counter + self.NVME_START_INDEX else: raise errors.Error('Unknown Local SSD Interface.') @@ -1342,7 +1345,7 @@ def CollectLMNotificationsTime(self): } lm_times = self._ReadLMNoticeContents() if not lm_times: - return events_dict + raise ValueError('Cannot collect lm times. 
Live Migration might have failed.')
+
     # Result may contain errors captured, so we need to skip them
     for event_info in lm_times.splitlines():
diff --git a/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py b/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py
index c5aeb87f09..541803c270 100644
--- a/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py
+++ b/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py
@@ -425,6 +425,7 @@ def __init__(self, dpb_service_spec):
     super().__init__(dpb_service_spec)
     self._job_counter = 0
     self.batch_name = f'{self.cluster_id}-{self._job_counter}'
+    self.dpb_hdfs_type = 'HDD'
 
   def SubmitJob(self,
                 jarfile=None,
@@ -548,12 +549,23 @@ def GetJobProperties(self) -> Dict[str, str]:
       result['spark.dynamicAllocation.maxExecutors'] = (
           self.spec.dataproc_serverless_max_executors)
     if self.spec.worker_group.disk_spec.disk_size:
-      result['spark.dataproc.driver.disk_size'] = (
+      result['spark.dataproc.driver.disk.size'] = (
           f'{self.spec.worker_group.disk_spec.disk_size}g'
       )
-      result['spark.dataproc.executor.disk_size'] = (
+      result['spark.dataproc.executor.disk.size'] = (
           f'{self.spec.worker_group.disk_spec.disk_size}g'
       )
+    if self.spec.dataproc_serverless_memory:
+      result['spark.driver.memory'] = f'{self.spec.dataproc_serverless_memory}m'
+      result['spark.executor.memory'] = (
+          f'{self.spec.dataproc_serverless_memory}m')
+    if self.spec.dataproc_serverless_memory_overhead:
+      result['spark.driver.memoryOverhead'] = (
+          f'{self.spec.dataproc_serverless_memory_overhead}m'
+      )
+      result['spark.executor.memoryOverhead'] = (
+          f'{self.spec.dataproc_serverless_memory_overhead}m'
+      )
     result.update(super().GetJobProperties())
     return result
@@ -585,7 +597,11 @@ def GetMetadata(self):
         'dpb_cluster_max_executors': max_executors,
         'dpb_cluster_initial_executors': initial_executors,
         'dpb_cores_per_node': self.spec.dataproc_serverless_core_count,
-        'dpb_hdfs_type': 'default-disk',
+        'dpb_memory_per_node':
+            self.spec.dataproc_serverless_memory or 'default',
+        'dpb_memory_overhead_per_node':
+            self.spec.dataproc_serverless_memory_overhead or 'default',
+        'dpb_hdfs_type': basic_data['dpb_hdfs_type'],
         'dpb_disk_size': basic_data['dpb_disk_size'],
         'dpb_service_zone': basic_data['dpb_service_zone'],
         'dpb_job_properties': basic_data['dpb_job_properties'],
diff --git a/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc_serverless_prices.py b/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc_serverless_prices.py
index f20170ec44..6de0396c25 100644
--- a/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc_serverless_prices.py
+++ b/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc_serverless_prices.py
@@ -8,138 +8,138 @@
 DATAPROC_SERVERLESS_PRICES = {
     'us-west1': {
         'usd_per_milli_dcu_sec': 0.06 / 1000 / 3600,
-        'usd_per_shuffle_storage_gb_sec': 0.04 / 744 / 3600,
+        'usd_per_shuffle_storage_gb_sec': 0.04 / 720 / 3600,
     },
     'us-west2': {
         'usd_per_milli_dcu_sec': 0.072071 / 1000 / 3600,
-        'usd_per_shuffle_storage_gb_sec': 0.048 / 744 / 3600,
+        'usd_per_shuffle_storage_gb_sec': 0.048 / 720 / 3600,
     },
     'us-west3': {
         'usd_per_milli_dcu_sec': 0.072071 / 1000 / 3600,
-        'usd_per_shuffle_storage_gb_sec': 0.048 / 744 / 3600,
+        'usd_per_shuffle_storage_gb_sec': 0.048 / 720 / 3600,
     },
     'us-west4': {
         'usd_per_milli_dcu_sec': 0.067572 / 1000 / 3600,
-        'usd_per_shuffle_storage_gb_sec': 0.044 / 744 / 3600,
+        'usd_per_shuffle_storage_gb_sec': 0.044 / 720 / 3600,
     },
     'us-east1': {
         'usd_per_milli_dcu_sec': 0.06 / 1000 / 3600,
-        'usd_per_shuffle_storage_gb_sec': 0.04 / 744 / 3600,
+        'usd_per_shuffle_storage_gb_sec': 0.04 / 720 / 
3600, }, 'us-east4': { 'usd_per_milli_dcu_sec': 0.067572 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.044 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.044 / 720 / 3600, }, 'us-east5': { 'usd_per_milli_dcu_sec': 0.06 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.04 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.04 / 720 / 3600, }, 'us-central1': { 'usd_per_milli_dcu_sec': 0.06 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.04 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.04 / 720 / 3600, }, 'us-south1': { 'usd_per_milli_dcu_sec': 0.0708 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.0472 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.0472 / 720 / 3600, }, 'europe-north1': { 'usd_per_milli_dcu_sec': 0.066062 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.044 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.044 / 720 / 3600, }, 'europe-west1': { 'usd_per_milli_dcu_sec': 0.066007 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.04 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.04 / 720 / 3600, }, 'europe-west2': { 'usd_per_milli_dcu_sec': 0.077307 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.048 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.048 / 720 / 3600, }, 'europe-west3': { 'usd_per_milli_dcu_sec': 0.077307 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.048 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.048 / 720 / 3600, }, 'europe-west4': { 'usd_per_milli_dcu_sec': 0.066057 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.044 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.044 / 720 / 3600, }, 'europe-west6': { 'usd_per_milli_dcu_sec': 0.083955 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.052 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.052 / 720 / 3600, }, 'europe-west8': { 'usd_per_milli_dcu_sec': 0.0696 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.0464 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.0464 / 720 / 3600, }, 'europe-west9': { 'usd_per_milli_dcu_sec': 0.0696 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.0464 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.0464 / 720 / 3600, }, 'europe-central2': { 'usd_per_milli_dcu_sec': 0.077307 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.048 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.048 / 720 / 3600, }, 'europe-southwest1': { 'usd_per_milli_dcu_sec': 0.0708 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.047 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.047 / 720 / 3600, }, 'asia-northeast1': { 'usd_per_milli_dcu_sec': 0.076976 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.052 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.052 / 720 / 3600, }, 'asia-northeast2': { 'usd_per_milli_dcu_sec': 0.076976 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.052 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.052 / 720 / 3600, }, 'asia-northeast3': { 'usd_per_milli_dcu_sec': 0.076976 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.052 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.052 / 720 / 3600, }, 'asia-east1': { 'usd_per_milli_dcu_sec': 0.069477 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.04 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.04 / 720 / 3600, }, 'asia-east2': { 'usd_per_milli_dcu_sec': 0.083955 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.05 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.05 / 720 / 3600, }, 'asia-south1': { 'usd_per_milli_dcu_sec': 0.072071 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 
0.048 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.048 / 720 / 3600, }, 'asia-south2': { 'usd_per_milli_dcu_sec': 0.072071 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.048 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.048 / 720 / 3600, }, 'asia-southeast1': { 'usd_per_milli_dcu_sec': 0.074015 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.044 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.044 / 720 / 3600, }, 'asia-southeast2': { 'usd_per_milli_dcu_sec': 0.080674 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.052 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.052 / 720 / 3600, }, 'australia-southeast1': { 'usd_per_milli_dcu_sec': 0.085135 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.054 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.054 / 720 / 3600, }, 'australia-southeast2': { 'usd_per_milli_dcu_sec': 0.085135 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.054 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.054 / 720 / 3600, }, 'northamerica-northeast1': { 'usd_per_milli_dcu_sec': 0.066057 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.044 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.044 / 720 / 3600, }, 'northamerica-northeast2': { 'usd_per_milli_dcu_sec': 0.066057 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.044 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.044 / 720 / 3600, }, 'southamerica-east1': { 'usd_per_milli_dcu_sec': 0.095246 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.06 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.06 / 720 / 3600, }, 'southamerica-west1': { 'usd_per_milli_dcu_sec': 0.08581 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.057 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.057 / 720 / 3600, }, } diff --git a/perfkitbenchmarker/providers/gcp/gcp_spanner.py b/perfkitbenchmarker/providers/gcp/gcp_spanner.py index c45c75b623..0cc31362b3 100644 --- a/perfkitbenchmarker/providers/gcp/gcp_spanner.py +++ b/perfkitbenchmarker/providers/gcp/gcp_spanner.py @@ -30,6 +30,7 @@ from google.cloud import monitoring_v3 from google.cloud.monitoring_v3 import query import numpy as np +from perfkitbenchmarker import background_tasks from perfkitbenchmarker import errors from perfkitbenchmarker import relational_db from perfkitbenchmarker import relational_db_spec @@ -468,15 +469,23 @@ def GetDefaultPort(self) -> int: def _PostCreate(self): super()._PostCreate() - self.client_vm_query_tools.InstallPackages() - - @property - def client_vm_query_tools(self): - if not hasattr(self, '_client_vm_query_tools'): - connection_properties = sql_engine_utils.DbConnectionProperties( - self.spec.engine, self.spec.engine_version, self.endpoint, self.port, - self.spec.database_username, self.spec.database_password, - self.instance_id, self.database, self.project) - self._client_vm_query_tools = sql_engine_utils.GetQueryToolsByEngine( - self.client_vm, connection_properties) - return self._client_vm_query_tools + # TODO(user) move to superclass. 
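+    # Editor's note: RunThreaded fans InstallPackages out over every entry in
+    # self.client_vms_query_tools in parallel, instead of installing on a
+    # single client VM as the removed code above did.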
+ background_tasks.RunThreaded( + lambda client_query_tools: client_query_tools.InstallPackages(), + self.client_vms_query_tools, + ) + + def _GetDbConnectionProperties( + self, + ) -> sql_engine_utils.DbConnectionProperties: + return sql_engine_utils.DbConnectionProperties( + self.spec.engine, + self.spec.engine_version, + self.endpoint, + self.port, + self.spec.database_username, + self.spec.database_password, + self.instance_id, + self.database, + self.project, + ) diff --git a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py index e82c70645d..def6574e7b 100644 --- a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py +++ b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py @@ -125,6 +125,14 @@ def __init__(self, spec): os.environ['CLOUDSDK_API_ENDPOINT_OVERRIDES_CONTAINER'] = ( gcp_flags.GKE_API_OVERRIDE.value) + self.enable_nccl_fast_socket = False + if gcp_flags.GKE_NCCL_FAST_SOCKET.value: + if self.nodepools: + self.enable_nccl_fast_socket = True + else: + raise errors.Config.InvalidValue( + 'NCCL fast socket is only supported on secondary node pools.') + def GetResourceMetadata(self): """Returns a dict containing metadata about the cluster. @@ -143,6 +151,7 @@ def GetResourceMetadata(self): # TODO(pclay): support NVME when it leaves alpha # Also consider moving FLAGS.gce_ssd_interface into the vm_spec. result['gce_local_ssd_interface'] = gce_virtual_machine.SCSI + result['gke_nccl_fast_socket'] = self.enable_nccl_fast_socket return result def _GcloudCommand(self, *args, **kwargs): @@ -230,10 +239,10 @@ def _AddNodeParamsToCmd( cmd.flags['labels'] = util.MakeFormattedDefaultTags() if vm_config.gpu_count: - cmd.flags['accelerator'] = ( - gce_virtual_machine.GenerateAcceleratorSpecString( - vm_config.gpu_type, - vm_config.gpu_count)) + if 'a2-' not in vm_config.machine_type: + cmd.flags['accelerator'] = ( + gce_virtual_machine.GenerateAcceleratorSpecString( + vm_config.gpu_type, vm_config.gpu_count)) if vm_config.min_cpu_platform: cmd.flags['min-cpu-platform'] = vm_config.min_cpu_platform @@ -268,6 +277,11 @@ def _AddNodeParamsToCmd( cmd.args.append('--enable-gvnic') else: cmd.args.append('--no-enable-gvnic') + if ( + self.enable_nccl_fast_socket + and name != container_service.DEFAULT_NODEPOOL + ): + cmd.args.append('--enable-fast-socket') if FLAGS.gke_node_system_config is not None: cmd.flags['system-config-from-file'] = FLAGS.gke_node_system_config @@ -290,7 +304,10 @@ def _PostCreate(self): env['KUBECONFIG'] = FLAGS.kubeconfig cmd.IssueRetryable(env=env) - if self.vm_config.gpu_count: + should_install_nvidia_drivers = ( + self.vm_config.gpu_count or + any(pool.vm_config.gpu_count for pool in self.nodepools.values())) + if should_install_nvidia_drivers: kubernetes_helper.CreateFromFile(NVIDIA_DRIVER_SETUP_DAEMON_SET_SCRIPT) kubernetes_helper.CreateFromFile( data.ResourcePath(NVIDIA_UNRESTRICTED_PERMISSIONS_DAEMON_SET)) diff --git a/perfkitbenchmarker/providers/oci/__init__.py b/perfkitbenchmarker/providers/oci/__init__.py new file mode 100644 index 0000000000..8f33e6ef87 --- /dev/null +++ b/perfkitbenchmarker/providers/oci/__init__.py @@ -0,0 +1,3 @@ +"""Provider for Oracle Cloud.""" + +OCI_PATH = 'oci' diff --git a/perfkitbenchmarker/providers/oci/default_config_constants.yaml b/perfkitbenchmarker/providers/oci/default_config_constants.yaml new file mode 100644 index 0000000000..6e0237f7c2 --- /dev/null +++ b/perfkitbenchmarker/providers/oci/default_config_constants.yaml @@ -0,0 +1,209 @@ 
+# All anchors defined in this file should be compatible +# with *all* clouds. That means any vm_specs or disk_specs +# defined here should have keys for every cloud. +default_single_core: &default_single_core + GCP: + machine_type: n1-standard-1 + zone: us-central1-a + image: null + Azure: + machine_type: Standard_A1 + zone: eastus2 + image: null + AWS: + machine_type: t2.small + zone: us-east-1 + image: null + AliCloud: + machine_type: ecs.g5.large + zone: cn-beijing-g + image: null + DigitalOcean: + machine_type: 2gb + zone: sfo1 + image: null + OpenStack: + machine_type: m1.small + zone: nova + image: null + CloudStack: + machine_type: 1vCPU.1GB + zone: QC-1 + image: null + Rackspace: + machine_type: general1-1 + zone: IAD + image: null + Kubernetes: + image: null + Mesos: + image: null + ProfitBricks: + machine_type: Small + zone: ZONE_1 + image: null + Docker: + image: null + machine_type: + cpus: 1 + memory: 2.0GiB + IBMCloud: + machine_type: cx2-2x4 + zone: us-south-1 + image: null + OCI: + machine_type: VM.Standard.A1.Flex + zone: us-ashburn-1 + image: null + +# TODO: update the two core machines for more providers +default_dual_core: &default_dual_core + GCP: + machine_type: n1-standard-2 + zone: us-central1-a + image: null + Azure: + machine_type: Standard_D2_v3 + zone: eastus2 + image: null + AWS: + machine_type: m5.large + zone: us-east-1 + image: null + Docker: + image: null + machine_type: + cpus: 2 + memory: 4.0GiB + AliCloud: + machine_type: ecs.g5.xlarge + zone: cn-beijing-g + image: null + IBMCloud: + machine_type: cx2-4x8 + zone: us-south-1 + image: null + Kubernetes: + image: null + OCI: + machine_type: VM.Standard.A1.Flex + zone: us-ashburn-1 + image: null + +# TODO(user): update the disk types below as more providers are +# updated for the disk types refactor. +default_500_gb: &default_500_gb + GCP: + disk_type: pd-standard + disk_size: 500 + mount_point: /scratch + Azure: + disk_type: Standard_LRS + disk_size: 500 + mount_point: /scratch + AWS: + disk_type: standard + disk_size: 500 + mount_point: /scratch + AliCloud: + disk_type: standard + disk_size: 500 + mount_point: /scratch + DigitalOcean: + disk_type: standard + disk_size: 500 + mount_point: /scratch + OpenStack: + disk_type: standard + disk_size: 500 + mount_point: /scratch + CloudStack: + disk_size: 500 + mount_point: /scratch + Rackspace: + disk_type: standard + disk_size: 500 + mount_point: /scratch + Kubernetes: + disk_type: emptyDir + disk_size: 500 + mount_point: /scratch + Mesos: + disk_type: local + disk_size: 500 + mount_point: /scratch + ProfitBricks: + disk_type: standard + disk_size: 500 + mount_point: /scratch + Docker: + disk_type: local + disk_size: 500 + mount_point: /scratch + IBMCloud: + disk_type: standard + disk_size: 500 + mount_point: /scratch + OCI: + disk_type: paravirtualized + disk_size: 500 + mount_point: /scratch + +# TODO(user): update the disk types below as more providers are +# updated for the disk types refactor. 
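+# Editor's note (illustrative): benchmark configs consume these anchors via
+# YAML aliases, e.g. a vm_group can set `disk_spec: *default_50_gb` to pick up
+# the per-cloud defaults defined below.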
+default_50_gb: &default_50_gb
+  GCP:
+    disk_type: pd-standard
+    disk_size: 50
+    mount_point: /scratch
+  Azure:
+    disk_type: Standard_LRS
+    disk_size: 50
+    mount_point: /scratch
+  AWS:
+    disk_type: standard
+    disk_size: 50
+    mount_point: /scratch
+  AliCloud:
+    disk_type: standard
+    disk_size: 50
+    mount_point: /scratch
+  DigitalOcean:
+    disk_type: standard
+    disk_size: 50
+    mount_point: /scratch
+  OpenStack:
+    disk_type: standard
+    disk_size: 50
+    mount_point: /scratch
+  CloudStack:
+    disk_size: 50
+    mount_point: /scratch
+  Rackspace:
+    disk_type: standard
+    disk_size: 50
+    mount_point: /scratch
+  Kubernetes:
+    disk_type: emptyDir
+    disk_size: 50
+    mount_point: /scratch
+  Mesos:
+    disk_type: local
+    disk_size: 50
+    mount_point: /scratch
+  ProfitBricks:
+    disk_type: standard
+    disk_size: 50
+    mount_point: /scratch
+  Docker:
+    disk_type: local
+    disk_size: 50
+    mount_point: /scratch
+  IBMCloud:
+    disk_type: standard
+    disk_size: 50
+    mount_point: /scratch
+  OCI:
+    disk_type: paravirtualized
+    disk_size: 50
+    mount_point: /scratch
diff --git a/perfkitbenchmarker/providers/oci/flags.py b/perfkitbenchmarker/providers/oci/flags.py
new file mode 100644
index 0000000000..b0858787d3
--- /dev/null
+++ b/perfkitbenchmarker/providers/oci/flags.py
@@ -0,0 +1,28 @@
+"""Module containing flags applicable across benchmark runs on OCI."""
+
+from absl import flags
+
+VALID_TIERS = ['VM.Standard', 'VM.Optimized']
+
+VALID_SHAPES = ['.A1.Flex', '3.Flex', '.E4.Flex']
+
+flags.DEFINE_string('oci_availability_domain', None, 'The availability domain.')
+
+flags.DEFINE_string('oci_fault_domain', None, 'The fault domain.')
+
+flags.DEFINE_string('oci_shape', 'VM.Standard.A1.Flex',
+                    'The OCI shape to use for the machine type. Defaults to '
+                    'VM.Standard.A1.Flex.')
+
+flags.DEFINE_integer('oci_compute_units', 1,
+                     'Number of compute units (OCPUs) to allocate for the '
+                     'machine type.')
+
+flags.DEFINE_integer('oci_compute_memory', None,
+                     'Amount of memory in GBs to allocate for the machine '
+                     'type.')
+
+flags.DEFINE_integer('oci_boot_disk_size', 50, 'Size of the boot disk in GBs.')
+
+flags.DEFINE_boolean('oci_use_vcn', True,
+                     'Whether to use the built-in virtual cloud network (VCN).')
+
+flags.DEFINE_integer('oci_num_local_ssds', 0, 'Number of local SSD disks.')
+
+flags.DEFINE_string(
+    'oci_network_name', None, 'The name of an already created '
+    'network to use instead of creating a new one.')
diff --git a/perfkitbenchmarker/providers/oci/oci_disk.py b/perfkitbenchmarker/providers/oci/oci_disk.py
new file mode 100644
index 0000000000..80f1dbdedf
--- /dev/null
+++ b/perfkitbenchmarker/providers/oci/oci_disk.py
@@ -0,0 +1,173 @@
+
+"""Module containing classes related to Oracle disks."""
+
+import json
+import logging
+import threading
+from typing import Optional
+from absl import flags
+from perfkitbenchmarker import disk
+from perfkitbenchmarker import vm_util
+from perfkitbenchmarker.providers.oci import util
+
+FLAGS = flags.FLAGS
+
+# https://docs.oracle.com/en-us/iaas/Content/Block/Concepts/blockvolumeperformance.htm
+
+# Acceptable values for vpus per GB are:
+# 0: Represents Lower Cost option.
+# 10: Represents Balanced option.
+# 20: Represents Higher Performance option.
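+# Editor's note (illustrative): with the defaults above, _Create below issues
+# a command of roughly the form
+#   oci bv volume create --availability-domain <AD> --size-in-gbs 100
+#       --display-name <vm_name>-0 --freeform-tags <tags> --vpus-per-gb 20
+# (the exact prefix comes from util.OCI_PREFIX and util.GetEncodedCmd).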
+DEFAULT_VPUS_PER_GB = 20
+
+DISK_CREATE_STATUSES = frozenset(
+    ['AVAILABLE', 'FAULTY', 'PROVISIONING', 'RESTORING', 'TERMINATED', 'TERMINATING']
+)
+
+DISK_ATTACH_STATUS = frozenset(
+    ['ATTACHED', 'ATTACHING', 'DETACHED', 'DETACHING']
+)
+
+
+class OciDisk(disk.BaseDisk):
+  _lock = threading.Lock()
+  vm_devices = {}
+
+  def __init__(self, disk_spec, vm_name, availability_domain, disk_number):
+    super(OciDisk, self).__init__(disk_spec)
+    self.id = None
+    self.availability_domain = availability_domain
+    self.disk_size = disk_spec.disk_size or 100
+    self.vpus_per_gb: int = DEFAULT_VPUS_PER_GB
+    self.status = None
+    self.name = f'{vm_name}-{disk_number}'
+    self.attachment_id = None
+    self.device_name = None
+    self.iqn: Optional[str] = None
+    self.port: Optional[str] = None
+    self.ipv4: Optional[str] = None
+    self.tags = util.MakeFormattedDefaultTags()
+
+  def _Create(self):
+    """Creates the disk."""
+    create_cmd = util.OCI_PREFIX + [
+        'bv',
+        'volume',
+        'create',
+        f'--availability-domain {self.availability_domain}',
+        f'--size-in-gbs {self.disk_size}',
+        f'--display-name {self.name}',
+        f'--freeform-tags {self.tags}',
+        f'--vpus-per-gb {str(self.vpus_per_gb)}']
+    create_cmd = util.GetEncodedCmd(create_cmd)
+    stdout, _, _ = vm_util.IssueCommand(create_cmd, raise_on_failure=False)
+    response = json.loads(stdout)
+    self.id = response['data']['id']
+    self._WaitForDiskStatus(['AVAILABLE'])
+
+  def _Delete(self):
+    """Deletes the disk."""
+    # oci bv volume delete
+    delete_cmd = util.OCI_PREFIX + [
+        'bv',
+        'volume',
+        'delete',
+        f'--volume-id {self.id}',
+        '--force']
+    delete_cmd = util.GetEncodedCmd(delete_cmd)
+    out, _ = vm_util.IssueRetryableCommand(delete_cmd)
+    self._WaitForDiskStatus(['TERMINATED'])
+
+  @vm_util.Retry(poll_interval=60, log_errors=False)
+  def _WaitForDiskStatus(self, status_list):
+    """Waits until the disk's status is in status_list."""
+    logging.info('Waiting until the volume status is: %s' % status_list)
+    status_cmd = util.OCI_PREFIX + [
+        'bv',
+        'volume',
+        'get',
+        f'--volume-id {self.id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _ = vm_util.IssueRetryableCommand(status_cmd)
+    state = json.loads(out)
+    check_state = state['data']['lifecycle-state']
+    self.status = check_state
+    assert check_state in status_list
+
+  def Attach(self, disk_spec, vm):
+    """Attaches the disk to the given VM."""
+    attach_cmd = util.OCI_PREFIX + [
+        'compute',
+        'volume-attachment',
+        'attach',
+        f'--volume-id {self.id}',
+        f'--instance-id {vm.ocid}',
+        f'--type {self.disk_type}',
+        f'--device {self.device_name}']
+    logging.info('Attaching Oci disk %s.' % self.id)
+    attach_cmd = util.GetEncodedCmd(attach_cmd)
+    stdout, _ = vm_util.IssueRetryableCommand(attach_cmd)
+    response = json.loads(stdout)
+    self.attachment_id = response['data']['id']
+    self._WaitForDiskAttachStatus(disk_spec, ['ATTACHED'])
+
+  def Detach(self, disk_spec, vm):
+    """Detaches the disk from the given VM."""
+    if disk_spec.disk_type == 'iscsi':
+      self.ExecuteDetachIscsiCommands(vm)
+    detach_cmd = util.OCI_PREFIX + [
+        'compute',
+        'volume-attachment',
+        'detach',
+        f'--volume-attachment-id {self.attachment_id}',
+        '--force']
+    logging.info('Detaching Oci disk %s.' % self.id)
+    detach_cmd = util.GetEncodedCmd(detach_cmd)
+    out, _ = vm_util.IssueRetryableCommand(detach_cmd)
+    self._WaitForDiskAttachStatus(disk_spec, ['DETACHED'])
+
+  @vm_util.Retry(poll_interval=60, log_errors=False)
+  def _WaitForDiskAttachStatus(self, disk_spec, status_list):
+    """Waits until the disk's attach status is in status_list."""
+    logging.info('Waiting until the volume attachment status is: %s'
+                 % status_list)
+    status_cmd = util.OCI_PREFIX + [
+        'compute',
+        'volume-attachment',
+        'get',
+        f'--volume-attachment-id {self.attachment_id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _ = vm_util.IssueRetryableCommand(status_cmd)
+    state = json.loads(out)
+    check_state = state['data']['lifecycle-state']
+    self.status = check_state
+    if self.status == 'ATTACHED' and disk_spec.disk_type == 'iscsi':
+      self.iqn = state['data']['iqn']
+      self.ipv4 = state['data']['ipv4']
+      self.port = state['data']['port']
+    assert check_state in status_list
+
+  def ExecuteAttachIscsiCommands(self, vm):
+    vm.RemoteCommand(f"sudo iscsiadm -m node -o new -T {self.iqn} -p {self.ipv4}:{self.port}")
+    vm.RemoteCommand(f"sudo iscsiadm -m node -o update -T {self.iqn} -n node.startup -v automatic")
+    vm.RemoteCommand(f"sudo iscsiadm -m node -T {self.iqn} -p {self.ipv4}:{self.port} -l")
+
+  def ExecuteDetachIscsiCommands(self, vm):
+    vm.RemoteCommand(f"sudo iscsiadm -m node -T {self.iqn} -p {self.ipv4}:{self.port} -u")
+    vm.RemoteCommand(f"sudo iscsiadm -m node -o delete -T {self.iqn} -p {self.ipv4}:{self.port}")
+
+  def GetDevicePath(self):
+    """Returns the path to the device inside the VM."""
+    return self.device_name
+
+  def GetFreeDeviceName(self, vm):
+    """Finds the first available device name on the VM and records it."""
+    free_device_cmd = util.OCI_PREFIX + [
+        'compute',
+        'device',
+        'list-instance',
+        f'--instance-id {vm.ocid}']
+    free_device_cmd = util.GetEncodedCmd(free_device_cmd)
+    stdout, _ = vm_util.IssueRetryableCommand(free_device_cmd)
+    response = json.loads(stdout)
+    for free_disk in range(0, 31):
+      if response['data'][free_disk]['is-available'] is True:
+        self.device_name = response['data'][free_disk]['name']
+        break
diff --git a/perfkitbenchmarker/providers/oci/oci_network.py b/perfkitbenchmarker/providers/oci/oci_network.py
new file mode 100644
index 0000000000..b125cfe83f
--- /dev/null
+++ b/perfkitbenchmarker/providers/oci/oci_network.py
@@ -0,0 +1,409 @@
+"""Module containing classes related to Oracle Network."""
+
+import json
+import logging
+import uuid
+
+from absl import flags
+from perfkitbenchmarker import network
+from perfkitbenchmarker import provider_info
+from perfkitbenchmarker import resource
+from perfkitbenchmarker import vm_util
+from perfkitbenchmarker.providers.oci import util
+
+FLAGS = flags.FLAGS
+
+MAX_NAME_LENGTH = 128
+WAIT_INTERVAL_SECONDS = 600
+
+VCN_CREATE_STATUSES = frozenset(
+    ['AVAILABLE', 'PROVISIONING', 'TERMINATED', 'TERMINATING', 'UPDATING']
+)
+
+SUBNET_CREATE_STATUSES = frozenset(
+    ['AVAILABLE', 'PROVISIONING', 'TERMINATED', 'TERMINATING', 'UPDATING']
+)
+
+IG_CREATE_STATUSES = frozenset(
+    ['AVAILABLE', 'PROVISIONING', 'TERMINATED', 'TERMINATING']
+)
+
+ROUTE_TABLE_UPDATE_STATUSES = frozenset(
+    ['AVAILABLE', 'PROVISIONING', 'TERMINATED', 'TERMINATING']
+)
+
+SECURITY_LIST_UPDATE_STATUSES = frozenset(
+    ['AVAILABLE', 'PROVISIONING', 'TERMINATED', 'TERMINATING']
+)
+
+
+class OciVcn(resource.BaseResource):
+  """An object representing an Oci VCN."""
+
+  def __init__(self, name, region):
+    super(OciVcn, self).__init__()
+    self.status = None
+    self.region = region
+
+
+class OciVcn(resource.BaseResource):
+  """An object representing an OCI VCN."""
+
+  def __init__(self, name, region):
+    super(OciVcn, self).__init__()
+    self.status = None
+    self.region = region
+    self.id = None
+    self.name = name
+    self.cidr_blocks = ['172.16.0.0/16']
+    self.cidr_block = None
+    self.vcn_id = None
+    self.subnet_id = None
+    self.ig_id = None
+    self.rt_id = None
+    self.security_list_id = None
+    self.tags = util.MakeFormattedDefaultTags()
+
+  @vm_util.Retry(poll_interval=60, log_errors=False)
+  def WaitForVcnStatus(self, status_list):
+    """Waits until the VCN's status is in status_list."""
+    logging.info('Waiting until the VCN status is one of: %s', status_list)
+    status_cmd = util.OCI_PREFIX + [
+        'network',
+        'vcn',
+        'get',
+        f'--vcn-id {self.vcn_id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _ = vm_util.IssueRetryableCommand(status_cmd)
+    state = json.loads(out)
+    check_state = state['data']['lifecycle-state']
+    self.status = check_state
+    assert check_state in status_list
+
+  def GetVcnIDFromName(self):
+    """Looks up the VCN OCID by display name."""
+    get_cmd = util.OCI_PREFIX + [
+        'network',
+        'vcn',
+        'list',
+        f'--display-name {self.name}']
+    get_cmd = util.GetEncodedCmd(get_cmd)
+    logging.info(get_cmd)
+    stdout, _, _ = vm_util.IssueCommand(get_cmd, raise_on_failure=False)
+    response = json.loads(stdout)
+    self.vcn_id = response['data'][0]['id']
+    logging.info(self.vcn_id)
+
+  def _Create(self):
+    """Creates the VCN with a custom CIDR block."""
+    logging.info('Creating VCN with custom CIDR block')
+    create_cmd = util.OCI_PREFIX + [
+        'network',
+        'vcn',
+        'create',
+        f'--display-name pkb-{FLAGS.run_uri}',
+        f'--dns-label vcn{FLAGS.run_uri}',
+        f'--freeform-tags {self.tags}',
+        '--from-json \'{"cidr-blocks":["172.16.0.0/16"]}\'']
+    create_cmd = util.GetEncodedCmd(create_cmd)
+    logging.info(create_cmd)
+    stdout, _, _ = vm_util.IssueCommand(create_cmd, raise_on_failure=False)
+    response = json.loads(stdout)
+    self.vcn_id = response['data']['id']
+    self.cidr_block = response['data']['cidr-block']
+
+  def _Delete(self):
+    """Deletes the VCN."""
+    delete_cmd = util.OCI_PREFIX + [
+        'network',
+        'vcn',
+        'delete',
+        f'--vcn-id {self.vcn_id}',
+        '--force']
+    delete_cmd = util.GetEncodedCmd(delete_cmd)
+    stdout, _, _ = vm_util.IssueCommand(delete_cmd, raise_on_failure=False)
+
+  def GetSubnetIdFromVCNId(self):
+    """Looks up the subnet OCID from the VCN OCID."""
+    get_cmd = util.OCI_PREFIX + [
+        'network',
+        'subnet',
+        'list',
+        f'--vcn-id {self.vcn_id}']
+    get_cmd = util.GetEncodedCmd(get_cmd)
+    logging.info(get_cmd)
+    stdout, _, _ = vm_util.IssueCommand(get_cmd, raise_on_failure=False)
+    response = json.loads(stdout)
+    self.subnet_id = response['data'][0]['id']
+
+  @vm_util.Retry(poll_interval=60, log_errors=False)
+  def WaitForSubnetStatus(self, status_list):
+    """Waits until the subnet's status is in status_list."""
+    logging.info('Waiting until the subnet status is one of: %s', status_list)
+    status_cmd = util.OCI_PREFIX + [
+        'network',
+        'subnet',
+        'get',
+        f'--subnet-id {self.subnet_id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _ = vm_util.IssueRetryableCommand(status_cmd)
+    state = json.loads(out)
+    check_state = state['data']['lifecycle-state']
+    self.status = check_state
+    assert check_state in status_list
+
+  def CreateSubnet(self):
+    """Creates the subnet in the VCN."""
+    logging.info('Creating subnet')
+    create_cmd = util.OCI_PREFIX + [
+        'network',
+        'subnet',
+        'create',
+        f'--display-name pkb-{FLAGS.run_uri}',
+        f'--dns-label sub{FLAGS.run_uri}',
+        f'--cidr-block {self.cidr_block}',
+        f'--vcn-id {self.vcn_id}']
+    create_cmd = util.GetEncodedCmd(create_cmd)
+    stdout, _, _ = vm_util.IssueCommand(create_cmd, raise_on_failure=False)
+    response = json.loads(stdout)
+    self.subnet_id = response['data']['id']
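# --- Illustrative sketch (editor's note, not part of the patch): the create
# helpers above all read the new resource's OCID out of the oci CLI's JSON
# envelope. A toy parse over a canned response in the same {"data": ...}
# shape (the OCID is a made-up placeholder):
import json

fake_stdout = '''
{"data": {"id": "ocid1.vcn.oc1..example",
          "cidr-block": "172.16.0.0/16",
          "lifecycle-state": "PROVISIONING"}}
'''
response = json.loads(fake_stdout)
print(response['data']['id'], response['data']['cidr-block'])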
+
+  def DeleteSubnet(self):
+    """Deletes the subnet."""
+    logging.info('Deleting subnet')
+    delete_cmd = util.OCI_PREFIX + [
+        'network',
+        'subnet',
+        'delete',
+        f'--subnet-id {self.subnet_id}',
+        '--force']
+    delete_cmd = util.GetEncodedCmd(delete_cmd)
+    stdout, _, _ = vm_util.IssueCommand(delete_cmd, raise_on_failure=False)
+
+  @vm_util.Retry(poll_interval=60, log_errors=False)
+  def WaitForInternetGatewayStatus(self, status_list):
+    """Waits until the internet gateway's status is in status_list."""
+    logging.info('Waiting until the internet gateway status is one of: %s',
+                 status_list)
+    status_cmd = util.OCI_PREFIX + [
+        'network',
+        'internet-gateway',
+        'get',
+        f'--ig-id {self.ig_id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _ = vm_util.IssueRetryableCommand(status_cmd)
+    state = json.loads(out)
+    check_state = state['data']['lifecycle-state']
+    self.status = check_state
+    assert check_state in status_list
+
+  def CreateInternetGateway(self):
+    """Creates the internet gateway."""
+    logging.info('Creating internet gateway')
+    create_cmd = util.OCI_PREFIX + [
+        'network',
+        'internet-gateway',
+        'create',
+        f'--display-name pkb-{FLAGS.run_uri}',
+        f'--vcn-id {self.vcn_id}',
+        '--is-enabled True']
+    create_cmd = util.GetEncodedCmd(create_cmd)
+    stdout, _, _ = vm_util.IssueCommand(create_cmd, raise_on_failure=False)
+    response = json.loads(stdout)
+    self.ig_id = response['data']['id']
+
+  def DeleteInternetGateway(self):
+    """Deletes the internet gateway."""
+    logging.info('Deleting internet gateway')
+    delete_cmd = util.OCI_PREFIX + [
+        'network',
+        'internet-gateway',
+        'delete',
+        f'--ig-id {self.ig_id}',
+        '--force']
+    delete_cmd = util.GetEncodedCmd(delete_cmd)
+    stdout, _, _ = vm_util.IssueCommand(delete_cmd, raise_on_failure=False)
+
+  @vm_util.Retry(poll_interval=60, log_errors=False)
+  def WaitForRouteTableStatus(self, status_list):
+    """Waits until the route table's status is in status_list."""
+    logging.info('Waiting until the route table status is one of: %s',
+                 status_list)
+    status_cmd = util.OCI_PREFIX + [
+        'network',
+        'route-table',
+        'get',
+        f'--rt-id {self.rt_id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _ = vm_util.IssueRetryableCommand(status_cmd)
+    state = json.loads(out)
+    check_state = state['data']['lifecycle-state']
+    self.status = check_state
+    assert check_state in status_list
+
+  @vm_util.Retry(poll_interval=60, log_errors=False)
+  def WaitForSecurityListStatus(self, status_list):
+    """Waits until the security list's status is in status_list."""
+    logging.info('Waiting until the security list status is one of: %s',
+                 status_list)
+    status_cmd = util.OCI_PREFIX + [
+        'network',
+        'security-list',
+        'get',
+        f'--security-list-id {self.security_list_id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _ = vm_util.IssueRetryableCommand(status_cmd)
+    state = json.loads(out)
+    check_state = state['data']['lifecycle-state']
+    self.status = check_state
+    assert check_state in status_list
+
+  def UpdateRouteTable(self):
+    """Adds a default route through the internet gateway to the route table."""
+    logging.info('Updating route table with internet gateway route')
+    update_cmd = util.OCI_PREFIX + [
+        'network',
+        'route-table',
+        'update',
+        f'--rt-id {self.rt_id}',
+        '--force',
+        '--route-rules \'[{"cidrBlock":"0.0.0.0/0","networkEntityId":"%s"}]\'' % self.ig_id]
+    update_cmd = util.GetEncodedCmd(update_cmd)
+    stdout, _, _ = vm_util.IssueCommand(update_cmd, raise_on_failure=False)
+
+  def ClearRouteTable(self):
+    """Removes all route rules so the route table can be torn down."""
+    logging.info('Clearing route table rules')
+    update_cmd = util.OCI_PREFIX + [
+        'network',
+        'route-table',
+        'update',
+        f'--rt-id {self.rt_id}',
+        '--force',
+        '--route-rules \'[]\'']
+    update_cmd = util.GetEncodedCmd(update_cmd)
+    stdout, _, _ = vm_util.IssueCommand(update_cmd, raise_on_failure=False)
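# --- Illustrative sketch (editor's note, not part of the patch): the
# UpdateRouteTable method above hand-writes the --route-rules JSON payload.
# An equivalent, easier-to-audit construction with json.dumps (the gateway
# OCID is a made-up placeholder):
import json

ig_id = 'ocid1.internetgateway.oc1..example'
route_rules = json.dumps([{'cidrBlock': '0.0.0.0/0', 'networkEntityId': ig_id}])
print(f"--route-rules '{route_rules}'")
# --route-rules '[{"cidrBlock": "0.0.0.0/0", "networkEntityId": "ocid1.internetgateway.oc1..example"}]'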
+
+  def UpdateSecurityList(self):
+    """Opens all traffic on the internal CIDR and port 22 to the internet.
+
+    UNUSED / DEPRECATED: kept for reference; use AddSecurityListIngressRule.
+    """
+    logging.info('Updating default security list')
+    update_cmd = util.OCI_PREFIX + [
+        'network',
+        'security-list',
+        'update',
+        f'--security-list-id {self.security_list_id}',
+        '--force',
+        '--ingress-security-rules \'[{"source": "%s", "protocol": "all", "isStateless": false},'
+        '{"source": "0.0.0.0/0", "protocol": "6", "isStateless": false, "tcpOptions": {'
+        '"destinationPortRange": {"max": 22, "min": 22}}}]\'' % self.cidr_block]
+    update_cmd = util.GetEncodedCmd(update_cmd)
+    stdout, _, _ = vm_util.IssueCommand(update_cmd, raise_on_failure=False)
+
+  def AddSecurityListIngressRule(self, start_port=22, end_port=None):
+    """Updates the security list to allow traffic on a specific port range."""
+    if not end_port:
+      end_port = start_port
+    logging.info('Adding ingress rule for ports %s : %s', start_port, end_port)
+    cmd = util.OCI_PREFIX + [
+        'network',
+        'security-list',
+        'update',
+        f'--security-list-id {self.security_list_id}',
+        '--force',
+        '--ingress-security-rules \'[{"source": "%s", "protocol": "all", "isStateless": false},'
+        '{"source": "0.0.0.0/0", "protocol": "6", "isStateless": false, "tcpOptions": {'
+        '"destinationPortRange": {"max": %i, "min": %i}}}]\'' % (self.cidr_block, end_port, start_port)]
+    cmd = util.GetEncodedCmd(cmd)
+    stdout, _, _ = vm_util.IssueCommand(cmd, raise_on_failure=False)
+
+  def GetDefaultRouteTableId(self):
+    """Gets the VCN's default route table OCID."""
+    status_cmd = util.OCI_PREFIX + [
+        'network',
+        'vcn',
+        'get',
+        f'--vcn-id {self.vcn_id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _, _ = vm_util.IssueCommand(status_cmd)
+    state = json.loads(out)
+    self.rt_id = state['data']['default-route-table-id']
+
+  def GetDefaultSecurityListId(self):
+    """Gets the VCN's default security list OCID."""
+    status_cmd = util.OCI_PREFIX + [
+        'network',
+        'vcn',
+        'get',
+        f'--vcn-id {self.vcn_id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _, _ = vm_util.IssueCommand(status_cmd)
+    state = json.loads(out)
+    self.security_list_id = state['data']['default-security-list-id']
+
+
+class OciNetwork(network.BaseNetwork):
+  """Object representing an OCI network."""
+
+  CLOUD = provider_info.OCI
+
+  def __init__(self, spec):
+    super(OciNetwork, self).__init__(spec)
+    self.name = FLAGS.oci_network_name or (
+        'perfkit-%s-%s' % (FLAGS.run_uri, str(uuid.uuid4())[-12:]))
+    self.region = spec.zone
+    self.use_vcn = FLAGS.oci_use_vcn
+    self.network_id = None
+    self.vcn_id = None
+    # The OciVcn wrapper is needed in both modes: when use_vcn is set it
+    # creates the VCN; otherwise it is used to look up the existing one.
+    self.vcn = OciVcn(self.name, self.region)
+    self.security_group = None
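# --- Illustrative sketch (editor's note, not part of the patch): the ingress
# rules built by AddSecurityListIngressRule above combine an allow-all rule
# for the VCN's own CIDR with one TCP range (protocol "6") open to the
# internet. The same payload via json.dumps (values are placeholders):
import json

def build_ingress_rules(vcn_cidr, start_port, end_port):
  return json.dumps([
      {'source': vcn_cidr, 'protocol': 'all', 'isStateless': False},
      {'source': '0.0.0.0/0', 'protocol': '6', 'isStateless': False,
       'tcpOptions': {'destinationPortRange': {'min': start_port,
                                               'max': end_port}}},
  ])

print(build_ingress_rules('172.16.0.0/16', 22, 22))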
+
+  @vm_util.Retry()
+  def Create(self):
+    """Creates the network."""
+    if self.use_vcn:
+      self.vcn.Create()
+      self.vcn.WaitForVcnStatus(['AVAILABLE'])
+      self.vcn.GetDefaultRouteTableId()
+      self.vcn.GetDefaultSecurityListId()
+      self.vcn.CreateSubnet()
+      self.vcn.WaitForSubnetStatus(['AVAILABLE'])
+      self.network_id = self.vcn.subnet_id
+      self.vcn.CreateInternetGateway()
+      self.vcn.WaitForInternetGatewayStatus(['AVAILABLE'])
+      self.vcn.UpdateRouteTable()
+      self.vcn.WaitForRouteTableStatus(['AVAILABLE'])
+      # Add an opening in the VCN for SSH.
+      self.vcn.AddSecurityListIngressRule(start_port=22)
+      self.vcn.WaitForSecurityListStatus(['AVAILABLE'])
+    else:
+      self.vcn.GetVcnIDFromName()
+      self.vcn.GetSubnetIdFromVCNId()
+      self.network_id = self.vcn.subnet_id
+
+  def Delete(self):
+    """Deletes the network."""
+    if self.use_vcn:
+      self.vcn.ClearRouteTable()
+      self.vcn.DeleteInternetGateway()
+      self.vcn.DeleteSubnet()
+      self.vcn.Delete()
+
+
+class OCIFirewall(network.BaseFirewall):
+  """An object representing the OCI firewall."""
+
+  CLOUD = provider_info.OCI
+
+  def AllowPort(self, vm, start_port, end_port=None):
+    """Opens a port range on a specific VM; normally called by the VM object.
+
+    Args:
+      vm: The virtual machine to open the port on.
+      start_port: The first port in the range.
+      end_port: The last port in the range; defaults to start_port.
+    """
+    if not vm.network.vcn:
+      # TODO: What happens when we do not have a vcn? Is that possible?
+      logging.error('Opening ports with OCI cloud is only supported when'
+                    ' using a VCN for now!')
+    else:
+      vm.network.vcn.AddSecurityListIngressRule(start_port, end_port=end_port)
diff --git a/perfkitbenchmarker/providers/oci/oci_virtual_machine.py b/perfkitbenchmarker/providers/oci/oci_virtual_machine.py
new file mode 100644
index 0000000000..c380cbddc1
--- /dev/null
+++ b/perfkitbenchmarker/providers/oci/oci_virtual_machine.py
@@ -0,0 +1,310 @@
+"""Class to represent an Oracle Virtual Machine object.
+
+Machine Types:
+https://docs.oracle.com/en-us/iaas/Content/Compute/References/computeshapes.htm
+
+All VM specifics are self-contained and the class provides methods to
+operate on the VM: boot, shutdown, etc.
+"""
+
+
+import itertools
+import json
+import logging
+import threading
+
+from absl import flags
+from perfkitbenchmarker import disk
+from perfkitbenchmarker import errors
+from perfkitbenchmarker import linux_virtual_machine
+from perfkitbenchmarker import provider_info
+from perfkitbenchmarker import virtual_machine
+from perfkitbenchmarker import vm_util
+from perfkitbenchmarker.configs import option_decoders
+from perfkitbenchmarker.providers.oci import util, oci_disk, oci_network
+
+FLAGS = flags.FLAGS
+
+INSTANCE_EXISTS_STATUSES = frozenset(
+    ['CREATING_IMAGE', 'MOVING', 'PROVISIONING', 'RUNNING', 'STARTING',
+     'STOPPED', 'STOPPING', 'TERMINATED', 'TERMINATING'])
+
+
+class OciVmSpec(virtual_machine.BaseVmSpec):
+  """Object containing the information needed to create an OCI VM."""
+
+  CLOUD = provider_info.OCI
+
+  def __init__(self, *args, **kwargs):
+    self.num_local_ssds = None
+    super(OciVmSpec, self).__init__(*args, **kwargs)
+
+  @classmethod
+  def _ApplyFlags(cls, config_values, flag_values):
+    super(OciVmSpec, cls)._ApplyFlags(config_values, flag_values)
+    if flag_values['oci_compute_units'].present:
+      config_values['oci_compute_units'] = flag_values.oci_compute_units
+    if flag_values['oci_compute_memory'].present:
+      config_values['oci_compute_memory'] = flag_values.oci_compute_memory
+    if flag_values['oci_availability_domain'].present:
+      config_values['oci_availability_domain'] = flag_values.oci_availability_domain
+    if flag_values['oci_fault_domain'].present:
+      config_values['oci_fault_domain'] = flag_values.oci_fault_domain
+    if flag_values['oci_boot_disk_size'].present:
+      config_values['oci_boot_disk_size'] = flag_values.oci_boot_disk_size
+    if flag_values['oci_use_vcn'].present:
+      config_values['oci_use_vcn'] = flag_values.oci_use_vcn
+    if flag_values['oci_num_local_ssds'].present:
+      config_values['num_local_ssds'] = flag_values.oci_num_local_ssds
+    if flag_values['machine_type'].present:
+      config_values['machine_type'] = flag_values.machine_type
+    if flag_values['oci_network_name'].present:
+      config_values['oci_network_name'] = flag_values.oci_network_name
+
+  @classmethod
+  def _GetOptionDecoderConstructions(cls):
+    
"""Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword + arguments to construct in order to decode the named option. + """ + result = super(OciVmSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'oci_compute_units': (option_decoders.IntDecoder, {'default': None}), + 'oci_compute_memory': (option_decoders.IntDecoder, {'default': None}), + 'oci_availability_domain': (option_decoders.StringDecoder, {'default': None}), + 'oci_fault_domain': (option_decoders.StringDecoder, {'default': None}), + 'oci_boot_disk_size': (option_decoders.IntDecoder, {'default': 50}), + 'oci_use_vcn': (option_decoders.BooleanDecoder, {'default': True}), + 'num_local_ssds': (option_decoders.IntDecoder, {'default': 0}), + 'machine_type': (option_decoders.StringDecoder, {'default': 'VM.Standard.A1.Flex'}), + 'region': (option_decoders.StringDecoder, {'default': None}), + 'oci_network_name': (option_decoders.StringDecoder, {'default': None}), + }) + return result + + +class OciVirtualMachine(virtual_machine.BaseVirtualMachine): + CLOUD = provider_info.OCI + + _counter_lock = threading.Lock() + _counter = itertools.count() + + def __init__(self, vm_spec): + super(OciVirtualMachine, self).__init__(vm_spec) + with self._counter_lock: + self.instance_number = next(self._counter) + + MAX_LOCAL_DISKS = 32 + self.name = 'perfkit-%s-%s' % (FLAGS.run_uri, self.instance_number) + self.ocid = '' + self.image = vm_spec.image or None + self.operating_system = None + self.operating_system_version = None + self.key_pair_name = "" + self.region = vm_spec.zone + self.subnet = None + self.availability_domain = vm_spec.oci_availability_domain + self.fault_domain = vm_spec.oci_fault_domain + self.machine_type = vm_spec.machine_type + self.compute_units = vm_spec.oci_compute_units + self.compute_memory = vm_spec.oci_compute_memory + self.bv_size = vm_spec.oci_boot_disk_size + self.ip_address = None + self.internal_ip = None + self.status = None + self.user_name = 'perfkit' + self.network = oci_network.OciNetwork.GetNetwork(self) + self.local_disk_counter = 0 + self.num_local_ssds = vm_spec.num_local_ssds + self.max_local_disks = MAX_LOCAL_DISKS + self.tags = util.MakeFormattedDefaultTags() + + @vm_util.Retry(poll_interval=60, log_errors=False) + def _WaitForInstanceStatus(self, status_list): + """Waits until the instance's status is in status_list.""" + logging.info('Waits until the instance\'s status is one of statuses: %s', + status_list) + status_cmd = util.OCI_PREFIX + [ + 'compute', + 'instance', + 'list', + f'--display-name {self.name}', + '--sort-order DESC'] + status_cmd = util.GetEncodedCmd(status_cmd) + out, _ = vm_util.IssueRetryableCommand(status_cmd) + state = json.loads(out) + check_state = state['data'][0]['lifecycle-state'] + self.status = check_state + assert check_state in status_list + + @vm_util.Retry(poll_interval=5, log_errors=False) + def _WaitForIPStatus(self, status_list): + """Waits until the instance's status is in status_list.""" + logging.info('Waits until the instance\'s status is one of statuses: %s', + status_list) + ipstatus_cmd = util.OCI_PREFIX + [ + 'compute', + 'instance', + 'list-vnics', + f'--instance-id {self.ocid}'] + ipstatus_cmd = util.GetEncodedCmd(ipstatus_cmd) + out, _ = vm_util.IssueRetryableCommand(ipstatus_cmd) + state = json.loads(out) + check_state = state['data'][0]['lifecycle-state'] + assert check_state 
in status_list + + def _Create(self): + if self.compute_units is None: + self.compute_units = 1 + + if self.compute_memory is None: + self.compute_memory = self.compute_units * 4 + ad_list = [] + if self.availability_domain is None: + ad_list = util.GetAvailabilityDomainFromRegion(self.region) + self.availability_domain = ad_list[0] + if self.fault_domain is None: + fd_list = util.GetFaultDomainFromAvailabilityDomain(self.availability_domain) + self.fault_domain = fd_list[0] + + if self.image is not None: + oci_image, oci_os_name, oci_os_version = util.GetOciImageIdFromName(self.image, self.machine_type) + else: + oci_os_name = util.GetOsFromImageFamily(self.DEFAULT_IMAGE_FAMILY) + oci_os_version = util.GetOsVersionFromOs(self.DEFAULT_IMAGE_PROJECT, oci_os_name) + oci_image = util.GetOciImageIdFromImage(oci_os_name, oci_os_version, self.machine_type) + self.image = oci_image + + shape_config = "'{\"memoryInGBs\":%s,\"ocpus\":%s}'" % (self.compute_memory, self.compute_units) + + key_file_path = vm_util.GetPublicKeyPath() + + public_key = util.GetPublicKey() + + if "Oracle" in oci_os_name: + user_data = util.ADD_CLOUDINIT_ORACLE_TEMPLATE.format(user_name=self.user_name, + public_key=public_key) + else: + user_data = util.ADD_CLOUDINIT_TEMPLATE.format(user_name=self.user_name, + public_key=public_key) + user_data_filepath = '/tmp/user_data-' + self.name + '.sh' + with open(user_data_filepath, 'w') as user_data_file: + user_data_file.write(user_data) + + create_cmd = util.OCI_PREFIX + [ + 'compute', + 'instance', + 'launch', + f'--subnet-id {self.network.network_id}', + f'--display-name {self.name}', + f'--hostname-label {self.name}', + f'--region {self.region}', + f'--availability-domain {self.availability_domain}', + f'--fault-domain {self.fault_domain}', + f'--image-id {self.image}', + f'--shape {self.machine_type}', + '--shape-config ', + f' {shape_config}', + f'--user-data-file {user_data_filepath}', + f' --boot-volume-size-in-gbs {self.bv_size}', + f'--freeform-tags {self.tags}', + f'--ssh-authorized-keys-file {key_file_path}', + '--assign-public-ip true'] + create_cmd = util.GetEncodedCmd(create_cmd) + stdout, _, ret = vm_util.IssueCommand(create_cmd) + ociid = json.loads(stdout) + self.ocid = ociid['data']['id'] + self._WaitForInstanceStatus(['RUNNING']) + self._GetPublicIP() + + def _GetPublicIP(self): + self._WaitForIPStatus(['AVAILABLE']) + ip_cmd = util.OCI_PREFIX + [ + 'compute', + 'instance', + 'list-vnics', + f'--instance-id {self.ocid}'] + ip_cmd = util.GetEncodedCmd(ip_cmd) + out, _, _ = vm_util.IssueCommand(ip_cmd) + ips = json.loads(out) + self.internal_ip = ips['data'][0]['private-ip'] + self.ip_address = ips['data'][0]['public-ip'] + + def _Delete(self): + if self.status == 'RUNNING': + delete_cmd = util.OCI_PREFIX + [ + 'compute', + 'instance', + 'terminate', + f'--instance-id {self.ocid}', + '--preserve-boot-volume false', + '--force'] + delete_cmd = util.GetEncodedCmd(delete_cmd) + out, _ = vm_util.IssueRetryableCommand(delete_cmd) + self._WaitForInstanceStatus(['TERMINATED']) + + def _Exists(self): + """Returns true if the VM exists.""" + if self.status == 'TERMINATED': + return False + return self.status in INSTANCE_EXISTS_STATUSES + + def CreateScratchDisk(self, disk_spec_id, disk_spec): + """Create a VM's scratch disk. + + Args: + disk_spec: virtual_machine.BaseDiskSpec object of the disk. 
+ """ + disk_number = disk_spec_id + self.local_disk_counter += 1 + if self.local_disk_counter > self.max_local_disks: + raise errors.Error('Not enough local disks.') + logging.info("Now starting to create disks") + data_disk = oci_disk.OciDisk(disk_spec, self.name, self.availability_domain, disk_number) + self.scratch_disks.append(data_disk) + data_disk.Create() + data_disk.GetFreeDeviceName(self) + data_disk.Attach(disk_spec, self) + if disk_spec.disk_type == 'iscsi': + data_disk.ExecuteAttachIscsiCommands(self) + self.FormatDisk(data_disk.GetDevicePath(), disk.LOCAL) + self.MountDisk(data_disk.GetDevicePath(), disk_spec.mount_point, + disk.LOCAL, data_disk.mount_options, + data_disk.fstab_options) + + def AllowPort(self, start_port, end_port=None, source_range=None): + + # TODO: Potentially replace for case where firewall skip flag is in place + super(OciVirtualMachine, self).AllowPort(start_port, end_port, source_range) + + +class Ubuntu2204BasedOCIVirtualMachine(OciVirtualMachine, + linux_virtual_machine.Ubuntu2204Mixin): + DEFAULT_IMAGE_FAMILY = 'ubuntu-os-cloud' + DEFAULT_IMAGE_PROJECT = 'ubuntu-2204-lts' + + +class Ubuntu2004BasedOCIVirtualMachine(OciVirtualMachine, + linux_virtual_machine.Ubuntu2004Mixin): + DEFAULT_IMAGE_FAMILY = 'ubuntu-os-cloud' + DEFAULT_IMAGE_PROJECT = 'ubuntu-2004-lts' + + +class Ubuntu1804BasedOCIVirtualMachine(OciVirtualMachine, + linux_virtual_machine.Ubuntu1804Mixin): + DEFAULT_IMAGE_FAMILY = 'ubuntu-os-cloud' + DEFAULT_IMAGE_PROJECT = 'ubuntu-1804-lts' + + +class Oracle9BasedVirtualMachine(OciVirtualMachine, + linux_virtual_machine.Oracle9Mixin): + DEFAULT_IMAGE_FAMILY = 'Oracle Linux' + DEFAULT_IMAGE_PROJECT = '9' + + +class Oracle8BasedVirtualMachine(OciVirtualMachine, + linux_virtual_machine.Oracle8Mixin): + DEFAULT_IMAGE_FAMILY = 'Oracle Linux' + DEFAULT_IMAGE_PROJECT = '8' diff --git a/perfkitbenchmarker/providers/oci/provider_info.py b/perfkitbenchmarker/providers/oci/provider_info.py new file mode 100644 index 0000000000..1c06087554 --- /dev/null +++ b/perfkitbenchmarker/providers/oci/provider_info.py @@ -0,0 +1,9 @@ +"""Provider info for OCI.""" + +from perfkitbenchmarker import provider_info +from perfkitbenchmarker import providers + + +class OCIProviderInfo(provider_info.BaseProviderInfo): + UNSUPPORTED_BENCHMARKS = ['mysql_service'] + CLOUD = provider_info.OCI diff --git a/perfkitbenchmarker/providers/oci/util.py b/perfkitbenchmarker/providers/oci/util.py new file mode 100644 index 0000000000..20f3c4146d --- /dev/null +++ b/perfkitbenchmarker/providers/oci/util.py @@ -0,0 +1,186 @@ +"""Utilities for working with OracleCloud Web Services resources.""" + +import shlex + +from absl import flags +from perfkitbenchmarker import vm_util +import six +import json +from perfkitbenchmarker import context + +OCI_PREFIX = ['oci'] + +ADD_CLOUDINIT_TEMPLATE = """#!/bin/bash +echo "{user_name} ALL = NOPASSWD: ALL" >> /etc/sudoers +useradd {user_name} --home /home/{user_name} --shell /bin/bash -m +mkdir /home/{user_name}/.ssh +echo "{public_key}" >> /home/{user_name}/.ssh/authorized_keys +chown -R {user_name}:{user_name} /home/{user_name}/.ssh +chmod 700 /home/{user_name}/.ssh +chmod 600 /home/{user_name}/.ssh/authorized_keys +sudo iptables -F +""" + +ADD_CLOUDINIT_ORACLE_TEMPLATE = """#!/bin/bash +echo "{user_name} ALL = NOPASSWD: ALL" >> /etc/sudoers +useradd {user_name} --home /home/{user_name} --shell /bin/bash -m +mkdir /home/{user_name}/.ssh +echo "{public_key}" >> /home/{user_name}/.ssh/authorized_keys +chown -R {user_name}:{user_name} 
/home/{user_name}/.ssh +chmod 700 /home/{user_name}/.ssh +chmod 600 /home/{user_name}/.ssh/authorized_keys +sudo systemctl stop firewalld +sudo systemctl disable firewalld +""" + + +def GetEncodedCmd(cmd): + cmd_line = ' '.join(cmd) + cmd_args = shlex.split(cmd_line) + return cmd_args + + +def GetOciImageIdFromImage(operating_system, operating_system_version, shape): + # oci compute image list --all --operating-system "Canonical Ubuntu" --operating-system-version 18.04 --shape + # VM.Standard.A1.Flex -c ocid1.tenancy.oc1..aaaaaaaadfogwfmgjoi35onknsnu6u5zfp43gh657appkvbghhzyhfhh5oya + create_cmd = OCI_PREFIX + [ + 'compute', + 'image', + 'list', + '--all', + '--operating-system \"%s\"' % operating_system, + '--operating-system-version \"%s\"' % operating_system_version, + '--shape %s' % shape] + create_cmd = GetEncodedCmd(create_cmd) + stdout, _ = vm_util.IssueRetryableCommand(create_cmd) + image_names = json.loads(stdout)['data'] + if len(image_names) > 0: + return image_names[0]['id'] + + +def GetOciImageIdFromName(name, shape): + create_cmd = OCI_PREFIX + [ + 'compute', + 'image', + 'list', + '--all', + '--display-name \"%s\"' % name, + '--shape %s' % shape] + create_cmd = GetEncodedCmd(create_cmd) + stdout, _ = vm_util.IssueRetryableCommand(create_cmd) + image_names = json.loads(stdout)['data'] + if len(image_names) > 0: + return image_names[0]['id'], image_names[0]['operating-system'], image_names[0]['operating-system-version'] + + +def GetAvailabilityDomainFromRegion(region): + create_cmd = OCI_PREFIX + [ + 'iam', + 'availability-domain', + 'list', + '--region %s' % region] + create_cmd = GetEncodedCmd(create_cmd) + stdout, _ = vm_util.IssueRetryableCommand(create_cmd) + availability_domains = json.loads(stdout)['data'] + availability_domains_list = [] + if len(availability_domains) == 1: + availability_domains_list.append(availability_domains[0]['name']) + elif len(availability_domains) == 2: + availability_domains_list.append(availability_domains[0]['name']) + availability_domains_list.append(availability_domains[1]['name']) + else: + availability_domains_list.append(availability_domains[0]['name']) + availability_domains_list.append(availability_domains[1]['name']) + availability_domains_list.append(availability_domains[2]['name']) + return availability_domains_list + + +def GetFaultDomainFromAvailabilityDomain(availability_domain): + create_cmd = OCI_PREFIX + [ + 'iam', + 'fault-domain', + 'list', + '--availability-domain %s' % availability_domain] + create_cmd = GetEncodedCmd(create_cmd) + stdout, _ = vm_util.IssueRetryableCommand(create_cmd) + fault_domains = json.loads(stdout)['data'] + fault_domains_list = [] + if len(fault_domains) == 1: + fault_domains_list.append(fault_domains[0]['name']) + elif len(fault_domains) == 2: + fault_domains_list.append(fault_domains[0]['name']) + fault_domains_list.append(fault_domains[1]['name']) + else: + fault_domains_list.append(fault_domains[0]['name']) + fault_domains_list.append(fault_domains[1]['name']) + fault_domains_list.append(fault_domains[2]['name']) + return fault_domains_list + + +def GetOsFromImageFamily(operating_system): + if "ubuntu" in operating_system: + return 'Canonical Ubuntu' + elif "Oracle" in operating_system: + return 'Oracle Linux' + + +def GetOsVersionFromOs(operating_system_version, operating_system): + if operating_system == 'Canonical Ubuntu': + if "1804" in operating_system_version: + return '18.04' + elif "2004" in operating_system_version: + return '20.04' + elif "2204" in operating_system_version: + return 
'22.04' + elif operating_system == 'Oracle Linux': + if '9' in operating_system_version: + return '9' + elif '8' in operating_system_version: + return '8' + + +def GetPublicKey(): + cat_cmd = ['cat', + vm_util.GetPublicKeyPath()] + keyfile, _ = vm_util.IssueRetryableCommand(cat_cmd) + return keyfile.strip() + + +def FormatTagsJSON(tags_dict): + """Format a dict of tags into arguments. + + Args: + tags_dict: Tags to be formatted. + + Returns: + A string contains formatted tags + """ + tags = ','.join(f'"{k}": "{v}"' for k, v in sorted(six.iteritems(tags_dict)) if k != 'owner') + return json.dumps(tags) + + +def GetDefaultTags(timeout_minutes=None): + """Get the default tags in a dictionary. + + Args: + timeout_minutes: Timeout used for setting the timeout_utc tag. + + Returns: + A dict of tags, contributed from the benchmark spec. + """ + benchmark_spec = context.GetThreadBenchmarkSpec() + if not benchmark_spec: + return {} + return benchmark_spec.GetResourceTags(timeout_minutes) + + +def MakeFormattedDefaultTags(timeout_minutes=None): + """Get the default tags formatted. + + Args: + timeout_minutes: Timeout used for setting the timeout_utc tag. + + Returns: + A string contains tags, contributed from the benchmark spec. + """ + return "{" + FormatTagsJSON(GetDefaultTags(timeout_minutes)) + "}" diff --git a/perfkitbenchmarker/publisher.py b/perfkitbenchmarker/publisher.py index e5066735cc..7983379ef2 100755 --- a/perfkitbenchmarker/publisher.py +++ b/perfkitbenchmarker/publisher.py @@ -31,7 +31,6 @@ import pprint import sys import time -from typing import List import uuid from absl import flags @@ -306,8 +305,11 @@ def AddMetadata(self, metadata, benchmark_spec): class SamplePublisher(six.with_metaclass(abc.ABCMeta, object)): """An object that can publish performance samples.""" + # Time series data is long. Turn this flag off to hide time series data. + PUBLISH_CONSOLE_LOG_DATA = True + @abc.abstractmethod - def PublishSamples(self, samples: List[pkb_sample.SampleDict]): + def PublishSamples(self, samples: list[pkb_sample.SampleDict]): """Publishes 'samples'. PublishSamples will be called exactly once. Calling @@ -382,6 +384,8 @@ class PrettyPrintStreamPublisher(SamplePublisher): stream: File-like object. Output stream to print samples. """ + PUBLISH_CONSOLE_LOG_DATA = False + def __init__(self, stream=None): super().__init__() self.stream = stream or sys.stdout @@ -458,10 +462,16 @@ def PublishSamples(self, samples): self._FormatMetadata(benchmark_meta))) for sample in test_samples: - meta = {k: v for k, v in six.iteritems(sample['metadata']) - if k not in all_constant_meta} - result.write(' {0:<30s} {1:>15f} {2:<30s}'.format( - sample['metric'], sample['value'], sample['unit'])) + meta = { + k: v + for k, v in six.iteritems(sample['metadata']) + if k not in all_constant_meta + } + result.write( + ' {0:<30s} {1:>15f} {2:<30s}'.format( + sample['metric'], sample['value'], sample['unit'] + ) + ) if meta: result.write(' ({0})'.format(self._FormatMetadata(meta))) result.write('\n') @@ -485,6 +495,8 @@ class LogPublisher(SamplePublisher): logger: Logger to publish to. Defaults to the root logger. """ + PUBLISH_CONSOLE_LOG_DATA = False + def __init__(self, level=logging.INFO, logger=None): super().__init__() self.level = level @@ -915,6 +927,7 @@ class SampleCollector(object): Attributes: samples: A list of Sample objects as dicts. + samples_for_console: A list of Sample objects to publish to console. metadata_providers: A list of MetadataProvider objects. Metadata providers to use. 
Defaults to DEFAULT_METADATA_PROVIDERS. publishers: A list of SamplePublisher objects to publish to. @@ -928,14 +941,15 @@ class SampleCollector(object): def __init__(self, metadata_providers=None, publishers=None, publishers_from_flags=True, add_default_publishers=True): - self.samples: List[pkb_sample.SampleDict] = [] + self.samples: list[pkb_sample.SampleDict] = [] + self.samples_for_console: list[pkb_sample.SampleDict] = [] if metadata_providers is not None: self.metadata_providers = metadata_providers else: self.metadata_providers = DEFAULT_METADATA_PROVIDERS - self.publishers: List[SamplePublisher] = publishers[:] if publishers else [] + self.publishers: list[SamplePublisher] = publishers[:] if publishers else [] for publisher_class in EXTERNAL_PUBLISHERS: self.publishers.append(publisher_class()) if publishers_from_flags: @@ -1031,13 +1045,20 @@ def AddSamples(self, samples, benchmark, benchmark_spec): sample['sample_uri'] = str(uuid.uuid4()) self.samples.append(sample) + if not s.DisableConsoleLog(): + self.samples_for_console.append(sample) + def PublishSamples(self): """Publish samples via all registered publishers.""" if not self.samples: logging.warning('No samples to publish.') return for publisher in self.publishers: - publisher.PublishSamples(self.samples) + publisher.PublishSamples( + self.samples + if publisher.PUBLISH_CONSOLE_LOG_DATA + else self.samples_for_console + ) self.samples = [] diff --git a/perfkitbenchmarker/relational_db.py b/perfkitbenchmarker/relational_db.py index 9d8ad330e7..e12dbc51e7 100644 --- a/perfkitbenchmarker/relational_db.py +++ b/perfkitbenchmarker/relational_db.py @@ -230,20 +230,40 @@ def client_vm(self): raise RelationalDbPropertyNotSetError('client_vm is not set') return self._client_vm + # TODO(user): add support for multiple client VMs @client_vm.setter def client_vm(self, client_vm): self._client_vm = client_vm + def _GetDbConnectionProperties( + self, + ) -> sql_engine_utils.DbConnectionProperties: + return sql_engine_utils.DbConnectionProperties( + self.spec.engine, + self.spec.engine_version, + self.endpoint, + self.port, + self.spec.database_username, + self.spec.database_password, + ) + + # TODO(user): Deprecate in favor of client_vms_query_tools @property def client_vm_query_tools(self): if not hasattr(self, '_client_vm_query_tools'): - connection_properties = sql_engine_utils.DbConnectionProperties( - self.spec.engine, self.spec.engine_version, self.endpoint, self.port, - self.spec.database_username, self.spec.database_password) - self._client_vm_query_tools = sql_engine_utils.GetQueryToolsByEngine( - self.client_vm, connection_properties) + self._client_vm_query_tools = self.client_vms_query_tools[0] return self._client_vm_query_tools + @property + def client_vms_query_tools(self) -> list[sql_engine_utils.ISQLQueryTools]: + if not hasattr(self, '_client_vms_query_tools'): + connection_properties = self._GetDbConnectionProperties() + self._client_vms_query_tools = [ + sql_engine_utils.GetQueryToolsByEngine(vm, connection_properties) + for vm in self.client_vms + ] + return self._client_vms_query_tools + @property def client_vm_query_tools_for_replica(self): """Query tools to make custom queries on replica.""" @@ -258,8 +278,11 @@ def client_vm_query_tools_for_replica(self): return self._client_vm_query_tools_for_replica def SetVms(self, vm_groups): - self.client_vm = vm_groups['clients' if 'clients' in - vm_groups else 'default'][0] + self.client_vms = vm_groups[ + 'clients' if 'clients' in vm_groups else 'default' + ] + # 
TODO(user): Remove this after moving to multiple client VMs. + self.client_vm = self.client_vms[0] @property def endpoint(self): diff --git a/perfkitbenchmarker/sample.py b/perfkitbenchmarker/sample.py index 6b282966fd..606c10b295 100644 --- a/perfkitbenchmarker/sample.py +++ b/perfkitbenchmarker/sample.py @@ -24,6 +24,9 @@ PERCENTILES_LIST = 0.1, 1, 5, 10, 50, 90, 95, 99, 99.9 +# Add this flag to the metadata to hide logging to console. +DISABLE_CONSOLE_LOG = 'disable_console_log' + _SAMPLE_FIELDS = 'metric', 'value', 'unit', 'metadata', 'timestamp' # Metric names for time series @@ -155,6 +158,20 @@ def __eq__(self, other) -> bool: return False return True + def DisableConsoleLog(self) -> bool: + """Disable log to console when this return True.""" + + # Disable Console log is set as a metadata rather than a field + # is due to the current structure of samples class. + # Adding extra field to a sample might break serialization of some publisher + # pipeline as they expect certain format. + # Modyfing asdict function is also not enough because when we pickle + # the samples, + return ( + DISABLE_CONSOLE_LOG in self.metadata + and self.metadata[DISABLE_CONSOLE_LOG] + ) + def asdict(self)-> Dict[str, Any]: # pylint:disable=invalid-name """Converts the Sample to a dictionary.""" return self._asdict() diff --git a/perfkitbenchmarker/static_virtual_machine.py b/perfkitbenchmarker/static_virtual_machine.py index 456665c705..af3bd692f6 100644 --- a/perfkitbenchmarker/static_virtual_machine.py +++ b/perfkitbenchmarker/static_virtual_machine.py @@ -36,6 +36,7 @@ from perfkitbenchmarker import os_types from perfkitbenchmarker import resource from perfkitbenchmarker import virtual_machine +from perfkitbenchmarker import windows_virtual_machine FLAGS = flags.FLAGS @@ -393,3 +394,10 @@ class Debian10BasedStaticVirtualMachine(StaticVirtualMachine, class Debian11BasedStaticVirtualMachine(StaticVirtualMachine, linux_virtual_machine.Debian11Mixin): pass + + +class Windows2019SQLServer2019StandardStaticVirtualMachine( + StaticVirtualMachine, + windows_virtual_machine.Windows2019SQLServer2019Standard, +): + pass diff --git a/perfkitbenchmarker/test_util.py b/perfkitbenchmarker/test_util.py index a98e9647e7..67ec5b1efe 100644 --- a/perfkitbenchmarker/test_util.py +++ b/perfkitbenchmarker/test_util.py @@ -85,6 +85,19 @@ def assertSampleListsEqualUpToTimestamp(self, a, b, msg=None): ex.args = (ex.message,) raise ex + def assertSampleInList(self, a, b, msg=None): # pylint:disable=invalid-name + """Assert that sample a is in list b (up to timestamp).""" + found = False + for s in b: + try: + self.assertSamplesEqualUpToTimestamp(a, s, msg=msg) + except self.failureException: + continue + found = True + if not found: + msg = msg or f'{a} not found in {b}.' + raise AssertionError(msg) + def assertDiskMounts(benchmark_config, mount_point): """Test whether a disk mounts in a given configuration. 
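# --- Illustrative sketch (editor's note, not part of the patch): a
# self-contained analogue of the new assertSampleInList helper above, which
# passes when any sample in the list matches field by field, ignoring only
# the timestamp (sample dicts are simplified here):
def samples_equal_up_to_timestamp(a, b):
  keys = ('metric', 'value', 'unit', 'metadata')
  return all(a[k] == b[k] for k in keys)

def sample_in_list(target, samples):
  return any(samples_equal_up_to_timestamp(target, s) for s in samples)

got = [{'metric': 'latency', 'value': 1.5, 'unit': 'ms',
        'metadata': {}, 'timestamp': 123.4}]
want = {'metric': 'latency', 'value': 1.5, 'unit': 'ms',
        'metadata': {}, 'timestamp': 999.9}
assert sample_in_list(want, got)  # Timestamps differ, still a match.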
diff --git a/perfkitbenchmarker/time_triggers/maintenance_simulation_trigger.py b/perfkitbenchmarker/time_triggers/maintenance_simulation_trigger.py index 6c921b0018..96b755aed2 100644 --- a/perfkitbenchmarker/time_triggers/maintenance_simulation_trigger.py +++ b/perfkitbenchmarker/time_triggers/maintenance_simulation_trigger.py @@ -15,6 +15,7 @@ import collections import copy +import logging import statistics from typing import Any, List, Dict @@ -114,7 +115,10 @@ def AppendSamples( for vm in self.vms: vm.WaitLMNotificationRelease() lm_events_dict = vm.CollectLMNotificationsTime() - lm_ends = max(lm_ends, float(lm_events_dict['Host_maintenance_end'])) + # Host maintenance is in s + lm_ends = max( + lm_ends, float(lm_events_dict['Host_maintenance_end']) * 1000 + ) samples.append( sample.Sample( 'LM Total Time', @@ -199,6 +203,9 @@ def _AggregateThroughputSample(self, s: sample.Sample) -> List[sample.Sample]: median = statistics.median(base_line_values) mean = statistics.mean(base_line_values) + logging.info('LM Baseline median: %s', median) + logging.info('LM Baseline mean: %s', mean) + # Keep the metadata from the original sample except time series metadata for field in sample.TIME_SERIES_METADATA: if field in metadata: @@ -213,6 +220,10 @@ def _AggregateThroughputSample(self, s: sample.Sample) -> List[sample.Sample]: if values_after_lm_ends: mean_after_lm_ends = statistics.mean(values_after_lm_ends) samples += self._ComputeDegradation(mean, mean_after_lm_ends, metadata) + logging.info('Mean after LM ends: %s', mean_after_lm_ends) + logging.info( + 'Number of samples after LM ends: %s', len(values_after_lm_ends) + ) return samples def _ComputeLossPercentile( diff --git a/perfkitbenchmarker/traces/otel.py b/perfkitbenchmarker/traces/otel.py index c6a2ea747d..e477748fab 100644 --- a/perfkitbenchmarker/traces/otel.py +++ b/perfkitbenchmarker/traces/otel.py @@ -34,6 +34,14 @@ flags.DEFINE_integer( 'otel_interval_secs', 60, 'Interval of the metrics to collect.' 
) + +_HIDE_LOGGING = flags.DEFINE_boolean( + 'otel_hide_logging', + True, + 'Hide logging to console for otel metrics.', +) + + flags.DEFINE_string( 'otel_config_file', './otel/config.yaml', @@ -209,6 +217,8 @@ def _Analyze(role, file): parsed_metrics[name]['vm_role'] = role for key, value in parsed_metrics.items(): + if _HIDE_LOGGING.value: + value[sample.DISABLE_CONSOLE_LOG] = True samples.append( sample.Sample( metric=key, value=-1, unit=value['unit'], metadata=value diff --git a/perfkitbenchmarker/virtual_machine.py b/perfkitbenchmarker/virtual_machine.py index 9b5d1c2138..3d70155284 100644 --- a/perfkitbenchmarker/virtual_machine.py +++ b/perfkitbenchmarker/virtual_machine.py @@ -162,7 +162,7 @@ class BootCompletionIpSubset(enum.Enum): GPU_T4 = 't4' GPU_L4 = 'l4' GPU_A10 = 'a10' -VALID_GPU_TYPES = [ +TESLA_GPU_TYPES = [ GPU_K80, GPU_P100, GPU_V100, @@ -170,9 +170,9 @@ class BootCompletionIpSubset(enum.Enum): GPU_P4, GPU_P4_VWS, GPU_T4, - GPU_L4, GPU_A10, ] +VALID_GPU_TYPES = TESLA_GPU_TYPES + [GPU_L4] CPUARCH_X86_64 = 'x86_64' CPUARCH_AARCH64 = 'aarch64' diff --git a/perfkitbenchmarker/vm_util.py b/perfkitbenchmarker/vm_util.py index dcbdf0361c..08c469bb0b 100644 --- a/perfkitbenchmarker/vm_util.py +++ b/perfkitbenchmarker/vm_util.py @@ -110,6 +110,18 @@ 'wait for unresponsive servers.') +class RetryError(Exception): + """Base class for retry errors.""" + + +class TimeoutExceededRetryError(RetryError): + """Exception that is raised when a retryable function times out.""" + + +class RetriesExceededRetryError(RetryError): + """Exception that is raised when a retryable function hits its retry limit.""" + + class IpAddressSubset(object): """Enum of options for --ip_addresses.""" REACHABLE = 'REACHABLE' @@ -252,6 +264,12 @@ def Retry(poll_interval=POLL_INTERVAL, max_retries=MAX_RETRIES, Returns: A function that wraps functions in retry logic. It can be used as a decorator. + + Raises: + TimeoutExceededRetryError - if the provided (or default) timeout is exceeded + while retrying the wrapped function. + RetriesExceededRetryError - if the provided (or default) limit on the number + of retry attempts is exceeded while retrying the wrapped function. """ if retryable_exceptions is None: # TODO(user) Make retries less aggressive. 
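# --- Illustrative sketch (editor's note, not part of the patch): the hunk
# below splits the old bare re-raise into two typed failures, chaining the
# last underlying exception as __cause__. A toy demonstration of the
# retries-exceeded path:
class RetryError(Exception):
  pass

class RetriesExceededRetryError(RetryError):
  pass

def retry_call(fn, max_retries):
  tries = 0
  while True:
    tries += 1
    try:
      return fn()
    except ValueError as e:
      if tries > max_retries:
        raise RetriesExceededRetryError() from e

def always_fails():
  raise ValueError('flaky')

try:
  retry_call(always_fails, max_retries=2)
except RetriesExceededRetryError as e:
  print(type(e.__cause__).__name__)  # ValueError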
@@ -276,9 +294,10 @@ def WrappedFunction(*args, **kwargs): except retryable_exceptions as e: fuzz_multiplier = 1 - fuzz + random.random() * fuzz sleep_time = poll_interval * fuzz_multiplier - if ((time.time() + sleep_time) >= deadline or - (max_retries >= 0 and tries > max_retries)): - raise + if (time.time() + sleep_time) >= deadline: + raise TimeoutExceededRetryError() from e + elif max_retries >= 0 and tries > max_retries: + raise RetriesExceededRetryError() from e else: if log_errors: logging.info('Retrying exception running %s: %s', f.__name__, e) diff --git a/perfkitbenchmarker/windows_virtual_machine.py b/perfkitbenchmarker/windows_virtual_machine.py index 1dd2e1acfd..350081dbe9 100644 --- a/perfkitbenchmarker/windows_virtual_machine.py +++ b/perfkitbenchmarker/windows_virtual_machine.py @@ -401,7 +401,7 @@ def OnStartup(self): self.home_dir = stdout.strip() stdout, _ = self.RemoteCommand('echo $env:SystemDrive') self.system_drive = stdout.strip() - self.RemoteCommand('mkdir %s' % self.temp_dir) + self.RemoteCommand('mkdir %s -Force' % self.temp_dir) self.DisableGuestFirewall() def _Reboot(self): diff --git a/requirements.txt b/requirements.txt index 2b3e340faa..72a16523b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,6 +31,8 @@ google-cloud-monitoring==0.31.1 beautifulsoup4 requests python-datetime-tz +matplotlib +seaborn # unlike setup.py requirements.txt cannot require a Python version AFAICT # this hack requires a non-existent package when Python is less than 3.9 diff --git a/tests/configs/benchmark_config_spec_test.py b/tests/configs/benchmark_config_spec_test.py index 58c94c1d3c..74418877a7 100644 --- a/tests/configs/benchmark_config_spec_test.py +++ b/tests/configs/benchmark_config_spec_test.py @@ -171,7 +171,7 @@ class VmGroupSpecTestCase(pkb_common_test_case.PkbCommonTestCase): def setUp(self): super(VmGroupSpecTestCase, self).setUp() self._spec_class = vm_group_decoders.VmGroupSpec - self._kwargs = {'cloud': provider_info.GCP, 'os_type': os_types.UBUNTU1804, + self._kwargs = {'cloud': provider_info.GCP, 'os_type': os_types.DEFAULT, 'vm_spec': _GCP_AWS_VM_CONFIG} def testMissingValues(self): @@ -187,7 +187,7 @@ def testDefaults(self): self.assertEqual(result.cloud, 'GCP') self.assertEqual(result.disk_count, 1) self.assertIsNone(result.disk_spec) - self.assertEqual(result.os_type, 'ubuntu1804') + self.assertEqual(result.os_type, 'ubuntu2004') self.assertEqual(result.static_vms, []) self.assertEqual(result.vm_count, 1) self.assertIsInstance(result.vm_spec, gce_virtual_machine.GceVmSpec) @@ -280,7 +280,7 @@ def testMissingCloudDiskConfig(self): self._spec_class( _COMPONENT, cloud=provider_info.GCP, - os_type=os_types.UBUNTU1804, + os_type=os_types.DEFAULT, disk_spec={}, vm_spec=_GCP_AWS_VM_CONFIG) self.assertEqual( @@ -293,7 +293,7 @@ def testMissingCloudVmConfig(self): self._spec_class( _COMPONENT, cloud=provider_info.GCP, - os_type=os_types.UBUNTU1804, + os_type=os_types.DEFAULT, vm_spec={}) self.assertEqual( str(cm.exception), @@ -333,7 +333,7 @@ def testNonPresentFlagsAndPresentConfigValues(self): _COMPONENT, flag_values=self.createNonPresentFlags(), vm_count=2, **self._kwargs) self.assertEqual(result.cloud, 'GCP') - self.assertEqual(result.os_type, 'ubuntu1804') + self.assertEqual(result.os_type, 'ubuntu2004') self.assertEqual(result.vm_count, 2) def testVmCountNone(self): @@ -369,13 +369,13 @@ def testNone(self): def testValidInput(self): result = self._decoder.Decode({ - 'default': {'cloud': provider_info.GCP, 'os_type': os_types.UBUNTU1804, + 'default': 
{'cloud': provider_info.GCP, 'os_type': os_types.DEFAULT, 'vm_spec': _GCP_AWS_VM_CONFIG}}, _COMPONENT, {}) self.assertIsInstance(result, dict) self.assertEqual(len(result), 1) self.assertIsInstance(result['default'], vm_group_decoders.VmGroupSpec) self.assertEqual(result['default'].cloud, 'GCP') - self.assertEqual(result['default'].os_type, 'ubuntu1804') + self.assertEqual(result['default'].os_type, 'ubuntu2004') self.assertIsInstance(result['default'].vm_spec, gce_virtual_machine.GceVmSpec) @@ -385,7 +385,7 @@ def testInvalidInput(self): { 'default': { 'cloud': provider_info.GCP, - 'os_type': os_types.UBUNTU1804, + 'os_type': os_types.DEFAULT, 'static_vms': [{}, {'fake_option': 1.2}], 'vm_spec': _GCP_AWS_VM_CONFIG, } @@ -451,7 +451,7 @@ def setUp(self): self._spec_class = benchmark_config_spec.BenchmarkConfigSpec self._description = 'Test description.' self._vm_groups = {'default': {'cloud': provider_info.GCP, - 'os_type': os_types.UBUNTU1804, + 'os_type': os_types.DEFAULT, 'vm_spec': _GCP_AWS_VM_CONFIG}} self._kwargs = {'description': self._description, 'vm_groups': self._vm_groups} @@ -466,7 +466,7 @@ def testValidInput(self): self.assertIsInstance(result.vm_groups['default'], vm_group_decoders.VmGroupSpec) self.assertEqual(result.vm_groups['default'].cloud, 'GCP') - self.assertEqual(result.vm_groups['default'].os_type, 'ubuntu1804') + self.assertEqual(result.vm_groups['default'].os_type, 'ubuntu2004') self.assertIsInstance(result.vm_groups['default'].vm_spec, gce_virtual_machine.GceVmSpec) @@ -483,7 +483,7 @@ def testInvalidVmGroups(self): def testMismatchedOsTypes(self): self._kwargs['vm_groups'] = { os_type + '_group': {'os_type': os_type, 'vm_spec': _GCP_AWS_VM_CONFIG} - for os_type in (os_types.UBUNTU1804, os_types.RHEL8, + for os_type in (os_types.DEFAULT, os_types.RHEL8, os_types.WINDOWS2019_CORE)} expected_os_types = os_types.JUJU, os_types.WINDOWS2019_CORE with self.assertRaises(errors.Config.InvalidValue) as cm: @@ -497,7 +497,7 @@ def testMismatchedOsTypes(self): "'juju', 'windows2019_core'. 
The following VM group options are " "invalid:{sep}" "test_component.vm_groups['rhel8_group'].os_type: 'rhel8'{sep}" - "test_component.vm_groups['ubuntu1804_group'].os_type: 'ubuntu1804'" + "test_component.vm_groups['ubuntu2004_group'].os_type: 'ubuntu2004'" .format(sep=os.linesep))) def testFlagOverridesPropagate(self): @@ -515,7 +515,7 @@ def testFlagOverridesPropagate(self): self.assertIsInstance(result.vm_groups['default'], vm_group_decoders.VmGroupSpec) self.assertEqual(result.vm_groups['default'].cloud, 'AWS') - self.assertEqual(result.vm_groups['default'].os_type, 'ubuntu1804') + self.assertEqual(result.vm_groups['default'].os_type, 'ubuntu2004') self.assertIsInstance(result.vm_groups['default'].vm_spec, virtual_machine.BaseVmSpec) diff --git a/tests/data/elasticache_describe_cluster.txt b/tests/data/elasticache_describe_cluster.txt new file mode 100644 index 0000000000..80e3614226 --- /dev/null +++ b/tests/data/elasticache_describe_cluster.txt @@ -0,0 +1,111 @@ +{ + "ReplicationGroups": [ + { + "ReplicationGroupId": "pkb-cbf06969", + "Description": "pkb-cbf06969", + "GlobalReplicationGroupInfo": {}, + "Status": "available", + "PendingModifiedValues": {}, + "MemberClusters": [ + "pkb-cbf06969-0001-001", + "pkb-cbf06969-0002-001", + "pkb-cbf06969-0003-001", + "pkb-cbf06969-0004-001", + "pkb-cbf06969-0005-001", + "pkb-cbf06969-0006-001" + ], + "NodeGroups": [ + { + "NodeGroupId": "0001", + "Status": "available", + "Slots": "0-2730", + "NodeGroupMembers": [ + { + "CacheClusterId": "pkb-cbf06969-0001-001", + "CacheNodeId": "0001", + "PreferredAvailabilityZone": "us-east-1c" + } + ] + }, + { + "NodeGroupId": "0002", + "Status": "available", + "Slots": "2731-5461", + "NodeGroupMembers": [ + { + "CacheClusterId": "pkb-cbf06969-0002-001", + "CacheNodeId": "0001", + "PreferredAvailabilityZone": "us-east-1a" + } + ] + }, + { + "NodeGroupId": "0003", + "Status": "available", + "Slots": "5462-8192", + "NodeGroupMembers": [ + { + "CacheClusterId": "pkb-cbf06969-0003-001", + "CacheNodeId": "0001", + "PreferredAvailabilityZone": "us-east-1b" + } + ] + }, + { + "NodeGroupId": "0004", + "Status": "available", + "Slots": "8193-10923", + "NodeGroupMembers": [ + { + "CacheClusterId": "pkb-cbf06969-0004-001", + "CacheNodeId": "0001", + "PreferredAvailabilityZone": "us-east-1a" + } + ] + }, + { + "NodeGroupId": "0005", + "Status": "available", + "Slots": "10924-13653", + "NodeGroupMembers": [ + { + "CacheClusterId": "pkb-cbf06969-0005-001", + "CacheNodeId": "0001", + "PreferredAvailabilityZone": "us-east-1b" + } + ] + }, + { + "NodeGroupId": "0006", + "Status": "available", + "Slots": "13654-16383", + "NodeGroupMembers": [ + { + "CacheClusterId": "pkb-cbf06969-0006-001", + "CacheNodeId": "0001", + "PreferredAvailabilityZone": "us-east-1c" + } + ] + } + ], + "AutomaticFailover": "enabled", + "MultiAZ": "disabled", + "ConfigurationEndpoint": { + "Address": "pkb-cbf06969.t88vpu.clustercfg.use1.cache.amazonaws.com", + "Port": 6379 + }, + "SnapshotRetentionLimit": 0, + "SnapshotWindow": "03:00-04:00", + "ClusterEnabled": true, + "CacheNodeType": "cache.m5.large", + "AuthTokenEnabled": false, + "TransitEncryptionEnabled": false, + "AtRestEncryptionEnabled": false, + "ARN": "arn:aws:elasticache:us-east-1:835761027970:replicationgroup:pkb-cbf06969", + "LogDeliveryConfigurations": [], + "ReplicationGroupCreateTime": "2023-05-24T02:58:42.581Z", + "DataTiering": "disabled", + "AutoMinorVersionUpgrade": true + } + ] +} diff --git a/tests/data/linux_boot/systemd2.output b/tests/data/linux_boot/systemd2.output index 
645e07295f..d034b21d32 100644 --- a/tests/data/linux_boot/systemd2.output +++ b/tests/data/linux_boot/systemd2.output @@ -1,7 +1,7 @@ The time when unit became active or started is printed after the "@" character. The time the unit took to start is printed after the "+" character. -systemd-sysctl.service +103ms +systemd-sysctl.service +103us systemd-modules-load.service @671ms +150ms systemd-journald.socket @530ms -.mount @300ms diff --git a/tests/data/netperf_results_multistreams.json b/tests/data/netperf_results_multistreams.json new file mode 100644 index 0000000000..7c79b29f91 --- /dev/null +++ b/tests/data/netperf_results_multistreams.json @@ -0,0 +1,22 @@ +[ + [ + "MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 20001 AF_INET to 104.154.50.86 () port 20001 AF_INET : +/-2.500% @ 99% conf.", + "Throughput,Throughput Units,Throughput Confidence Width (%),Confidence Iterations Run,Stddev Latency Microseconds,50th Percentile Latency Microseconds,90th Percentile Latency Microseconds,99th Percentile Latency Microseconds,Minimum Latency Microseconds,Maximum Latency Microseconds,Local Transport Retransmissions,Remote Transport Retransmissions,Transport MSS bytes", + "1000.00,10^6bits/s,10.100,20,1084.37,2,6,3374,1,3500,0,0,1408" + ], + [ + "MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 20001 AF_INET to 104.154.50.86 () port 20001 AF_INET : +/-2.500% @ 99% conf.", + "Throughput,Throughput Units,Throughput Confidence Width (%),Confidence Iterations Run,Stddev Latency Microseconds,50th Percentile Latency Microseconds,90th Percentile Latency Microseconds,99th Percentile Latency Microseconds,Minimum Latency Microseconds,Maximum Latency Microseconds,Local Transport Retransmissions,Remote Transport Retransmissions,Transport MSS bytes", + "2000.00,10^6bits/s,10.100,20,2084.37,2,6,3374,1,3500,0,0,1408" + ], + [ + "MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 20001 AF_INET to 104.154.50.86 () port 20001 AF_INET : +/-2.500% @ 99% conf.", + "Throughput,Throughput Units,Throughput Confidence Width (%),Confidence Iterations Run,Stddev Latency Microseconds,50th Percentile Latency Microseconds,90th Percentile Latency Microseconds,99th Percentile Latency Microseconds,Minimum Latency Microseconds,Maximum Latency Microseconds,Local Transport Retransmissions,Remote Transport Retransmissions,Transport MSS bytes", + "3000.00,10^6bits/s,10.100,20,1084.37,2,6,3374,1,3500,0,0,1408" + ], + [ + "MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 20001 AF_INET to 104.154.50.86 () port 20001 AF_INET : +/-2.500% @ 99% conf.", + "Throughput,Throughput Units,Throughput Confidence Width (%),Confidence Iterations Run,Stddev Latency Microseconds,50th Percentile Latency Microseconds,90th Percentile Latency Microseconds,99th Percentile Latency Microseconds,Minimum Latency Microseconds,Maximum Latency Microseconds,Local Transport Retransmissions,Remote Transport Retransmissions,Transport MSS bytes", + "4000.00,10^6bits/s,10.100,20,1084.37,2,6,3374,1,3500,0,0,1408" + ] +] diff --git a/tests/data/redis_cluster_shards.txt b/tests/data/redis_cluster_shards.txt new file mode 100644 index 0000000000..20f9b65085 --- /dev/null +++ b/tests/data/redis_cluster_shards.txt @@ -0,0 +1,120 @@ +slots +2731 +5461 +nodes +id +72a3d871c65a09476808bbf8bb6b7cc9facc6f0d +port +6379 +ip +10.0.1.117 +endpoint +10.0.1.117 +hostname + +role +master +replication-offset +0 +health +online +slots +10924 +13653 +nodes +id +99a2cc672e715527b53816cc57ab8e9f7661fcb4 +port +6379 +ip +10.0.3.217 +endpoint +10.0.3.217 +hostname + +role +master 
+replication-offset +0 +health +online +slots +8193 +10923 +nodes +id +e01888d0f30de88f2f0e271aa38237f9e1f7fc1b +port +6379 +ip +10.0.2.177 +endpoint +10.0.2.177 +hostname + +role +master +replication-offset +0 +health +online +slots +5462 +8192 +nodes +id +4ae859700bc028a88e4479fb964c7185575fd26e +port +6379 +ip +10.0.3.6 +endpoint +10.0.3.6 +hostname + +role +master +replication-offset +0 +health +online +slots +13654 +16383 +nodes +id +dae79ffbdbe5511fd163142c1aba5dad4ec72d0e +port +6379 +ip +10.0.2.104 +endpoint +10.0.2.104 +hostname + +role +master +replication-offset +0 +health +online +slots +0 +2730 +nodes +id +eaee237523068bb804e170b05726494c76b0c706 +port +6379 +ip +10.0.1.174 +endpoint +10.0.1.174 +hostname + +role +master +replication-offset +0 +health +online diff --git a/tests/disk_metadata_test.py b/tests/disk_metadata_test.py index 3cf897b934..617e5d37d0 100644 --- a/tests/disk_metadata_test.py +++ b/tests/disk_metadata_test.py @@ -87,7 +87,7 @@ def DoAwsDiskTest(self, disk_type, machine_type, vm_spec = aws_virtual_machine.AwsVmSpec( 'test_vm_spec.AWS', zone='us-east-1a', machine_type=machine_type) - vm = aws_virtual_machine.Ubuntu1804BasedAwsVirtualMachine(vm_spec) + vm = aws_virtual_machine.Ubuntu2004BasedAwsVirtualMachine(vm_spec) vm.GetNVMEDeviceInfo = mock.Mock() vm.GetNVMEDeviceInfo.return_value = [ { @@ -134,7 +134,7 @@ def DoAzureDiskTest(self, storage_type, disk_type, machine_type, vm_spec = azure_virtual_machine.AzureVmSpec( 'test_vm_spec.AZURE', zone='eastus2', machine_type=machine_type) - vm = azure_virtual_machine.Ubuntu1604BasedAzureVirtualMachine(vm_spec) + vm = azure_virtual_machine.Ubuntu2004BasedAzureVirtualMachine(vm_spec) azure_disk.AzureDisk.Create = mock.Mock() azure_disk.AzureDisk.Attach = mock.Mock() diff --git a/tests/gce_virtual_machine_test.py b/tests/gce_virtual_machine_test.py index fff8bdc073..29a1c067cf 100644 --- a/tests/gce_virtual_machine_test.py +++ b/tests/gce_virtual_machine_test.py @@ -386,10 +386,10 @@ def _CreateFakeReturnValues(self, fake_image=''): fake_rets.append((json.dumps(_CreateFakeDiskMetadata(fake_image)), '', 0)) return fake_rets - def testCreateUbuntu1804(self): + def testCreateUbuntu2004(self): vm_class = virtual_machine.GetVmClass(provider_info.GCP, - os_types.UBUNTU1804) - fake_image = 'fake-ubuntu1804' + os_types.UBUNTU2004) + fake_image = 'fake-ubuntu2004' with PatchCriticalObjects( self._CreateFakeReturnValues(fake_image)) as issue_command: vm = vm_class(self.spec) @@ -398,20 +398,20 @@ def testCreateUbuntu1804(self): command_string = ' '.join(issue_command.call_args[0][0]) self.assertEqual(issue_command.call_count, 1) - self.assertEqual(vm.GetDefaultImageFamily(False), 'ubuntu-1804-lts') - self.assertEqual(vm.GetDefaultImageFamily(True), 'ubuntu-1804-lts-arm64') + self.assertEqual(vm.GetDefaultImageFamily(False), 'ubuntu-2004-lts') + self.assertEqual(vm.GetDefaultImageFamily(True), 'ubuntu-2004-lts-arm64') self.assertEqual(vm.GetDefaultImageProject(), 'ubuntu-os-cloud') self.assertTrue(vm.SupportGVNIC()) self.assertIn('gcloud compute instances create', command_string) self.assertIn( - '--image-family ubuntu-1804-lts --image-project ubuntu-os-cloud', + '--image-family ubuntu-2004-lts --image-project ubuntu-os-cloud', command_string) self.assertNotIn('--boot-disk-size', command_string) self.assertNotIn('--boot-disk-type', command_string) vm._PostCreate() self.assertEqual(issue_command.call_count, 3) self.assertDictContainsSubset({'image': fake_image, - 'image_family': 'ubuntu-1804-lts', + 'image_family': 
'ubuntu-2004-lts', 'image_project': 'ubuntu-os-cloud', 'boot_disk_size': '10', 'boot_disk_type': 'pd-standard'}, @@ -420,8 +420,8 @@ def testCreateUbuntu1804(self): def testCreateUbuntuInCustomProject(self): """Test simulating passing --image and --image_project.""" vm_class = virtual_machine.GetVmClass(provider_info.GCP, - os_types.UBUNTU1804) - fake_image = 'fake-ubuntu1804' + os_types.UBUNTU2004) + fake_image = 'fake-ubuntu2004' fake_image_project = 'fake-project' spec = gce_virtual_machine.GceVmSpec(_COMPONENT, machine_type='fake-machine-type', @@ -437,7 +437,7 @@ def testCreateUbuntuInCustomProject(self): self.assertEqual(issue_command.call_count, 1) self.assertIn('gcloud compute instances create', command_string) self.assertIn( - '--image fake-ubuntu1804 --image-project fake-project', + '--image fake-ubuntu2004 --image-project fake-project', command_string) self.assertNotIn('--image-family', command_string) vm._PostCreate() @@ -451,8 +451,8 @@ def testCreateUbuntuInCustomProject(self): def testCreateUbuntuInCustomDisk(self): """Test simulating passing --image and --image_project.""" vm_class = virtual_machine.GetVmClass(provider_info.GCP, - os_types.UBUNTU1804) - fake_image = 'fake-ubuntu1804' + os_types.UBUNTU2004) + fake_image = 'fake-ubuntu2004' fake_image_project = 'fake-project' spec = gce_virtual_machine.GceVmSpec(_COMPONENT, machine_type='fake-machine-type', @@ -746,9 +746,10 @@ def testCreateRateLimitedMachineCreatedFailure(self, mock_cmd): 'memory': '1.0GiB', }) vm = pkb_common_test_case.TestGceVirtualMachine(spec) - with self.assertRaises( - errors.Benchmarks.QuotaFailure.RateLimitExceededError): + with self.assertRaises(vm_util.RetriesExceededRetryError) as e: vm._Create() + self.assertIs(type(e.exception.__cause__), + errors.Benchmarks.QuotaFailure.RateLimitExceededError) self.assertEqual(issue_command.call_count, util.RATE_LIMITED_MAX_RETRIES + 1) @@ -1009,7 +1010,7 @@ class GvnicTest(GceVirtualMachineTestCase): def setUp(self): super(GvnicTest, self).setUp() vm_spec = gce_virtual_machine.GceVmSpec('test_component', project='test') - self.vm = gce_virtual_machine.Ubuntu1804BasedGceVirtualMachine(vm_spec) + self.vm = gce_virtual_machine.Ubuntu2004BasedGceVirtualMachine(vm_spec) self.vm.HasPackage = mock.Mock(return_value=False) self.mock_cmd = mock.Mock() self.vm.RemoteCommand = self.mock_cmd diff --git a/tests/iaas_relational_db_test.py b/tests/iaas_relational_db_test.py index 013e29d55b..ca94b79602 100644 --- a/tests/iaas_relational_db_test.py +++ b/tests/iaas_relational_db_test.py @@ -119,7 +119,7 @@ def testMakePostgresClientCommand(self): db = FakeRelationalDb(self.postgres_spec) db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': [CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() self.assertEqual( db.client_vm_query_tools.MakeSqlCommand( @@ -132,7 +132,7 @@ def testIssuePostgresClientCommand(self): db = FakeRelationalDb(self.postgres_spec) db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': [CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() with mock.patch.object(db.client_vm, 'RemoteCommand') as remote_command: db.client_vm_query_tools.IssueSqlCommand('Select 1', database_name='abc') @@ -152,7 +152,7 @@ def testIssuePostgresClientCommandWithSessionVariables(self): db = FakeRelationalDb(self.postgres_spec) db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': 
[CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() with mock.patch.object(db.client_vm, 'RemoteCommand') as remote_command: db.client_vm_query_tools.IssueSqlCommand( @@ -174,7 +174,7 @@ def testIssuePostgresClientCommandWithSessionVariables(self): def testMakePostgresServerCommand(self): FLAGS['use_managed_db'].parse(False) db = FakeRelationalDb(self.postgres_spec) - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': [CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() @@ -187,7 +187,7 @@ def testMakePostgresServerCommand(self): def testMakeMysqlCientCommand(self): FLAGS['use_managed_db'].parse(False) db = FakeRelationalDb(self.mysql_spec) - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': [CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() @@ -199,7 +199,7 @@ def testMakeMysqlCientCommand(self): def testMakeMysqlCommandWithLocalHost(self): FLAGS['use_managed_db'].parse(False) db = FakeRelationalDb(self.mysql_spec) - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': [CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() @@ -211,7 +211,7 @@ def testMakeMysqlCommandWithLocalHost(self): def testMakeSqlserverCommand(self): FLAGS['use_managed_db'].parse(False) db = FakeRelationalDb(self.sqlserver_spec) - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': [CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() @@ -223,7 +223,7 @@ def testMakeSqlserverCommand(self): def testMakeSqlserverCommandWithLocalHost(self): FLAGS['use_managed_db'].parse(False) db = FakeRelationalDb(self.sqlserver_spec) - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': [CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() @@ -238,7 +238,7 @@ def testInstallMYSQLServer(self): db = FakeRelationalDb(self.mysql_spec) db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': [CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() db.server_vm.IS_REBOOTABLE = False db.client_vm.IS_REBOOTABLE = False diff --git a/tests/linux_benchmarks/cloud_redis_memtier_benchmark_test.py b/tests/linux_benchmarks/cloud_redis_memtier_benchmark_test.py index b1447d50de..f87d22bd3b 100644 --- a/tests/linux_benchmarks/cloud_redis_memtier_benchmark_test.py +++ b/tests/linux_benchmarks/cloud_redis_memtier_benchmark_test.py @@ -13,16 +13,47 @@ # limitations under the License. 
"""Tests for cloud_redis_memtier_benchmark.""" +import pathlib import unittest from absl import flags +from absl.testing import flagsaver import mock - +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.configs import benchmark_config_spec from perfkitbenchmarker.linux_benchmarks import cloud_redis_memtier_benchmark from perfkitbenchmarker.linux_packages import memtier +from perfkitbenchmarker.providers.aws import aws_elasticache_redis # pylint:disable=unused-import from tests import pkb_common_test_case FLAGS = flags.FLAGS +_CLUSTER_SHARDS_OUTPUT = 'redis_cluster_shards.txt' +_DESCRIBE_CLUSTER_OUTPUT = 'elasticache_describe_cluster.txt' + + +def _ReadFile(filename): + path = pathlib.Path(__file__).parents[1] / 'data' / filename + with open(path) as f: + return f.read() + + +def _GetTestRedisSpec(): + spec_args = {'cloud': 'AWS', 'redis_version': 'redis_6_x'} + return benchmark_config_spec._CloudRedisSpec( + 'test_component', flag_values=FLAGS, **spec_args + ) + + +def _GetTestRedisInstance(): + test_spec = _GetTestRedisSpec() + mock_bm_spec = mock.Mock() + mock_bm_spec.config.cloud_redis = test_spec + redis_class = cloud_redis_memtier_benchmark._GetManagedMemoryStoreClass() + instance = redis_class(mock_bm_spec) + instance._ip = '0.0.0.0' + instance._port = 1234 + return instance + class CloudRedisMemtierBenchmarkTest(pkb_common_test_case.PkbCommonTestCase): @@ -89,6 +120,83 @@ def testDelete(self): cloud_redis_memtier_benchmark.Cleanup(benchmark_spec) redis_instance.Delete.assert_called_once_with() + @flagsaver.flagsaver(cloud='AWS') + def testGetConnectionsMultiVm(self): + test_redis_instance = _GetTestRedisInstance() + test_redis_instance.name = 'pkb-cbf06969' + vm1 = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + vm1.ip_address = 'vm1' + vm2 = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + vm2.ip_address = 'vm2' + self.enter_context( + mock.patch.object( + vm1, + 'RemoteCommand', + return_value=(_ReadFile(_CLUSTER_SHARDS_OUTPUT), ''), + ) + ) + self.enter_context( + mock.patch.object( + vm_util, + 'IssueCommand', + return_value=(_ReadFile(_DESCRIBE_CLUSTER_OUTPUT), '', 0), + ) + ) + + connections = cloud_redis_memtier_benchmark._GetConnections( + [vm1, vm2], test_redis_instance + ) + + self.assertCountEqual( + connections, + [ + memtier.MemtierConnection(vm1, '10.0.1.117', 6379), + memtier.MemtierConnection(vm1, '10.0.2.104', 6379), + memtier.MemtierConnection(vm1, '10.0.3.217', 6379), + memtier.MemtierConnection(vm2, '10.0.2.177', 6379), + memtier.MemtierConnection(vm2, '10.0.1.174', 6379), + memtier.MemtierConnection(vm2, '10.0.3.6', 6379), + ], + ) + + @flagsaver.flagsaver(cloud='AWS') + def testGetConnectionsSingleVm(self): + test_redis_instance = _GetTestRedisInstance() + test_redis_instance.name = 'pkb-cbf06969' + vm1 = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + vm1.ip_address = 'vm1' + self.enter_context( + mock.patch.object( + vm1, + 'RemoteCommand', + return_value=(_ReadFile(_CLUSTER_SHARDS_OUTPUT), ''), + ) + ) + self.enter_context( + mock.patch.object( + vm_util, + 'IssueCommand', + return_value=(_ReadFile(_DESCRIBE_CLUSTER_OUTPUT), '', 0), + ) + ) + + connections = cloud_redis_memtier_benchmark._GetConnections( + [vm1], test_redis_instance + ) + + self.assertCountEqual( + connections, + [ + memtier.MemtierConnection(vm1, '0.0.0.0', 1234), + ], + ) + if __name__ == '__main__': unittest.main() diff --git 
a/tests/linux_benchmarks/cluster_boot_benchmark_test.py b/tests/linux_benchmarks/cluster_boot_benchmark_test.py
index 318287b53f..cdb5de5070 100644
--- a/tests/linux_benchmarks/cluster_boot_benchmark_test.py
+++ b/tests/linux_benchmarks/cluster_boot_benchmark_test.py
@@ -1,6 +1,6 @@
 """Tests for cluster_boot_delete."""

-import datetime
+from typing import Optional
 import unittest

 import freezegun
@@ -30,6 +30,28 @@ def vm_mock(index: int, timestamp: float) -> mock.Mock:
       OS_TYPE=f'linux{index}')


+def vm_mock_given_delete_times(
+    index: int,
+    delete_start_time: Optional[float] = None,
+    delete_end_time: Optional[float] = None) -> mock.Mock:
+  """Creates a mock vm with a provided delete_start_time and delete_end_time.
+
+  Args:
+    index: an integer specifying the index of the vm in the list of vms.
+    delete_start_time: mock timestamp indicating the start of the VM delete
+      process. May be passed in as None to test MeasureDelete logic.
+    delete_end_time: mock timestamp indicating the end of the VM delete
+      process. May be passed in as None to test MeasureDelete logic.
+
+  Returns:
+    A mock vm.
+  """
+  return mock.Mock(
+      delete_start_time=delete_start_time,
+      delete_end_time=delete_end_time,
+      OS_TYPE=f'linux{index}')
+
+
 class ClusterBootBenchmarkTest(pkb_common_test_case.PkbCommonTestCase,
                                test_util.SamplesTestMixin):

@@ -42,8 +64,7 @@ def testMeasureDelete(self):
     vms_to_test = [vm_mock(i, timestamp) for i in range(num_vms)]

     # call Delete on vms
-    with freezegun.freeze_time(datetime.datetime.utcfromtimestamp(timestamp)):
-      actual_samples = cluster_boot_benchmark.MeasureDelete(vms_to_test)
+    actual_samples = cluster_boot_benchmark.MeasureDelete(vms_to_test)

     # for all vms create mock samples ie the expected samples
     expected_delete_times = [5, 6, 7]
@@ -69,6 +90,65 @@ def testMeasureDelete(self):
     # assert actual and expected samples are equal
     self.assertSampleListsEqualUpToTimestamp(actual_samples, expected_samples)

+  def testMeasureDeleteNoValidVMs(self):
+    """MeasureDelete test where no VMs have valid delete measurements."""
+    timestamp = 1625863325.003580
+    vm_with_neither = vm_mock_given_delete_times(index=0)
+    vm_with_start_only = vm_mock_given_delete_times(
+        index=1, delete_start_time=timestamp)
+    # We never expect to see this in production, but it does ensure that
+    # the conditional measurement of delete times works for all combinations
+    # of missing attributes.
+    vm_with_end_only = vm_mock_given_delete_times(
+        index=2, delete_end_time=timestamp + 5)
+    vms_to_test = [vm_with_neither, vm_with_start_only, vm_with_end_only]
+
+    # invoke MeasureDelete
+    actual_samples = cluster_boot_benchmark.MeasureDelete(vms_to_test)
+
+    # None of these VMs can have their delete times measured.
+    self.assertEmpty(actual_samples)
+
+  def testMeasureDeleteAttributeChecks(self):
+    """MeasureDelete test where some VMs do not have valid delete measurements."""
+    timestamp = 1625863325.003580
+    vm_with_neither = vm_mock_given_delete_times(index=0)
+    vm_with_start_only = vm_mock_given_delete_times(index=1,
+                                                    delete_start_time=timestamp)
+    vm_with_both = vm_mock_given_delete_times(index=2,
+                                              delete_start_time=timestamp,
+                                              delete_end_time=timestamp + 5)
+    vms_to_test = [vm_with_neither, vm_with_start_only, vm_with_both]
+    # invoke MeasureDelete
+    actual_samples = cluster_boot_benchmark.MeasureDelete(vms_to_test)
+
+    # Only vm_with_both should have its delete time measured.
+    # Note that the machine_instance metadata field is set after filtering out
+    # invalid VMs, so we still expect this to be 0.
+ expected_delete_time = 5 + expected_instance_index = 2 + expected_num_vms = 1 + expected_samples = [ + sample.Sample( + 'Delete Time', expected_delete_time, 'seconds', { + 'machine_instance': 0, + 'num_vms': expected_num_vms, + 'os_type': f'linux{expected_instance_index}', + } + ) + ] + + expected_cluster_delete_time = 5 + expected_cluster_delete_metadata = { + 'num_vms': 1, + 'os_type': f'linux{expected_instance_index}', + } + expected_samples.append( + sample.Sample('Cluster Delete Time', expected_cluster_delete_time, + 'seconds', expected_cluster_delete_metadata)) + # assert actual and expected samples are equal + self.assertSampleListsEqualUpToTimestamp(actual_samples, expected_samples) + @freezegun.freeze_time('2023-03-07') def testGetTimeToBoot(self): context.SetThreadBenchmarkSpec( diff --git a/tests/linux_benchmarks/netperf_benchmark_test.py b/tests/linux_benchmarks/netperf_benchmark_test.py index 25df6c4bd7..3b68283b74 100644 --- a/tests/linux_benchmarks/netperf_benchmark_test.py +++ b/tests/linux_benchmarks/netperf_benchmark_test.py @@ -20,9 +20,9 @@ from absl.testing import flagsaver from absl.testing import parameterized import mock - from perfkitbenchmarker import benchmark_spec from perfkitbenchmarker import errors +from perfkitbenchmarker import flag_util from perfkitbenchmarker import vm_util from perfkitbenchmarker.linux_benchmarks import netperf_benchmark @@ -118,7 +118,9 @@ def testExternalAndInternal(self): ('TCP_CRR_Latency_max', 2500.0, 'us'), ('TCP_CRR_Latency_stddev', 551.07, 'us'), ('TCP_STREAM_Throughput', 1187.94, mbps), + ('TCP_STREAM_Throughput_1stream', 1187.94, mbps), ('TCP_STREAM_Throughput', 1973.37, 'Mbits/sec'), + ('TCP_STREAM_Throughput_1stream', 1973.37, 'Mbits/sec'), ('UDP_RR_Transaction_Rate', 1359.71, tps), ('UDP_RR_Latency_p50', 700.0, 'us'), ('UDP_RR_Latency_p90', 757.0, 'us'), @@ -136,14 +138,17 @@ def testExternalAndInternal(self): ('UDP_STREAM_Throughput', 1102.42, mbps), ('UDP_STREAM_Throughput', 1802.72, 'Mbits/sec'), ], - [i[:3] for i in result]) + [i[:3] for i in result], + ) external_meta = {'ip_type': 'external'} internal_meta = {'ip_type': 'internal'} - expected_meta = (([external_meta] * 7 + [internal_meta] * 7) * 2 + - [external_meta, internal_meta] + - [external_meta] * 7 + - [internal_meta] * 7) + expected_meta = ( + ([external_meta] * 7 + [internal_meta] * 7) * 2 + + [external_meta, external_meta, internal_meta, internal_meta] + + [external_meta] * 7 + + [internal_meta] * 7 + ) for i, meta in enumerate(expected_meta): self.assertIsInstance(result[i][3], dict) @@ -166,6 +171,55 @@ def testParseNetperfOutputError(self, output): False) self.assertIn('Failed to parse stdout', str(e.exception)) + @flagsaver.flagsaver(netperf_benchmarks=[netperf_benchmark.TCP_STREAM]) + def testMultiStreams(self): + self._ConfigureIpTypes() + num_streams = 4 + FLAGS.netperf_num_streams = flag_util.IntegerList([num_streams]) + self.should_run_external.return_value = True + self.should_run_internal.return_value = False + # Read netperf mock results for multiple streams + path = os.path.join( + os.path.dirname(__file__), + '..', + 'data', + 'netperf_results_multistreams.json', + ) + with open(path) as fp: + stdouts = ['\n'.join(i) for i in json.load(fp)] + self.expected_stdout = [] + for i in range(0, len(stdouts), num_streams): + self.expected_stdout.append( + json.dumps((stdouts[i : i + num_streams], [''], [0])) + ) + + vm_spec = mock.MagicMock(spec=benchmark_spec.BenchmarkSpec) + vm_spec.vms = [mock.MagicMock(), mock.MagicMock()] + 
vm_spec.vms[0].RobustRemoteCommand.side_effect = [ + (i, '') for i in self.expected_stdout + ] + vm_spec.vms[1].GetInternalIPs.return_value = ['test_ip'] + run_result = netperf_benchmark.Run(vm_spec) + result = [] + for sample in run_result: + if sample[0] not in ['start_time', 'end_time']: + result.append(sample) + + self.assertListEqual( + [ + ('TCP_STREAM_Throughput_p50', 3000.0, 'Mbits/sec'), + ('TCP_STREAM_Throughput_p90', 4000.0, 'Mbits/sec'), + ('TCP_STREAM_Throughput_p99', 4000.0, 'Mbits/sec'), + ('TCP_STREAM_Throughput_average', 2500.0, 'Mbits/sec'), + ('TCP_STREAM_Throughput_stddev', 1290.9944487358057, 'Mbits/sec'), + ('TCP_STREAM_Throughput_min', 1000.0, 'Mbits/sec'), + ('TCP_STREAM_Throughput_max', 4000.0, 'Mbits/sec'), + ('TCP_STREAM_Throughput_total', 10000.0, 'Mbits/sec'), + ('TCP_STREAM_Throughput_4streams', 10000.0, 'Mbits/sec'), + ], + [i[:3] for i in result], + ) + if __name__ == '__main__': unittest.main() diff --git a/tests/linux_packages/cmake_test.py b/tests/linux_packages/cmake_test.py index b6e483dc11..be695e8747 100644 --- a/tests/linux_packages/cmake_test.py +++ b/tests/linux_packages/cmake_test.py @@ -21,13 +21,13 @@ class CmakeTests(pkb_common_test_case.PkbCommonTestCase): @flagsaver.flagsaver(cmake_kitware=True) def testAptInstallViaKitware(self): - vm = MockVm('ubuntu1804') + vm = MockVm('ubuntu2004') cmake.AptInstall(vm) expected_cmds = [ 'curl --silent https://apt.kitware.com/keys/kitware-archive-latest.asc ' '| gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/kitware.gpg ' '>/dev/null', 'sudo apt-add-repository ' - '"deb https://apt.kitware.com/ubuntu/ bionic main"' + '"deb https://apt.kitware.com/ubuntu/ focal main"' ] vm.RemoteCommand.assert_has_calls([mock.call(cmd) for cmd in expected_cmds]) diff --git a/tests/linux_packages/linux_boot_test.py b/tests/linux_packages/linux_boot_test.py index 1eb390b234..cdeb44f4d5 100644 --- a/tests/linux_packages/linux_boot_test.py +++ b/tests/linux_packages/linux_boot_test.py @@ -124,9 +124,9 @@ def testParseSystemDCriticalChain(self): with open(os.path.join(self.data_dir, 'systemd2.output')) as f: output = f.read() self.assertEqual(linux_boot.ParseSystemDCriticalChainOutput(output), - 0.774) + 0.671103) self.assertEqual(linux_boot.ParseSystemDCriticalChainServiceTime(output), - 0.103) + 0.000103) def testCollectVmToVmSamples(self): """Test vm to vm networking result parsing.""" diff --git a/tests/linux_packages/memtier_test.py b/tests/linux_packages/memtier_test.py index a1d4f32c90..10ec80c459 100644 --- a/tests/linux_packages/memtier_test.py +++ b/tests/linux_packages/memtier_test.py @@ -8,6 +8,7 @@ from absl import flags from absl.testing import flagsaver +from perfkitbenchmarker import background_tasks from perfkitbenchmarker import sample from perfkitbenchmarker import test_util from perfkitbenchmarker.linux_packages import memtier @@ -43,13 +44,19 @@ --- GET 0 50.0 GET 2 100.00 +GET """ METADATA = { 'test': 'foobar', + 'p50_latency': 1.215, 'p90_latency': 2.295, 'p95_latency': 2.319, 'p99_latency': 2.399, + 'p99.5_latency': 3.871, + 'p99.9_latency': 3.872, + 'p99.950_latency': 3.873, + 'p99.990_latency': 3.874, 'avg_latency': 1.54, } @@ -345,6 +352,7 @@ def testAggregateMemtierWithOneResult(self): 'values': [1, 1, 1, 1, 1], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'ramp_down_starts': 4000, }, timestamp=0, ), @@ -436,6 +444,102 @@ def testAggregateMemtierWithOneResult(self): }, timestamp=0, ), + sample.Sample( + metric='Average Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 
'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='Max Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='Min Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p50.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p90.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p95.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p99.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p99.90_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), ] self.assertEqual(samples, expected_result) @@ -533,90 +637,366 @@ def testAggregateMemtierResultsWithMultipleResultsDifferentStartTime(self): timestamp=0, ), sample.Sample( - metric='OPS_time_series', + metric='OPS_time_series', + value=0.0, + unit='ops', + metadata={ + 'values': [1, 2, 3, 3, 3], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'ramp_down_starts': 4000, + }, + timestamp=0, + ), + sample.Sample( + metric='Average Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 5, 5, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='Max Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 5, 5, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='Min Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 5, 5, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p50.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 5, 5, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p90.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 5, 5, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p95.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 5, 5, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p99.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 5, 5, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + }, + timestamp=0, + ), + sample.Sample( + 
metric='p99.90_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 5, 5, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='Average Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='Average Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 5, 4, 3, 2], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='Average Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 0, 5, 4, 3], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 2, + }, + timestamp=0, + ), + sample.Sample( + metric='Max Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='Max Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 5, 4, 3, 2], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='Max Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 0, 5, 4, 3], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 2, + }, + timestamp=0, + ), + sample.Sample( + metric='Min Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='Min Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 5, 4, 3, 2], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='Min Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 0, 5, 4, 3], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 2, + }, + timestamp=0, + ), + sample.Sample( + metric='p50.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p50.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 5, 4, 3, 2], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p50.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 0, 5, 4, 3], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 2, + }, + timestamp=0, + ), + sample.Sample( + metric='p90.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p90.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 5, 4, 3, 2], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p90.00_time_series', value=0.0, - unit='ops', + unit='ms', metadata={ - 'values': [1, 2, 3, 3, 3], + 'values': [0, 0, 5, 4, 3], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 2, }, timestamp=0, ), sample.Sample( 
- metric='Average Latency_time_series', + metric='p95.00_time_series', value=0.0, unit='ms', metadata={ - 'values': [1, 5, 5, 4, 5], + 'values': [1, 2, 3, 4, 5], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 0, }, timestamp=0, ), sample.Sample( - metric='Max Latency_time_series', + metric='p95.00_time_series', value=0.0, unit='ms', metadata={ - 'values': [1, 5, 5, 4, 5], + 'values': [0, 5, 4, 3, 2], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 1, }, timestamp=0, ), sample.Sample( - metric='Min Latency_time_series', + metric='p95.00_time_series', value=0.0, unit='ms', metadata={ - 'values': [1, 5, 5, 4, 5], + 'values': [0, 0, 5, 4, 3], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 2, }, timestamp=0, ), sample.Sample( - metric='p50.00_time_series', + metric='p99.00_time_series', value=0.0, unit='ms', metadata={ - 'values': [1, 5, 5, 4, 5], + 'values': [1, 2, 3, 4, 5], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 0, }, timestamp=0, ), sample.Sample( - metric='p90.00_time_series', + metric='p99.00_time_series', value=0.0, unit='ms', metadata={ - 'values': [1, 5, 5, 4, 5], + 'values': [0, 5, 4, 3, 2], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 1, }, timestamp=0, ), sample.Sample( - metric='p95.00_time_series', + metric='p99.00_time_series', value=0.0, unit='ms', metadata={ - 'values': [1, 5, 5, 4, 5], + 'values': [0, 0, 5, 4, 3], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 2, }, timestamp=0, ), sample.Sample( - metric='p99.00_time_series', + metric='p99.90_time_series', value=0.0, unit='ms', metadata={ - 'values': [1, 5, 5, 4, 5], + 'values': [1, 2, 3, 4, 5], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 0, }, timestamp=0, ), @@ -625,9 +1005,22 @@ def testAggregateMemtierResultsWithMultipleResultsDifferentStartTime(self): value=0.0, unit='ms', metadata={ - 'values': [1, 5, 5, 4, 5], + 'values': [0, 5, 4, 3, 2], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p99.90_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 0, 5, 4, 3], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 2, }, timestamp=0, ), @@ -710,6 +1103,7 @@ def testAggregateMemtierResultsWithMultipleResults(self): 'values': [2, 2, 2, 2, 2], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'ramp_down_starts': 4000, }, timestamp=0, ), @@ -801,7 +1195,200 @@ def testAggregateMemtierResultsWithMultipleResults(self): }, timestamp=0, ), + sample.Sample( + metric='Average Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='Average Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [5, 4, 3, 2, 1], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='Max Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='Max Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [5, 4, 3, 2, 1], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + 
metric='Min Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='Min Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [5, 4, 3, 2, 1], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p50.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p50.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [5, 4, 3, 2, 1], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p90.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p90.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [5, 4, 3, 2, 1], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p95.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p95.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [5, 4, 3, 2, 1], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p99.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p99.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [5, 4, 3, 2, 1], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p99.90_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p99.90_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [5, 4, 3, 2, 1], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), ] + print(samples) self.assertEqual(samples, expected_result) def testParseResults_no_time_series(self): @@ -860,45 +1447,280 @@ def testParseResults_no_time_series(self): samples.extend(results.GetSamples(METADATA)) self.assertSampleListsEqualUpToTimestamp(samples, expected_result) + @flagsaver.flagsaver(num_cpus_override=16) def testMeasureLatencyCappedThroughput(self): mock_run_results = [ # Multi-pipeline - GetMemtierResult(7270, 0.175), - GetMemtierResult(386941, 6.751), - GetMemtierResult(424626, 3.247), - GetMemtierResult(408957, 1.591), - GetMemtierResult(398920, 0.839), - GetMemtierResult(408290, 1.207), - GetMemtierResult(405672, 1.015), - GetMemtierResult(408808, 0.951), - GetMemtierResult(405209, 0.967), - GetMemtierResult(398249, 1.015), - GetMemtierResult(409221, 0.967), - GetMemtierResult(413240, 0.975), - GetMemtierResult(412573, 0.975), + GetMemtierResult(10, 10.0), + GetMemtierResult(20, 5.0), + GetMemtierResult(30, 
2.0), + GetMemtierResult(8, 1.5), + GetMemtierResult(9, 0.7), + GetMemtierResult(3, 1.4), + GetMemtierResult(2, 0.8), + GetMemtierResult(4, 1.3), + GetMemtierResult(15, 0.9), + GetMemtierResult(7, 1.2), + GetMemtierResult(10, 0.9), + GetMemtierResult(1, 1.1), + GetMemtierResult(9, 0.9), + GetMemtierResult(30, 1.2), # Multi-client - GetMemtierResult(7433, 0.159), - GetMemtierResult(218505, 2.975), - GetMemtierResult(79875, 4.447), - GetMemtierResult(323469, 0.519), - GetMemtierResult(321503, 0.743), - GetMemtierResult(324469, 0.855), - GetMemtierResult(308853, 1.007), - GetMemtierResult(322717, 0.903), - GetMemtierResult(321258, 0.919), - GetMemtierResult(323695, 0.927), - GetMemtierResult(310044, 0.983), + GetMemtierResult(10, 10.0), + GetMemtierResult(20, 5.0), + GetMemtierResult(30, 2.0), + GetMemtierResult(8, 1.5), + GetMemtierResult(9, 0.7), + GetMemtierResult(3, 1.4), ] self.enter_context( mock.patch.object(memtier, '_Run', side_effect=mock_run_results)) - results = memtier.MeasureLatencyCappedThroughput(None, 'unused', 0) + mock_vm = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + results = memtier.MeasureLatencyCappedThroughput(mock_vm, 1, 'unused', 0) actual_throughputs = [] for s in results: if s.metric == 'Ops Throughput': actual_throughputs.append(s.value) - self.assertEqual(actual_throughputs, [413240, 324469]) + self.assertEqual(actual_throughputs, [15.0, 9.0]) + + def testRunParallelSingleVm(self): + vm1 = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + connections = [ + memtier.MemtierConnection(vm1, '10.0.1.117', 6379), + ] + mock_run_threaded = self.enter_context( + mock.patch.object(background_tasks, 'RunThreaded') + ) + + memtier._RunParallelConnections(connections, '0.0.0.0', 1234, 1, 2, 3) + + mock_run_threaded.assert_called_once_with( + memtier._Run, + [ + ( + (), + { + 'vm': vm1, + 'server_ip': '0.0.0.0', + 'server_port': 1234, + 'threads': 1, + 'clients': 2, + 'pipeline': 3, + 'password': None, + }, + ), + ], + ) + + def testRunParallelMultipleVms(self): + vm1 = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + vm1.ip_address = 'vm1' + vm2 = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + vm2.ip_address = 'vm2' + connections = [ + memtier.MemtierConnection(vm1, '10.0.1.117', 6379), + memtier.MemtierConnection(vm1, '10.0.2.104', 6379), + memtier.MemtierConnection(vm1, '10.0.3.217', 6379), + memtier.MemtierConnection(vm2, '10.0.2.177', 6379), + memtier.MemtierConnection(vm2, '10.0.1.174', 6379), + memtier.MemtierConnection(vm2, '10.0.3.6', 6379), + ] + mock_run_threaded = self.enter_context( + mock.patch.object(background_tasks, 'RunThreaded') + ) + + memtier._RunParallelConnections(connections, '0.0.0.0', 1234, 1, 2, 3) + + mock_run_threaded.assert_called_once_with( + memtier._Run, + [ + ( + (), + { + 'vm': vm1, + 'server_ip': '0.0.0.0', + 'server_port': 1234, + 'threads': 1, + 'clients': 2, + 'pipeline': 3, + 'password': None, + 'shard_addresses': ( + '10.0.1.117:6379,10.0.2.104:6379,10.0.3.217:6379' + ), + }, + ), + ( + (), + { + 'vm': vm2, + 'server_ip': '0.0.0.0', + 'server_port': 1234, + 'threads': 1, + 'clients': 2, + 'pipeline': 3, + 'password': None, + 'shard_addresses': ( + '10.0.2.177:6379,10.0.1.174:6379,10.0.3.6:6379' + ), + }, + ), + ], + ) + + @flagsaver.flagsaver(memtier_distribution_iterations=1, num_cpus_override=16) + def 
testMeasureLatencyCappedThroughputDistribution(self): + vm1 = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + vm1.ip_address = 'vm1' + vm2 = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + vm2.ip_address = 'vm2' + connections = [ + memtier.MemtierConnection(vm1, '10.0.1.117', 6379), + memtier.MemtierConnection(vm1, '10.0.2.104', 6379), + memtier.MemtierConnection(vm1, '10.0.3.217', 6379), + memtier.MemtierConnection(vm2, '10.0.2.177', 6379), + memtier.MemtierConnection(vm2, '10.0.1.174', 6379), + memtier.MemtierConnection(vm2, '10.0.3.6', 6379), + ] + + mock_binary_search = self.enter_context( + mock.patch.object( + memtier, + '_BinarySearchForLatencyCappedThroughput', + return_value=[ + memtier.MemtierResult( + parameters=memtier.MemtierBinarySearchParameters( + pipelines=1, threads=2, clients=3 + ) + ) + ], + ) + ) + mock_results = [ + memtier.MemtierResult( + ops_per_sec=0, + kb_per_sec=0, + latency_ms=0, + latency_dic={'90': 0, '95': 50, '99': 1.0}, + ), + memtier.MemtierResult( + ops_per_sec=200, + kb_per_sec=2, + latency_ms=0.2, + latency_dic={'90': 10, '95': 40, '99': 0.8}, + ), + memtier.MemtierResult( + ops_per_sec=400, + kb_per_sec=4, + latency_ms=0.4, + latency_dic={'90': 20, '95': 30, '99': 0.6}, + ), + memtier.MemtierResult( + ops_per_sec=600, + kb_per_sec=6, + latency_ms=0.6, + latency_dic={'90': 30, '95': 20, '99': 0.4}, + ), + memtier.MemtierResult( + ops_per_sec=800, + kb_per_sec=8, + latency_ms=0.8, + latency_dic={'90': 40, '95': 10, '99': 0.2}, + ), + memtier.MemtierResult( + ops_per_sec=1000, + kb_per_sec=10, + latency_ms=1.0, + latency_dic={'90': 50, '95': 0, '99': 0.0}, + ), + ] + mock_run = self.enter_context( + mock.patch.object( + memtier, + '_RunParallelConnections', + return_value=mock_results, + ) + ) + + results = memtier.MeasureLatencyCappedThroughputDistribution( + connections, '0.0.0.0', 1234, [vm1, vm2], 6 + ) + + expected_metadata = { + 'distribution_iterations': 1, + 'threads': 2, + 'clients': 3, + 'pipelines': 1, + } + + with self.subTest('SamplesAreCorrect'): + # self.assertSampleListsEqualUpToTimestamp(results, expected_samples) + self.assertSampleInList( + sample.Sample( + metric='Mean ops_per_sec', + value=500.0, + unit='ops/s', + metadata=expected_metadata, + ), + results, + ) + self.assertSampleInList( + sample.Sample( + metric='Stdev kb_per_sec', + value=3.7416573867739413, + unit='KB/s', + metadata=expected_metadata, + ), + results, + ) + with self.subTest('BinarySearchHasCorrectArgs'): + mock_binary_search.assert_called_once_with( + connections, [memtier._ClientModifier(10, 16)], '0.0.0.0', 1234, None + ) + with self.subTest('RunHasCorrectArgs'): + mock_run.assert_has_calls( + [mock.call(connections, '0.0.0.0', 1234, 2, 3, 1, None)] + ) + + def testCombineResults(self): + result1 = memtier.MemtierResult( + ops_per_sec=800, + kb_per_sec=8, + latency_ms=0.8, + latency_dic={'90': 40, '95': 10, '99': 0.2}, + metadata={'test_metadata': True}, + parameters=memtier.MemtierBinarySearchParameters(lower_bound=1), + ) + result2 = memtier.MemtierResult( + ops_per_sec=1000, + kb_per_sec=10, + latency_ms=1.0, + latency_dic={'90': 50, '95': 0, '99': 0.0}, + ) + expected_result = memtier.MemtierResult( + ops_per_sec=1800, + kb_per_sec=18, + latency_ms=0.9, + latency_dic={'90': 45, '95': 5, '99': 0.1}, + metadata={'test_metadata': True}, + parameters=memtier.MemtierBinarySearchParameters(lower_bound=1), + ) + self.assertEqual( + expected_result, 
memtier._CombineResults([result1, result2]) + ) if __name__ == '__main__': diff --git a/tests/linux_packages/speccpu_test.py b/tests/linux_packages/speccpu_test.py index 83f4fb24c0..9869197528 100644 --- a/tests/linux_packages/speccpu_test.py +++ b/tests/linux_packages/speccpu_test.py @@ -423,7 +423,7 @@ def setUp(self): self.addCleanup(mock.patch.stopall) def testParseResultsC(self): - vm = mock.Mock(vm=linux_virtual_machine.Ubuntu1804Mixin) + vm = mock.Mock(vm=linux_virtual_machine.Ubuntu2004Mixin) spec_test_config = speccpu.SpecInstallConfigurations() spec_test_config.benchmark_name = 'speccpu2006' spec_test_config.log_format = r'Est. (SPEC.*_base2006)\s*(\S*)' @@ -465,7 +465,7 @@ def testParseResultsC(self): def testParseSpeedResults(self): speccpu.FLAGS.spec_runmode = 'base' - vm = mock.Mock(vm=linux_virtual_machine.Ubuntu1804Mixin) + vm = mock.Mock(vm=linux_virtual_machine.Ubuntu2004Mixin) spec_test_config = speccpu.SpecInstallConfigurations() spec_test_config.benchmark_name = 'speccpu2006' spec_test_config.log_format = r'Est. (SPEC.*_base2006)\s*(\S*)' @@ -478,7 +478,7 @@ def testParseSpeedResults(self): def testParseAllResults(self): speccpu.FLAGS.spec_runmode = 'all' - vm = mock.Mock(vm=linux_virtual_machine.Ubuntu1804Mixin) + vm = mock.Mock(vm=linux_virtual_machine.Ubuntu2004Mixin) spec_test_config = speccpu.SpecInstallConfigurations() spec_test_config.benchmark_name = 'speccpu2017' spec_test_config.log_format = r'Est. (SPEC.*2017_.*_base)\s*(\S*)' @@ -491,7 +491,7 @@ def testParseAllResults(self): def testParsePeakResults(self): speccpu.FLAGS.spec_runmode = 'peak' - vm = mock.Mock(vm=linux_virtual_machine.Ubuntu1804Mixin) + vm = mock.Mock(vm=linux_virtual_machine.Ubuntu2004Mixin) spec_test_config = speccpu.SpecInstallConfigurations() spec_test_config.benchmark_name = 'speccpu2017' spec_test_config.log_format = r'Est. (SPEC.*2017_.*_base)\s*(\S*)' @@ -504,7 +504,7 @@ def testParsePeakResults(self): def testParsePartialPeakResults(self): speccpu.FLAGS.spec_runmode = 'peak' - vm = mock.Mock(vm=linux_virtual_machine.Ubuntu1804Mixin) + vm = mock.Mock(vm=linux_virtual_machine.Ubuntu2004Mixin) spec_test_config = speccpu.SpecInstallConfigurations() spec_test_config.benchmark_name = 'speccpu2017' spec_test_config.log_format = r'Est. 
(SPEC.*2017_.*_base)\s*(\S*)' diff --git a/tests/linux_packages/ycsb_test.py b/tests/linux_packages/ycsb_test.py index 9aea5c39ce..de07a6b3ab 100644 --- a/tests/linux_packages/ycsb_test.py +++ b/tests/linux_packages/ycsb_test.py @@ -24,6 +24,7 @@ import mock from perfkitbenchmarker import errors from perfkitbenchmarker.linux_packages import ycsb +from perfkitbenchmarker.linux_packages import ycsb_stats from tests import matchers from tests import pkb_common_test_case @@ -38,7 +39,7 @@ def open_data_file(filename): def _parse_and_return_time_series(filename): content = open_data_file(filename) - return ycsb.ParseResults(content, 'timeseries') + return ycsb_stats.ParseResults(content, 'timeseries') class SimpleResultParserTestCase(pkb_common_test_case.PkbCommonTestCase): @@ -46,36 +47,39 @@ class SimpleResultParserTestCase(pkb_common_test_case.PkbCommonTestCase): def setUp(self): super(SimpleResultParserTestCase, self).setUp() self.contents = open_data_file('ycsb-test-run.dat') - self.results = ycsb.ParseResults(self.contents, 'histogram') + self.results = ycsb_stats.ParseResults(self.contents, 'histogram') def testCommandLineSet(self): self.assertEqual( - 'Command line: -db com.yahoo.ycsb.BasicDB ' - '-P workloads/workloada -t', self.results.command_line) + 'Command line: -db com.yahoo.ycsb.BasicDB -P workloads/workloada -t', + self.results.command_line, + ) def testClientSet(self): self.assertEqual('YCSB Client 0.1', self.results.client) def testUpdateStatisticsParsed(self): self.assertEqual( - ycsb._OpResult( + ycsb_stats._OpResult( group='update', statistics={ 'Operations': 531, 'Return=0': 531, - 'AverageLatency(ms)': .0659774011299435, + 'AverageLatency(ms)': 0.0659774011299435, 'MinLatency(ms)': 0.042, - 'MaxLatency(ms)': .345, + 'MaxLatency(ms)': 0.345, '95thPercentileLatency(ms)': 0, - '99thPercentileLatency(ms)': 0 + '99thPercentileLatency(ms)': 0, }, - data_type=ycsb.HISTOGRAM, - data=[(0, 530), (19, 1)]), - self.results.groups['update']) + data_type=ycsb_stats.HISTOGRAM, + data=[(0, 530), (19, 1)], + ), + self.results.groups['update'], + ) def testReadStatisticsParsed(self): self.assertEqual( - ycsb._OpResult( + ycsb_stats._OpResult( group='read', statistics={ 'Operations': 469, @@ -84,23 +88,24 @@ def testReadStatisticsParsed(self): 'MinLatency(ms)': 0.034, 'MaxLatency(ms)': 0.102, '95thPercentileLatency(ms)': 0, - '99thPercentileLatency(ms)': 0 + '99thPercentileLatency(ms)': 0, }, - data_type=ycsb.HISTOGRAM, - data=[(0, 469)]), - self.results.groups['read']) + data_type=ycsb_stats.HISTOGRAM, + data=[(0, 469)], + ), + self.results.groups['read'], + ) def testOverallStatisticsParsed(self): self.assertEqual( - ycsb._OpResult( + ycsb_stats._OpResult( group='overall', - statistics={ - 'RunTime(ms)': 80.0, - 'Throughput(ops/sec)': 12500.0 - }, + statistics={'RunTime(ms)': 80.0, 'Throughput(ops/sec)': 12500.0}, data_type='histogram', - data=[]), - self.results.groups['overall']) + data=[], + ), + self.results.groups['overall'], + ) class DetailedResultParserTestCase(unittest.TestCase): @@ -108,23 +113,24 @@ class DetailedResultParserTestCase(unittest.TestCase): def setUp(self): super(DetailedResultParserTestCase, self).setUp() self.contents = open_data_file('ycsb-test-run-2.dat') - self.results = ycsb.ParseResults(self.contents, 'histogram') + self.results = ycsb_stats.ParseResults(self.contents, 'histogram') def testPercentilesFromHistogram_read(self): hist = self.results.groups['read'].data - percentiles = ycsb._PercentilesFromHistogram(hist) + percentiles = 
ycsb_stats._PercentilesFromHistogram(hist) self.assertEqual(1, percentiles['p50']) self.assertEqual(7, percentiles['p99']) def testPercentilesFromHistogram_update(self): hist = self.results.groups['update'].data - percentiles = ycsb._PercentilesFromHistogram(hist) + percentiles = ycsb_stats._PercentilesFromHistogram(hist) self.assertEqual(1, percentiles['p50']) self.assertEqual(7, percentiles['p99']) -class ThroughputTimeSeriesParserTestCase(pkb_common_test_case.PkbCommonTestCase - ): +class ThroughputTimeSeriesParserTestCase( + pkb_common_test_case.PkbCommonTestCase +): def setUp(self): super().setUp() @@ -139,7 +145,7 @@ def testParsedThroughputTimeSeriesIsCorrect(self): 30: 2496.8, 40: 2509.6, 50: 2487.2, - 60: 2513.2 + 60: 2513.2, } self.assertEqual(results.throughput_time_series, expected) @@ -148,10 +154,11 @@ def testCombinedThroughputTimeSeriesIsCorrect(self): results_1 = _parse_and_return_time_series('ycsb-time-series.dat') results_2 = _parse_and_return_time_series('ycsb-time-series-2.dat') - combined = ycsb._CombineResults( + combined = ycsb_stats.CombineResults( result_list=[results_1, results_2], - measurement_type=ycsb.TIMESERIES, - combined_hdr={}) + measurement_type=ycsb_stats.TIMESERIES, + combined_hdr={}, + ) expected = { 10: 4187.5, @@ -168,14 +175,22 @@ class BadResultParserTestCase(unittest.TestCase): def testBadTestRun(self): contents = open_data_file('ycsb-test-run-3.dat') - self.assertRaises(errors.Benchmarks.KnownIntermittentError, - ycsb.ParseResults, contents, 'histogram') + self.assertRaises( + errors.Benchmarks.KnownIntermittentError, + ycsb_stats.ParseResults, + contents, + 'histogram', + ) - @flagsaver.flagsaver(ycsb_max_error_rate=0.95) def testErrorRate(self): contents = open_data_file('ycsb-test-run-4.dat') - self.assertRaises(errors.Benchmarks.RunError, ycsb.ParseResults, contents, - 'hdrhistogram') + self.assertRaises( + errors.Benchmarks.RunError, + ycsb_stats.ParseResults, + contents, + 'hdrhistogram', + 0.95, + ) class WeightedQuantileTestCase(unittest.TestCase): @@ -183,26 +198,28 @@ class WeightedQuantileTestCase(unittest.TestCase): def testEvenlyWeightedSamples(self): x = list(range(1, 101)) # 1-100 weights = [1 for _ in x] - self.assertEqual(50, ycsb._WeightedQuantile(x, weights, 0.50)) - self.assertEqual(75, ycsb._WeightedQuantile(x, weights, 0.75)) - self.assertEqual(90, ycsb._WeightedQuantile(x, weights, 0.90)) - self.assertEqual(95, ycsb._WeightedQuantile(x, weights, 0.95)) - self.assertEqual(99, ycsb._WeightedQuantile(x, weights, 0.99)) - self.assertEqual(100, ycsb._WeightedQuantile(x, weights, 1)) + self.assertEqual(50, ycsb_stats._WeightedQuantile(x, weights, 0.50)) + self.assertEqual(75, ycsb_stats._WeightedQuantile(x, weights, 0.75)) + self.assertEqual(90, ycsb_stats._WeightedQuantile(x, weights, 0.90)) + self.assertEqual(95, ycsb_stats._WeightedQuantile(x, weights, 0.95)) + self.assertEqual(99, ycsb_stats._WeightedQuantile(x, weights, 0.99)) + self.assertEqual(100, ycsb_stats._WeightedQuantile(x, weights, 1)) def testLowWeight(self): x = [1, 4] weights = [99, 1] for i in range(100): - self.assertEqual(1, ycsb._WeightedQuantile(x, weights, i / 100.0)) - self.assertEqual(4, ycsb._WeightedQuantile(x, weights, 0.995)) + self.assertEqual(1, ycsb_stats._WeightedQuantile(x, weights, i / 100.0)) + self.assertEqual(4, ycsb_stats._WeightedQuantile(x, weights, 0.995)) def testMidWeight(self): x = [0, 1.2, 4] weights = [1, 98, 1] for i in range(2, 99): - self.assertAlmostEqual(1.2, ycsb._WeightedQuantile(x, weights, i / 100.0)) - self.assertEqual(4, 
ycsb._WeightedQuantile(x, weights, 0.995)) + self.assertAlmostEqual( + 1.2, ycsb_stats._WeightedQuantile(x, weights, i / 100.0) + ) + self.assertEqual(4, ycsb_stats._WeightedQuantile(x, weights, 0.995)) class ParseWorkloadTestCase(unittest.TestCase): @@ -212,11 +229,13 @@ def testParsesEmptyString(self): def testIgnoresComment(self): self.assertDictEqual({}, ycsb.ParseWorkload('#\n')) - self.assertDictEqual({}, - ycsb.ParseWorkload('#recordcount = 10\n' - '# columnfamily=cf')) - self.assertDictEqual({'recordcount': '10'}, - ycsb.ParseWorkload('#Sample!\nrecordcount = 10')) + self.assertDictEqual( + {}, ycsb.ParseWorkload('#recordcount = 10\n# columnfamily=cf') + ) + self.assertDictEqual( + {'recordcount': '10'}, + ycsb.ParseWorkload('#Sample!\nrecordcount = 10'), + ) def testParsesSampleWorkload(self): contents = open_data_file('ycsb_workloada') @@ -231,7 +250,7 @@ def testParsesSampleWorkload(self): 'updateproportion': '0.5', 'scanproportion': '0', 'insertproportion': '0', - 'requestdistribution': 'zipfian' + 'requestdistribution': 'zipfian', } self.assertDictEqual(expected, actual) @@ -240,63 +259,56 @@ def testParsesSampleWorkload(self): class CombineResultsTestCase(unittest.TestCase): def testGroupMissing(self): - r1 = ycsb.YcsbResult( + r1 = ycsb_stats.YcsbResult( groups={ - 'read': ycsb._OpResult( + 'read': ycsb_stats._OpResult( group='read', - statistics={ - 'Operations': 100, - 'Return=0': 100 - }, - data_type=ycsb.HISTOGRAM, + statistics={'Operations': 100, 'Return=0': 100}, + data_type=ycsb_stats.HISTOGRAM, ) - }) - r2 = ycsb.YcsbResult( + } + ) + r2 = ycsb_stats.YcsbResult( groups={ - 'read': ycsb._OpResult( + 'read': ycsb_stats._OpResult( group='read', - statistics={ - 'Operations': 96, - 'Return=0': 94, - 'Return=-1': 2 - }, - data_type=ycsb.HISTOGRAM, + statistics={'Operations': 96, 'Return=0': 94, 'Return=-1': 2}, + data_type=ycsb_stats.HISTOGRAM, ), - 'update': ycsb._OpResult( + 'update': ycsb_stats._OpResult( group='update', - statistics={ - 'Operations': 100, - 'AverageLatency(ms)': 25 - }, - data_type=ycsb.HISTOGRAM, - ) - }) - combined = ycsb._CombineResults([r1, r2], 'histogram', {}) + statistics={'Operations': 100, 'AverageLatency(ms)': 25}, + data_type=ycsb_stats.HISTOGRAM, + ), + } + ) + combined = ycsb_stats.CombineResults([r1, r2], 'histogram', {}) self.assertCountEqual(['read', 'update'], combined.groups) - self.assertCountEqual(['Operations', 'Return=0', 'Return=-1'], - combined.groups['read'].statistics) + self.assertCountEqual( + ['Operations', 'Return=0', 'Return=-1'], + combined.groups['read'].statistics, + ) read_stats = combined.groups['read'].statistics - self.assertEqual({ - 'Operations': 196, - 'Return=0': 194, - 'Return=-1': 2 - }, read_stats) + self.assertEqual( + {'Operations': 196, 'Return=0': 194, 'Return=-1': 2}, read_stats + ) def testDropUnaggregatedFromSingleResult(self): - r = ycsb.YcsbResult( + r = ycsb_stats.YcsbResult( client='', command_line='', groups={ - 'read': - ycsb._OpResult( - group='read', - statistics={'AverageLatency(ms)': 21}, - data_type=ycsb.HISTOGRAM) - }) + 'read': ycsb_stats._OpResult( + group='read', + statistics={'AverageLatency(ms)': 21}, + data_type=ycsb_stats.HISTOGRAM, + ) + }, + ) r_copy = copy.deepcopy(r) self.assertEqual(r, r_copy) - combined = ycsb._CombineResults([r], 'histogram', {}) + combined = ycsb_stats.CombineResults([r], 'histogram', {}) self.assertEqual(r, r_copy) r.groups['read'].statistics = {} self.assertEqual(r, combined) @@ -318,9 +330,13 @@ def testParseHdrLogFile(self): #[Max = 203903.000, Total 
count = 499019] #[Buckets = 8, SubBuckets = 2048] """ - actual = ycsb.ParseHdrLogFile(rawlog) - expected = [(0.0, 0.314, 2), (10.0, 0.853, 49953), - (20.0, 0.949, 50396), (30.0, 1.033, 49759)] + actual = ycsb_stats.ParseHdrLogFile(rawlog) + expected = [ + (0.0, 0.314, 2), + (10.0, 0.853, 49953), + (20.0, 0.949, 50396), + (30.0, 1.033, 49759), + ] self.assertEqual(actual, expected) @@ -328,24 +344,21 @@ class PrerequisitesTestCase(pkb_common_test_case.PkbCommonTestCase): @parameterized.named_parameters( { - 'testcase_name': - 'SnapshotVersion', - 'url': - 'https://storage.googleapis.com/externally_shared_files/ycsb-0.18.0-SNAPSHOT.tar.gz', - 'expected_version': - 18, - }, { + 'testcase_name': 'SnapshotVersion', + 'url': 'https://storage.googleapis.com/externally_shared_files/ycsb-0.18.0-SNAPSHOT.tar.gz', + 'expected_version': 18, + }, + { 'testcase_name': 'StandardVersion', 'url': 'https://storage.googleapis.com/ycsbclient/ycsb-0.17.0.tar.gz', 'expected_version': 17, - }, { - 'testcase_name': - 'GitHubVersion', - 'url': - 'https://github.com/brianfrankcooper/YCSB/releases/download/0.17.0/ycsb-0.17.0.tar.gz', - 'expected_version': - 17, - }) + }, + { + 'testcase_name': 'GitHubVersion', + 'url': 'https://github.com/brianfrankcooper/YCSB/releases/download/0.17.0/ycsb-0.17.0.tar.gz', + 'expected_version': 17, + }, + ) def testGetVersionIndexFromUrl(self, url, expected_version): actual_version = ycsb._GetVersionFromUrl(url) self.assertEqual(actual_version, expected_version) @@ -367,7 +380,7 @@ def setUp(self): FLAGS.ycsb_workload_files = ['workloadc'] self.test_executor = ycsb.YCSBExecutor('test_database') # Result parsing is already handled elsewhere - self.enter_context(mock.patch.object(ycsb, 'ParseResults')) + self.enter_context(mock.patch.object(ycsb_stats, 'ParseResults')) # Test VM with mocked command self.test_vm = mock.Mock() self.test_cmd = self.test_vm.RobustRemoteCommand @@ -429,7 +442,7 @@ def testIncrementalLoadCalledWithCorrectTarget(self): mock.call(matchers.HAS('-target 8542')), mock.call(matchers.HAS('-target 10000')), ], - self.test_cmd.mock_calls + self.test_cmd.mock_calls, ) @flagsaver.flagsaver @@ -438,8 +451,9 @@ def testIncrementalLoadUsesCorrectThreadCounts(self): FLAGS.ycsb_incremental_load = 2500 FLAGS.ycsb_client_vms = 1 FLAGS['ycsb_threads_per_client'].parse(['1000']) - mock_set_thread_count = self.enter_context(mock.patch.object( - self.test_executor, '_SetClientThreadCount')) + mock_set_thread_count = self.enter_context( + mock.patch.object(self.test_executor, '_SetClientThreadCount') + ) # Act self.test_executor.Run([self.test_vm]) @@ -453,7 +467,7 @@ def testIncrementalLoadUsesCorrectThreadCounts(self): mock.call(1000), mock.call(1000), ], - mock_set_thread_count.mock_calls + mock_set_thread_count.mock_calls, ) @flagsaver.flagsaver @@ -467,8 +481,7 @@ def testIncrementalLoadCalledWithLowerTarget(self): # Assert self.assertSequenceEqual( - [mock.call(matchers.HAS('-target 200'))], - self.test_cmd.mock_calls + [mock.call(matchers.HAS('-target 200'))], self.test_cmd.mock_calls ) diff --git a/tests/pkb_test.py b/tests/pkb_test.py index 132d3e1b7a..8870b1c7e8 100644 --- a/tests/pkb_test.py +++ b/tests/pkb_test.py @@ -303,7 +303,7 @@ def testCollectMeminfoHandler(self): vm = mock.Mock() vm.RemoteCommand.return_value = 'b: 100\na: 10\nbadline', '' vm.name = 'pkb-1234-0' - vm.OS_TYPE = 'ubuntu1804' + vm.OS_TYPE = 'ubuntu2004' vm.machine_type = 'n1-standard-2' benchmark_spec = mock.Mock(vms=[vm]) samples = [] @@ -316,7 +316,7 @@ def testCollectMeminfoHandler(self): 
'meminfo_keys': 'a,b', 'meminfo_malformed': 'badline', 'meminfo_machine_type': 'n1-standard-2', - 'meminfo_os_type': 'ubuntu1804', + 'meminfo_os_type': 'ubuntu2004', 'meminfo_vmname': 'pkb-1234-0', } expected_sample = sample.Sample('meminfo', 0, '', expected_metadata) diff --git a/tests/providers/aws/aws_capacity_reservation_test.py b/tests/providers/aws/aws_capacity_reservation_test.py index 802727576c..0abd653efe 100644 --- a/tests/providers/aws/aws_capacity_reservation_test.py +++ b/tests/providers/aws/aws_capacity_reservation_test.py @@ -44,7 +44,7 @@ def __init__(self): self.zone = 'us-west-1' self.region = 'us-west-1' self.machine_type = 'fake_machine_type' - self.OS_TYPE = 'ubuntu1804' # pylint: disable=invalid-name + self.OS_TYPE = 'ubuntu2004' # pylint: disable=invalid-name self.network = mock.MagicMock() self.capacity_reservation_id = None diff --git a/tests/providers/aws/aws_dynamodb_test.py b/tests/providers/aws/aws_dynamodb_test.py index ab05294034..d5f2020008 100644 --- a/tests/providers/aws/aws_dynamodb_test.py +++ b/tests/providers/aws/aws_dynamodb_test.py @@ -299,8 +299,9 @@ def testTagResourceFailsWithNonExistentResource(self): self.enter_context( mock.patch.object(test_instance, '_Exists', return_value=False)) - with self.assertRaises(errors.Resource.CreationError): + with self.assertRaises(vm_util.RetriesExceededRetryError) as e: test_instance._GetTagResourceCommand(['test', 'tag']) + self.assertIs(type(e.exception.__cause__), errors.Resource.CreationError) def testUpdateWithDefaultTags(self): test_instance = GetTestDynamoDBInstance() diff --git a/tests/providers/gcp/gcp_dpb_dataproc_test.py b/tests/providers/gcp/gcp_dpb_dataproc_test.py index a4ad814012..20af4da802 100644 --- a/tests/providers/gcp/gcp_dpb_dataproc_test.py +++ b/tests/providers/gcp/gcp_dpb_dataproc_test.py @@ -16,7 +16,6 @@ import unittest from absl import flags import mock - from perfkitbenchmarker import dpb_service from perfkitbenchmarker import errors from perfkitbenchmarker import vm_util @@ -56,14 +55,17 @@ applications=['foo-component', 'bar-component'], worker_group=mock.Mock( vm_spec=mock.Mock(machine_type='fake-machine-type', num_local_ssds=2), - disk_spec=mock.Mock(disk_type='pd-ssd', disk_size=42))) + disk_spec=mock.Mock(disk_type='pd-ssd', disk_size=42), + ), +) DPGKE_CLUSTER_SPEC = mock.Mock( static_dpb_service_instance=None, gke_cluster_name='gke-cluster', gke_cluster_location='gke-cluster-loc', version='preview-0.3', - gke_cluster_nodepools='name:pool-name,role:driver,min:3') + gke_cluster_nodepools='name:pool-name,role:driver,min:3', +) SERVERLESS_SPEC = mock.Mock( static_dpb_service_instance=None, @@ -72,7 +74,13 @@ dataproc_serverless_initial_executors=4, dataproc_serverless_min_executors=2, dataproc_serverless_max_executors=10, - worker_group=mock.Mock(disk_spec=mock.Mock(disk_size=42)) + dataproc_serverless_memory=10000, + dataproc_serverless_memory_overhead=4000, + worker_group=mock.Mock( + disk_spec=mock.Mock( + disk_size=42, + ), + ), ) @@ -96,7 +104,8 @@ def setUp(self): FLAGS.zones = [GCP_ZONE_US_CENTRAL1_A] @mock.patch.object( - vm_util, 'IssueCommand', return_value=('fake_stdout', 'fake_stderr', 0)) + vm_util, 'IssueCommand', return_value=('fake_stdout', 'fake_stderr', 0) + ) def testCreate(self, mock_issue): cluster = LocalGcpDpbDataproc() cluster._Create() @@ -113,8 +122,9 @@ def testCreate(self, mock_issue): self.assertIn('--worker-machine-type fake-machine-type', command_string) self.assertIn('--num-worker-local-ssds 2', command_string) self.assertIn('--num-workers 2', 
command_string) - self.assertIn('--optional-components foo-component,bar-component', - command_string) + self.assertIn( + '--optional-components foo-component,bar-component', command_string + ) self.assertIn('--project fake-project ', command_string) self.assertIn('--region us-central1', command_string) self.assertIn('--zone us-central1-a', command_string) @@ -123,8 +133,13 @@ def testCreate(self, mock_issue): vm_util, 'IssueCommand', return_value=( - 'fake_stdout', "The zone 'projects/fake-project/zones/us-central1-a' " - 'does not have enough resources available to fulfill the request.', 1) + 'fake_stdout', + ( + "The zone 'projects/fake-project/zones/us-central1-a' " + 'does not have enough resources available to fulfill the request.' + ), + 1, + ), ) def testCreateResourceExhausted(self, mock_issue): cluster = LocalGcpDpbDataproc() @@ -151,14 +166,16 @@ def setUp(self): FLAGS.dpb_service_bucket = STAGING_BUCKET @mock.patch.object( - vm_util, 'IssueCommand', return_value=('fake_stdout', 'fake_stderr', 0)) + vm_util, 'IssueCommand', return_value=('fake_stdout', 'fake_stderr', 0) + ) def testCreate(self, mock_issue): cluster = LocalGcpDpbDPGKE() cluster._Create() self.assertEqual(mock_issue.call_count, 1) command_string = ' '.join(mock_issue.call_args[0][0]) - self.assertIn('gcloud alpha dataproc clusters gke create pkb-fakeru', - command_string) + self.assertIn( + 'gcloud alpha dataproc clusters gke create pkb-fakeru', command_string + ) self.assertIn('--gke-cluster gke-cluster ', command_string) self.assertIn('--namespace pkb-fakeru ', command_string) self.assertIn('--gke-cluster-location gke-cluster-loc ', command_string) @@ -174,12 +191,15 @@ def testMissingAttrs(self): 'version', ], static_dpb_service_instance=None, - gke_cluster_nodepools='') + gke_cluster_nodepools='', + ) with self.assertRaises(errors.Setup.InvalidSetupError) as ex: LocalGcpDpbDPGKE(spec=cluster_spec) self.assertIn( - "['gke_cluster_name', 'gke_cluster_nodepools', 'gke_cluster_location'] must be provided for provisioning DPGKE.", - str(ex.exception)) + "['gke_cluster_name', 'gke_cluster_nodepools', 'gke_cluster_location']" + ' must be provided for provisioning DPGKE.', + str(ex.exception), + ) class GcpDpbDataprocServerlessTest(pkb_common_test_case.PkbCommonTestCase): @@ -190,16 +210,21 @@ def setUp(self): FLAGS.dpb_service_zone = GCP_ZONE_US_CENTRAL1_A @mock.patch.object( - vm_util, 'IssueCommand', return_value=(SERVERLESS_MOCK_BATCH, '', 0)) + vm_util, 'IssueCommand', return_value=(SERVERLESS_MOCK_BATCH, '', 0) + ) def testSubmitJob(self, mock_issue): service = gcp_dpb_dataproc.GcpDpbDataprocServerless(SERVERLESS_SPEC) result = service.SubmitJob( pyspark_file=( - 'gs://pkb-fab5770b/spark_sql_test_scripts/spark_sql_runner.py'), + 'gs://pkb-fab5770b/spark_sql_test_scripts/spark_sql_runner.py' + ), job_arguments=[ - '--sql-scripts', 'gs://pkb-fab5770b/2.sql', '--report-dir', - 'gs://pkb-fab5770b/report-1643853399069', '--table-metadata', - 'gs://pkb-fab5770b/metadata.json' + '--sql-scripts', + 'gs://pkb-fab5770b/2.sql', + '--report-dir', + 'gs://pkb-fab5770b/report-1643853399069', + '--table-metadata', + 'gs://pkb-fab5770b/metadata.json', ], job_jars=[], job_type='pyspark', @@ -208,32 +233,66 @@ def testSubmitJob(self, mock_issue): self.assertEqual(result.pending_time, 72.282181) self.assertEqual(mock_issue.call_count, 2) mock_issue.assert_has_calls([ - mock.call([ - 'gcloud', 'dataproc', 'batches', 'submit', 'pyspark', - 'gs://pkb-fab5770b/spark_sql_test_scripts/spark_sql_runner.py', - '--batch', 'pkb-fakeru-0', - 
'--format', 'json', - '--labels', '', - '--properties', - ('^@^spark.executor.cores=4@' - 'spark.driver.cores=4@' - 'spark.executor.instances=4@' - 'spark.dynamicAllocation.minExecutors=2@' - 'spark.dynamicAllocation.maxExecutors=10@' - 'spark.dataproc.driver.disk_size=42g@' - 'spark.dataproc.executor.disk_size=42g'), - '--quiet', - '--region', 'us-central1', - '--version', 'fake-4.2', - '--', - '--sql-scripts', 'gs://pkb-fab5770b/2.sql', - '--report-dir', 'gs://pkb-fab5770b/report-1643853399069', - '--table-metadata', 'gs://pkb-fab5770b/metadata.json' - ], raise_on_failure=False, timeout=None), - mock.call([ - 'gcloud', 'dataproc', 'batches', 'describe', 'pkb-fakeru-0', - '--format', 'json', '--quiet', '--region', 'us-central1' - ], raise_on_failure=False, timeout=None) + mock.call( + [ + 'gcloud', + 'dataproc', + 'batches', + 'submit', + 'pyspark', + 'gs://pkb-fab5770b/spark_sql_test_scripts/spark_sql_runner.py', + '--batch', + 'pkb-fakeru-0', + '--format', + 'json', + '--labels', + '', + '--properties', + ( + '^@^spark.executor.cores=4@' + 'spark.driver.cores=4@' + 'spark.executor.instances=4@' + 'spark.dynamicAllocation.minExecutors=2@' + 'spark.dynamicAllocation.maxExecutors=10@' + 'spark.dataproc.driver.disk.size=42g@' + 'spark.dataproc.executor.disk.size=42g@' + 'spark.driver.memory=10000m@' + 'spark.executor.memory=10000m@' + 'spark.driver.memoryOverhead=4000m@' + 'spark.executor.memoryOverhead=4000m' + ), + '--quiet', + '--region', + 'us-central1', + '--version', + 'fake-4.2', + '--', + '--sql-scripts', + 'gs://pkb-fab5770b/2.sql', + '--report-dir', + 'gs://pkb-fab5770b/report-1643853399069', + '--table-metadata', + 'gs://pkb-fab5770b/metadata.json', + ], + raise_on_failure=False, + timeout=None, + ), + mock.call( + [ + 'gcloud', + 'dataproc', + 'batches', + 'describe', + 'pkb-fakeru-0', + '--format', + 'json', + '--quiet', + '--region', + 'us-central1', + ], + raise_on_failure=False, + timeout=None, + ), ]) diff --git a/tests/providers_test.py b/tests/providers_test.py index 3b3d665b3a..5975652c5d 100644 --- a/tests/providers_test.py +++ b/tests/providers_test.py @@ -74,7 +74,7 @@ def testBenchmarkConfigSpecLoadsProvider(self): 'vm_groups': { 'group1': { 'cloud': 'AWS', - 'os_type': 'ubuntu1804', + 'os_type': 'ubuntu2004', 'vm_count': 0, 'vm_spec': {'AWS': {}} } diff --git a/tests/relational_db_test.py b/tests/relational_db_test.py new file mode 100644 index 0000000000..1560080a81 --- /dev/null +++ b/tests/relational_db_test.py @@ -0,0 +1,54 @@ +"""Tests for relational_db.""" + +import unittest +from absl import flags +import mock +from perfkitbenchmarker import relational_db +from perfkitbenchmarker import relational_db_spec +from tests import pkb_common_test_case + +FLAGS = flags.FLAGS + + +# Implements some abstract functions so we can instantiate BaseRelationalDb. 
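+# The overrides below are intentionally minimal: _Create/_Delete are no-ops
+# and GetDefaultEngineVersion returns a fixed stub value, so the tests can
+# exercise BaseRelationalDb's client-VM plumbing without provisioning any
+# real resources.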
+class TestBaseRelationalDb(relational_db.BaseRelationalDb): + + def _Create(self): + pass + + def _Delete(self): + pass + + def GetDefaultEngineVersion(self, engine): + return 'test' + + +class RelationalDbTest(pkb_common_test_case.PkbCommonTestCase): + + def setUp(self): + super().setUp() + minimal_spec = { + 'cloud': 'GCP', + 'engine': 'mysql', + 'db_spec': {'GCP': {'machine_type': 'n1-standard-1'}}, + 'db_disk_spec': {'GCP': {'disk_size': 500}}, + } + self.spec = relational_db_spec.RelationalDbSpec( + 'test_component', flag_values=FLAGS, **minimal_spec + ) + FLAGS['run_uri'].parse('test_uri') + + def test_client_vm_query_tools(self): + test_db = TestBaseRelationalDb(self.spec) + test_db._endpoint = 'test_endpoint' + mock_vms = {'default': [mock.Mock(), mock.Mock()]} + test_db.SetVms(mock_vms) + + self.assertLen(test_db.client_vms_query_tools, 2) + self.assertEqual( + test_db.client_vm_query_tools, test_db.client_vms_query_tools[0] + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/scratch_disk_test.py b/tests/scratch_disk_test.py index c187ce63f9..401602ee03 100644 --- a/tests/scratch_disk_test.py +++ b/tests/scratch_disk_test.py @@ -158,7 +158,7 @@ def _PatchCloudSpecific(self): def _CreateVm(self): vm_spec = azure_virtual_machine.AzureVmSpec( 'test_vm_spec.Azure', zone='eastus2', machine_type='test_machine_type') - return azure_virtual_machine.Ubuntu1604BasedAzureVirtualMachine(vm_spec) + return azure_virtual_machine.Ubuntu2004BasedAzureVirtualMachine(vm_spec) def _GetDiskClass(self): return azure_disk.AzureDisk @@ -172,7 +172,7 @@ def _PatchCloudSpecific(self): def _CreateVm(self): vm_spec = gce_virtual_machine.GceVmSpec('test_vm_spec.GCP', machine_type='test_machine_type') - vm = gce_virtual_machine.Ubuntu1804BasedGceVirtualMachine(vm_spec) + vm = gce_virtual_machine.Ubuntu2004BasedGceVirtualMachine(vm_spec) vm.GetNVMEDeviceInfo = mock.Mock() vm.GetNVMEDeviceInfo.return_value = [ { @@ -206,7 +206,7 @@ def _PatchCloudSpecific(self): def _CreateVm(self): vm_spec = aws_virtual_machine.AwsVmSpec( 'test_vm_spec.AWS', zone='us-east-1a', machine_type='test_machine_type') - vm = aws_virtual_machine.Ubuntu1604BasedAwsVirtualMachine(vm_spec) + vm = aws_virtual_machine.Ubuntu2004BasedAwsVirtualMachine(vm_spec) vm.LogDeviceByDiskSpecId('0_0', 'foobar_1') vm.LogDeviceByName('foobar_1', 'vol67890', None) diff --git a/tests/time_triggers/maintenance_simulation_trigger_test.py b/tests/time_triggers/maintenance_simulation_trigger_test.py index f53086ff7b..0825055e06 100644 --- a/tests/time_triggers/maintenance_simulation_trigger_test.py +++ b/tests/time_triggers/maintenance_simulation_trigger_test.py @@ -533,115 +533,151 @@ def testAppendLossFunctionSamplesWithNotification(self): samples = [s] trigger.trigger_time = datetime.datetime.fromtimestamp(4) vm = mock.MagicMock() - vm.CollectLMNotificationsTime = mock.MagicMock(return_value={ - 'LM_total_time': 100, - 'Host_maintenance_end': 8000 - }) + vm.CollectLMNotificationsTime = mock.MagicMock( + return_value={'LM_total_time': 100, 'Host_maintenance_end': 8} + ) trigger.vms = [vm] trigger.AppendSamples(None, vm_spec, samples) - self.assertEqual(samples, [ - Sample( - metric='TPM_time_series', - value=0.0, - unit='TPM', - metadata={ - 'values': - [1, 1, 1, 1, 0, 0.1, 0.2, 0.3, 0.95, 0.95, 0.95, 0.95], - 'timestamps': [ - 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, - 11000, 12000 - ], - 'interval': 1 - }, - timestamp=0), - Sample( - metric='LM Total Time', - value=100.0, - unit='seconds', - metadata={ - 
'LM_total_time': 100, - 'Host_maintenance_end': 8000 - }, - timestamp=0), - Sample( - metric='seconds_dropped_below_0_percent', - value=1.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_10_percent', - value=2.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_20_percent', - value=3.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_30_percent', - value=4.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_40_percent', - value=4.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_50_percent', - value=4.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_60_percent', - value=4.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_70_percent', - value=4.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_80_percent', - value=4.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_90_percent', - value=4.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='unresponsive_metric', - value=2.584, - unit='metric', - metadata={}, - timestamp=0), - Sample( - metric='total_loss_seconds', - value=3.4, - unit='seconds', - metadata={}, - timestamp=0), - Sample( - metric='degradation_percent', - value=5.0, - unit='%', - metadata={}, - timestamp=0) - ]) + self.assertEqual( + samples, + [ + Sample( + metric='TPM_time_series', + value=0.0, + unit='TPM', + metadata={ + 'values': [ + 1, + 1, + 1, + 1, + 0, + 0.1, + 0.2, + 0.3, + 0.95, + 0.95, + 0.95, + 0.95, + ], + 'timestamps': [ + 1000, + 2000, + 3000, + 4000, + 5000, + 6000, + 7000, + 8000, + 9000, + 10000, + 11000, + 12000, + ], + 'interval': 1, + }, + timestamp=0, + ), + Sample( + metric='LM Total Time', + value=100.0, + unit='seconds', + metadata={'LM_total_time': 100, 'Host_maintenance_end': 8}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_0_percent', + value=1.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_10_percent', + value=2.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_20_percent', + value=3.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_30_percent', + value=4.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_40_percent', + value=4.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_50_percent', + value=4.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_60_percent', + value=4.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_70_percent', + value=4.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_80_percent', + value=4.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_90_percent', + value=4.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='unresponsive_metric', + value=2.584, + unit='metric', + metadata={}, + timestamp=0, + ), + Sample( + metric='total_loss_seconds', + value=3.4, + unit='seconds', + metadata={}, + timestamp=0, + ), + Sample( + metric='degradation_percent', + value=5.0, + unit='%', + metadata={}, + timestamp=0, + ), + ], + ) @mock.patch('time.time', mock.MagicMock(return_value=0)) def 
testAppendLossFunctionSamplesContainsMetadata(self): @@ -658,116 +694,152 @@ def testAppendLossFunctionSamplesContainsMetadata(self): samples = [s] trigger.trigger_time = datetime.datetime.fromtimestamp(4) vm = mock.MagicMock() - vm.CollectLMNotificationsTime = mock.MagicMock(return_value={ - 'LM_total_time': 100, - 'Host_maintenance_end': 8000 - }) + vm.CollectLMNotificationsTime = mock.MagicMock( + return_value={'LM_total_time': 100, 'Host_maintenance_end': 8} + ) trigger.vms = [vm] trigger.AppendSamples(None, vm_spec, samples) - self.assertEqual(samples, [ - sample.Sample( - metric='TPM_time_series', - value=0.0, - unit='TPM', - metadata={ - 'values': - [1, 1, 1, 1, 0, 0.1, 0.2, 0.3, 0.95, 0.95, 0.95, 0.95], - 'timestamps': [ - 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, - 11000, 12000 - ], - 'interval': 1, - 'random': 'random' - }, - timestamp=0), - sample.Sample( - metric='LM Total Time', - value=100.0, - unit='seconds', - metadata={ - 'LM_total_time': 100, - 'Host_maintenance_end': 8000 - }, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_0_percent', - value=1.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_10_percent', - value=2.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_20_percent', - value=3.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_30_percent', - value=4.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_40_percent', - value=4.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_50_percent', - value=4.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_60_percent', - value=4.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_70_percent', - value=4.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_80_percent', - value=4.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_90_percent', - value=4.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='unresponsive_metric', - value=2.584, - unit='metric', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='total_loss_seconds', - value=3.4, - unit='seconds', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='degradation_percent', - value=5.0, - unit='%', - metadata={'random': 'random'}, - timestamp=0) - ]) + self.assertEqual( + samples, + [ + sample.Sample( + metric='TPM_time_series', + value=0.0, + unit='TPM', + metadata={ + 'values': [ + 1, + 1, + 1, + 1, + 0, + 0.1, + 0.2, + 0.3, + 0.95, + 0.95, + 0.95, + 0.95, + ], + 'timestamps': [ + 1000, + 2000, + 3000, + 4000, + 5000, + 6000, + 7000, + 8000, + 9000, + 10000, + 11000, + 12000, + ], + 'interval': 1, + 'random': 'random', + }, + timestamp=0, + ), + sample.Sample( + metric='LM Total Time', + value=100.0, + unit='seconds', + metadata={'LM_total_time': 100, 'Host_maintenance_end': 8}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_0_percent', + value=1.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_10_percent', + 
value=2.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_20_percent', + value=3.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_30_percent', + value=4.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_40_percent', + value=4.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_50_percent', + value=4.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_60_percent', + value=4.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_70_percent', + value=4.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_80_percent', + value=4.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_90_percent', + value=4.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='unresponsive_metric', + value=2.584, + unit='metric', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='total_loss_seconds', + value=3.4, + unit='seconds', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='degradation_percent', + value=5.0, + unit='%', + metadata={'random': 'random'}, + timestamp=0, + ), + ], + ) @mock.patch('time.time', mock.MagicMock(return_value=0)) def testAppendLossFunctionSamplesHandleTimeDrift(self): @@ -787,10 +859,9 @@ def testAppendLossFunctionSamplesHandleTimeDrift(self): samples = [s] trigger.trigger_time = datetime.datetime.fromtimestamp(4) vm = mock.MagicMock() - vm.CollectLMNotificationsTime = mock.MagicMock(return_value={ - 'LM_total_time': 100, - 'Host_maintenance_end': 11000 - }) + vm.CollectLMNotificationsTime = mock.MagicMock( + return_value={'LM_total_time': 100, 'Host_maintenance_end': 11} + ) trigger.vms = [vm] trigger.AppendSamples(None, vm_spec, samples) @@ -813,7 +884,7 @@ def testAppendLossFunctionSamplesHandleTimeDrift(self): metric='LM Total Time', value=100.0, unit='seconds', - metadata={'LM_total_time': 100, 'Host_maintenance_end': 11000}, + metadata={'LM_total_time': 100, 'Host_maintenance_end': 11}, timestamp=0, ), sample.Sample(