diff --git a/CHANGES.next.md b/CHANGES.next.md
index 9d98c14ae5..49dff15187 100644
--- a/CHANGES.next.md
+++ b/CHANGES.next.md
@@ -1,5 +1,7 @@
 ### Breaking changes:
-
+- Added --accept_licenses flag. Users have to turn this flag on to
+  acknowledge that PKB may install software, thereby accepting license
+  agreements on the user's behalf.
 - Renamed Database-related flags from managed_db* to db* Added alias for
   backwards compatibility, might not be supported in the future release.
 - Require Python 3.9+
@@ -46,6 +48,7 @@
 - Remove flag fio_write_against_multiple_clients from FIO.
 - Drop windows coremark benchmark.
 - Remove cudnn linux package.
+- Make Ubuntu 20 the default os_type.
 
 ### New features:
 
diff --git a/LICENSE b/LICENSE
index 261eeb9e9f..c4488f1754 100644
--- a/LICENSE
+++ b/LICENSE
@@ -199,3 +199,39 @@
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
+
+---
+
+Files: data/blaze_config.j2, data/blazemark_config.j2
+#==================================================================================================
+#
+#  Configfile file for the Blaze library
+#
+#  Copyright (C) 2013 Klaus Iglberger - All Rights Reserved
+#
+#  This file is part of the Blaze library. You can redistribute it and/or modify it under
+#  the terms of the New (Revised) BSD License. Redistribution and use in source and binary
+#  forms, with or without modification, are permitted provided that the following conditions
+#  are met:
+#
+#  1. Redistributions of source code must retain the above copyright notice, this list of
+#     conditions and the following disclaimer.
+#  2. Redistributions in binary form must reproduce the above copyright notice, this list
+#     of conditions and the following disclaimer in the documentation and/or other materials
+#     provided with the distribution.
+#  3. Neither the names of the Blaze development group nor the names of its contributors
+#     may be used to endorse or promote products derived from this software without specific
+#     prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+#  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+#  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+#  SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+#  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+#  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+#  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+#  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+#  DAMAGE.
+#
+#==================================================================================================
diff --git a/README.md b/README.md
index dddf7b49e9..7fde1b252e 100644
--- a/README.md
+++ b/README.md
@@ -35,6 +35,8 @@
 as part of a benchmark run. Therefore you must accept the license of each of
 the benchmarks individually, and take responsibility for using them before you
 use the PerfKit Benchmarker.
+Moving forward, you will need to run PKB with the explicit flag --accept_licenses.
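+For example, a typical invocation might look like the following (the cloud
+and benchmark chosen here are only illustrative):
+
+```bash
+$ ./pkb.py --cloud=GCP --benchmarks=iperf --accept_licenses
+```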
+
 In its current release these are the benchmarks that are executed:
 
 -   `aerospike`:
diff --git a/perfkitbenchmarker/configs/benchmark_config_spec.py b/perfkitbenchmarker/configs/benchmark_config_spec.py
index 6d403b5644..d519b3645e 100644
--- a/perfkitbenchmarker/configs/benchmark_config_spec.py
+++ b/perfkitbenchmarker/configs/benchmark_config_spec.py
@@ -190,6 +190,14 @@ def _GetOptionDecoderConstructions(cls):
         'default': None,
         'none_ok': True
     }),
+    'dataproc_serverless_memory': (option_decoders.IntDecoder, {
+        'default': None,
+        'none_ok': True
+    }),
+    'dataproc_serverless_memory_overhead': (option_decoders.IntDecoder, {
+        'default': None,
+        'none_ok': True
+    }),
     'emr_serverless_executor_count': (option_decoders.IntDecoder, {
         'default': None,
         'none_ok': True
diff --git a/perfkitbenchmarker/configs/default_config_constants.yaml b/perfkitbenchmarker/configs/default_config_constants.yaml
index 28713624a4..7f756614ee 100644
--- a/perfkitbenchmarker/configs/default_config_constants.yaml
+++ b/perfkitbenchmarker/configs/default_config_constants.yaml
@@ -52,6 +52,10 @@ default_single_core: &default_single_core
     machine_type: cx2-2x4
     zone: us-south-1
     image: null
+  OCI:
+    machine_type: VM.Standard.A1.Flex
+    zone: us-ashburn-1
+    image: null
 
 # TODO: update the two core machines for more providers
 default_dual_core: &default_dual_core
@@ -83,6 +87,10 @@ default_dual_core: &default_dual_core
     image: null
   Kubernetes:
     image: null
+  OCI:
+    machine_type: VM.Standard.A1.Flex
+    zone: us-ashburn-1
+    image: null
 
 # TODO(user): update the disk types below as more providers are
 # updated for the disk types refactor.
@@ -138,6 +146,10 @@ default_500_gb: &default_500_gb
     disk_type: standard
     disk_size: 500
     mount_point: /scratch
+  OCI:
+    disk_type: paravirtualized
+    disk_size: 500
+    mount_point: /scratch
 
 
 # TODO(user): update the disk types below as more providers are
@@ -194,3 +206,8 @@ default_50_gb: &default_50_gb
     disk_type: standard
     disk_size: 50
     mount_point: /scratch
+  OCI:
+    disk_type: paravirtualized
+    disk_size: 50
+    mount_point: /scratch
+
diff --git a/perfkitbenchmarker/flags.py b/perfkitbenchmarker/flags.py
index d61d79dada..9ed73b2a34 100644
--- a/perfkitbenchmarker/flags.py
+++ b/perfkitbenchmarker/flags.py
@@ -36,6 +36,13 @@ def GetCurrentUser():
   except KeyError:
     return 'user_unknown'
 
+
+flags.DEFINE_boolean(
+    'accept_licenses',
+    False,
+    'Acknowledge that PKB may install software, thereby accepting license'
+    " agreements on the user's behalf.",
+)
 flags.DEFINE_list('ssh_options', [], 'Additional options to pass to ssh.')
 flags.DEFINE_boolean('use_ipv6', False, 'Whether to use ipv6 for ssh/scp.')
 flags.DEFINE_list('benchmarks', ['cluster_boot'],
diff --git a/perfkitbenchmarker/linux_benchmarks/apachebench_benchmark.py b/perfkitbenchmarker/linux_benchmarks/apachebench_benchmark.py
index f99c115cd2..a657b27d70 100644
--- a/perfkitbenchmarker/linux_benchmarks/apachebench_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/apachebench_benchmark.py
@@ -151,11 +151,11 @@ class ApacheBenchRunMode(object):
     description: Runs apachebench benchmark.
vm_groups: client: - os_type: ubuntu1804 + os_type: ubuntu2004 vm_spec: *default_single_core vm_count: 1 server: - os_type: ubuntu1804 + os_type: ubuntu2004 vm_spec: *default_single_core """ FLAGS = flags.FLAGS diff --git a/perfkitbenchmarker/linux_benchmarks/cloud_redis_memtier_benchmark.py b/perfkitbenchmarker/linux_benchmarks/cloud_redis_memtier_benchmark.py index 443cc14448..dc9f508fcf 100644 --- a/perfkitbenchmarker/linux_benchmarks/cloud_redis_memtier_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/cloud_redis_memtier_benchmark.py @@ -16,10 +16,14 @@ Spins up a cloud redis instance, runs memtier against it, then spins it down. """ +import collections from absl import flags +from absl import logging from perfkitbenchmarker import background_tasks from perfkitbenchmarker import configs +from perfkitbenchmarker import linux_virtual_machine from perfkitbenchmarker import managed_memory_store +from perfkitbenchmarker import sample from perfkitbenchmarker.linux_packages import memtier FLAGS = flags.FLAGS @@ -36,6 +40,9 @@ vm_count: 1 """ +_LinuxVm = linux_virtual_machine.BaseLinuxVirtualMachine +_ManagedRedis = managed_memory_store.BaseManagedMemoryStore + def GetConfig(user_config): config = configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME) @@ -90,6 +97,74 @@ def Prepare(benchmark_spec): memtier.Load(vm, memory_store_ip, memory_store_port, password) +def _GetConnections( + vms: list[_LinuxVm], redis_instance: _ManagedRedis +) -> list[memtier.MemtierConnection]: + """Gets a list of connections mapping client VMs to shards.""" + if len(vms) == 1: + return [ + memtier.MemtierConnection( + vms[0], + redis_instance.GetMemoryStoreIp(), + redis_instance.GetMemoryStorePort(), + ) + ] + # Spread shards by client VM (evenly distributed by zone) such that each + # client VM gets an equal number of shards in each zone. 
+ connections = [] + shards = redis_instance.GetShardEndpoints(vms[0]) + shards_by_zone = collections.defaultdict(list) + for shard in shards: + shards_by_zone[shard.zone].append(shard) + shards_by_vm = collections.defaultdict(list) + for shards_list in shards_by_zone.values(): + for shard_index, shard in enumerate(shards_list): + vm_index = shard_index % len(vms) + vm = vms[vm_index] + connections.append(memtier.MemtierConnection(vm, shard.ip, shard.port)) + shards_by_vm[vm].append(shard) + logging.info('Shards by VM: %s', shards_by_vm) + return connections + + +def _MeasureMemtierDistribution( + redis_instance: _ManagedRedis, + vms: list[_LinuxVm], +) -> list[sample.Sample]: + """Runs and reports stats across a series of memtier runs.""" + connections = _GetConnections(vms, redis_instance) + return memtier.MeasureLatencyCappedThroughputDistribution( + connections, + redis_instance.GetMemoryStoreIp(), + redis_instance.GetMemoryStorePort(), + vms, + redis_instance.node_count, + redis_instance.GetMemoryStorePassword(), + ) + + +def _Run(vms: list[_LinuxVm], redis_instance: _ManagedRedis): + """Runs memtier based on provided flags.""" + if memtier.MEMTIER_RUN_MODE.value == memtier.MemtierMode.MEASURE_CPU_LATENCY: + return memtier.RunGetLatencyAtCpu(redis_instance, vms) + if memtier.MEMTIER_LATENCY_CAPPED_THROUGHPUT.value: + if memtier.MEMTIER_DISTRIBUTION_ITERATIONS.value: + return _MeasureMemtierDistribution(redis_instance, vms) + return memtier.MeasureLatencyCappedThroughput( + vms[0], + redis_instance.node_count, + redis_instance.GetMemoryStoreIp(), + redis_instance.GetMemoryStorePort(), + redis_instance.GetMemoryStorePassword(), + ) + return memtier.RunOverAllThreadsPipelinesAndClients( + vms, + redis_instance.GetMemoryStoreIp(), + [redis_instance.GetMemoryStorePort()], + redis_instance.GetMemoryStorePassword(), + ) + + def Run(benchmark_spec): """Run benchmark and collect samples. @@ -101,30 +176,10 @@ def Run(benchmark_spec): A list of sample.Sample instances. 
""" memtier_vms = benchmark_spec.vm_groups['clients'] - samples = [] - if memtier.MEMTIER_RUN_MODE.value == memtier.MemtierMode.MEASURE_CPU_LATENCY: - samples = memtier.RunGetLatencyAtCpu( - benchmark_spec.cloud_redis_instance, memtier_vms - ) - elif memtier.MEMTIER_LATENCY_CAPPED_THROUGHPUT.value: - samples = memtier.MeasureLatencyCappedThroughput( - memtier_vms[0], - benchmark_spec.cloud_redis_instance.GetMemoryStoreIp(), - benchmark_spec.cloud_redis_instance.GetMemoryStorePort(), - benchmark_spec.cloud_redis_instance.GetMemoryStorePassword(), - ) - else: - samples = memtier.RunOverAllThreadsPipelinesAndClients( - memtier_vms, - benchmark_spec.cloud_redis_instance.GetMemoryStoreIp(), - [benchmark_spec.cloud_redis_instance.GetMemoryStorePort()], - benchmark_spec.cloud_redis_instance.GetMemoryStorePassword(), - ) - - for sample in samples: - sample.metadata.update( - benchmark_spec.cloud_redis_instance.GetResourceMetadata() - ) + redis_instance: _ManagedRedis = benchmark_spec.cloud_redis_instance + samples = _Run(memtier_vms, redis_instance) + for s in samples: + s.metadata.update(benchmark_spec.cloud_redis_instance.GetResourceMetadata()) return samples @@ -140,4 +195,6 @@ def Cleanup(benchmark_spec): def _Install(vm): + """Installs necessary client packages.""" vm.Install('memtier') + vm.Install('redis_cli') diff --git a/perfkitbenchmarker/linux_benchmarks/cluster_boot_benchmark.py b/perfkitbenchmarker/linux_benchmarks/cluster_boot_benchmark.py index 5031126761..f0c36dbfaf 100644 --- a/perfkitbenchmarker/linux_benchmarks/cluster_boot_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/cluster_boot_benchmark.py @@ -368,6 +368,10 @@ def MeasureDelete( List of Samples containing the delete times and an overall cluster delete time. """ + # Only measure VMs that have a delete time. + vms = [vm for vm in vms if vm.delete_start_time and vm.delete_end_time] + if not vms: + return [] # Collect a delete time from each VM. delete_times = [vm.delete_end_time - vm.delete_start_time for vm in vms] # Get the cluster delete time. 
diff --git a/perfkitbenchmarker/linux_benchmarks/hammerdbcli_benchmark.py b/perfkitbenchmarker/linux_benchmarks/hammerdbcli_benchmark.py index 6972b9dad1..4fa1ff25aa 100644 --- a/perfkitbenchmarker/linux_benchmarks/hammerdbcli_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/hammerdbcli_benchmark.py @@ -12,6 +12,7 @@ from perfkitbenchmarker import virtual_machine from perfkitbenchmarker.linux_packages import hammerdb +from perfkitbenchmarker.providers.gcp import gcp_alloy_db # pylint: disable=unused-import # MYSQL Config file path MYSQL_CONFIG_PATH = '/etc/mysql/mysql.conf.d/mysqld.cnf' diff --git a/perfkitbenchmarker/linux_benchmarks/lmbench_benchmark.py b/perfkitbenchmarker/linux_benchmarks/lmbench_benchmark.py index 87bb0df5a9..400019a50d 100644 --- a/perfkitbenchmarker/linux_benchmarks/lmbench_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/lmbench_benchmark.py @@ -219,9 +219,10 @@ def _AddProcessorMetricSamples(lmbench_output, processor_metric_list, metadata, regex = '%s: (.*)' % metric value_unit = regex_util.ExtractGroup(regex, lmbench_output) [value, unit] = value_unit.split(' ') - results.append( - sample.Sample('%s' % metric.replace('\\', ''), float(value), unit, - metadata)) + if unit == 'microseconds': + results.append( + sample.Sample('%s' % metric.replace('\\', ''), float(value), unit, + metadata)) def _ParseOutput(lmbench_output): diff --git a/perfkitbenchmarker/linux_benchmarks/mlperf_benchmark.py b/perfkitbenchmarker/linux_benchmarks/mlperf_benchmark.py index 5b2a326848..be8a5c9fbf 100644 --- a/perfkitbenchmarker/linux_benchmarks/mlperf_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/mlperf_benchmark.py @@ -183,6 +183,10 @@ def PrepareBenchmark(benchmark_spec, vm=None): _UpdateBenchmarkSpecWithFlags(benchmark_spec) vm = vm or benchmark_spec.vms[0] + has_gpu = nvidia_driver.CheckNvidiaGpuExists(vm) + if has_gpu: + vm.Install('cuda_toolkit') + if (bool(benchmark_spec.tpus) and nvidia_driver.CheckNvidiaGpuExists(vm)): raise errors.Config.InvalidValue( 'Invalid configuration. GPUs and TPUs can not both present in the config.' 
@@ -362,10 +366,6 @@ def PrepareRunner(benchmark_spec, vm=None): else: benchmark_spec.model_dir = '/tmp' - has_gpu = nvidia_driver.CheckNvidiaGpuExists(vm) - if has_gpu: - vm.Install('cuda_toolkit') - vm.Install('nvidia_docker') docker.AddUser(vm) vm.RemoteCommand('sudo usermod -aG docker $USER') diff --git a/perfkitbenchmarker/linux_benchmarks/netperf_benchmark.py b/perfkitbenchmarker/linux_benchmarks/netperf_benchmark.py index dee83bccca..a85e390a3b 100644 --- a/perfkitbenchmarker/linux_benchmarks/netperf_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/netperf_benchmark.py @@ -503,7 +503,18 @@ def RunNetperf(vm, benchmark_name, server_ips, num_streams): if len(parsed_output) == 1: # Only 1 netperf thread throughput_sample, latency_samples, histogram = parsed_output[0] - return samples + [throughput_sample] + latency_samples + output_samples = samples + [throughput_sample] + latency_samples + # Create formatted output for TCP stream throughput metrics + if benchmark_name.upper() == 'TCP_STREAM': + output_samples.append( + sample.Sample( + throughput_sample.metric + '_1stream', + throughput_sample.value, + throughput_sample.unit, + throughput_sample.metadata, + ) + ) + return output_samples else: # Multiple netperf threads # Unzip parsed output @@ -527,6 +538,17 @@ def RunNetperf(vm, benchmark_name, server_ips, num_streams): samples.append( sample.Sample(f'{benchmark_name}_Throughput_{stat}', float(value), throughput_unit, metadata)) + # Create formatted output, following {benchmark_name}_Throughput_Xstream(s) + # for TCP stream throughput metrics + if benchmark_name.upper() == 'TCP_STREAM': + samples.append( + sample.Sample( + f'{benchmark_name}_Throughput_{len(parsed_output)}streams', + throughput_stats['total'], + throughput_unit, + metadata, + ) + ) if enable_latency_histograms: # Combine all of the latency histogram dictionaries latency_histogram = collections.Counter() diff --git a/perfkitbenchmarker/linux_benchmarks/stress_ng_benchmark.py b/perfkitbenchmarker/linux_benchmarks/stress_ng_benchmark.py index 2f2316aeed..1b99c5b194 100644 --- a/perfkitbenchmarker/linux_benchmarks/stress_ng_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/stress_ng_benchmark.py @@ -122,12 +122,6 @@ 'stress_ng_thread_workloads', lambda workloads: workloads and set(workloads).issubset(ALL_WORKLOADS)) -ALL_VERSIONS = ['0.05.23', '0.09.25'] -flags.DEFINE_enum( - 'stress_ng_version', '0.09.25', ALL_VERSIONS, - 'Stress-ng version to use. Default is 0.09.25 which ' - 'is the default package on Ubuntu 1804.') - def _GeoMeanOverflow(iterable): """Returns the geometric mean. @@ -171,7 +165,7 @@ def Prepare(benchmark_spec): required to run the benchmark. """ vm = benchmark_spec.vms[0] - vm.Install('stress_ng') + vm.InstallPackages('stress-ng') def _ParseStressngResult(metadata, @@ -231,7 +225,6 @@ def _RunWorkload(vm, num_threads): metadata = { 'duration_sec': FLAGS.stress_ng_duration, 'threads': num_threads, - 'version': FLAGS.stress_ng_version, } samples = [] @@ -245,10 +238,7 @@ def _RunWorkload(vm, num_threads): numthreads=num_threads, duration=FLAGS.stress_ng_duration)) stdout, stderr = vm.RemoteCommand(cmd) - # TODO(user): Find the actual stress-ng version that changes output to - # stderr instead of stdout - if FLAGS.stress_ng_version > '0.05.23': - stdout = stderr + stdout = stderr stressng_sample = _ParseStressngResult(metadata, stdout) if stressng_sample: samples.append(stressng_sample) @@ -318,4 +308,4 @@ def Cleanup(benchmark_spec): required to run the benchmark. 
""" vm = benchmark_spec.vms[0] - vm.Uninstall('stress_ng') + vm.Uninstall('stress-ng') diff --git a/perfkitbenchmarker/linux_benchmarks/sysbench_benchmark.py b/perfkitbenchmarker/linux_benchmarks/sysbench_benchmark.py index 593c1c716a..4ae8dcf2af 100644 --- a/perfkitbenchmarker/linux_benchmarks/sysbench_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/sysbench_benchmark.py @@ -27,6 +27,7 @@ from typing import List from absl import flags +from perfkitbenchmarker import background_tasks from perfkitbenchmarker import configs from perfkitbenchmarker import errors from perfkitbenchmarker import flag_util @@ -329,7 +330,10 @@ def _PrepareSysbench(client_vm, benchmark_spec): # Some databases install these query tools during _PostCreate, which is # skipped if the database is user managed / restored. if db.user_managed or db.restored: - db.client_vm_query_tools.InstallPackages() + background_tasks.RunThreaded( + lambda client_query_tools: client_query_tools.InstallPackages, + db.client_vms_query_tools, + ) if _SKIP_LOAD_STAGE.value or db.restored: logging.info('Skipping the load stage') diff --git a/perfkitbenchmarker/linux_packages/cuda_toolkit.py b/perfkitbenchmarker/linux_packages/cuda_toolkit.py index edd3c3eb99..a13f4837ca 100644 --- a/perfkitbenchmarker/linux_packages/cuda_toolkit.py +++ b/perfkitbenchmarker/linux_packages/cuda_toolkit.py @@ -70,8 +70,8 @@ CUDA_PIN = 'https://developer.download.nvidia.com/compute/cuda/repos/{os}/{cpu_arch}/cuda-{os}.pin' -CUDA_12_1_TOOLKIT = 'https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda-repo-{os}-12-1-local_12.1.0-530.30.02-1_{cpu_arch}.deb' -CUDA_12_0_TOOLKIT = 'https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda-repo-{os}-12-0-local_12.0.0-525.60.13-1_{cpu_arch}.deb' +CUDA_12_1_TOOLKIT = 'https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda-repo-{os}-12-1-local_12.1.1-530.30.02-1_{cpu_arch}.deb' +CUDA_12_0_TOOLKIT = 'https://developer.download.nvidia.com/compute/cuda/12.0.1/local_installers/cuda-repo-{os}-12-0-local_12.0.1-525.85.12-1_{cpu_arch}.deb' CUDA_11_8_TOOLKIT = 'https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda-repo-{os}-11-8-local_11.8.0-520.61.05-1_{cpu_arch}.deb' CUDA_11_7_TOOLKIT = 'https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda-repo-{os}-11-7-local_11.7.1-515.65.01-1_{cpu_arch}.deb' CUDA_11_6_TOOLKIT = 'https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda-repo-{os}-11-6-local_11.6.2-510.47.03-1_{cpu_arch}.deb' @@ -295,6 +295,13 @@ def _InstallCuda10Point2(vm): 'cuda-libraries-dev-10-2') +def _DownloadCuda(vm, toolkit_fmt): + toolkit = toolkit_fmt.format(os=_CudaOs(vm.OS_TYPE), cpu_arch=_GetCpuArch(vm)) + basename = posixpath.basename(toolkit) + vm.RemoteCommand(f'wget --tries=3 {toolkit}') + vm.RemoteCommand(f'sudo apt -o DPkg::Lock::Timeout=60 install ./{basename}') + + def _InstallCuda12Generic(vm, toolkit_fmt, version_dash): """Installs CUDA Toolkit 12.x from NVIDIA. 
@@ -303,8 +310,6 @@ def _InstallCuda12Generic(vm, toolkit_fmt, version_dash): toolkit_fmt: format string to use for the toolkit name version_dash: Version (ie 12-1) to install """ - toolkit = toolkit_fmt.format(os=_CudaOs(vm.OS_TYPE), cpu_arch=_GetCpuArch(vm)) - basename = posixpath.basename(toolkit) vm.RemoteCommand( 'wget -q' f' {CUDA_PIN.format(os=_CudaOs(vm.OS_TYPE), cpu_arch=GetCpuArchPath(vm))}' @@ -313,8 +318,7 @@ def _InstallCuda12Generic(vm, toolkit_fmt, version_dash): f'sudo mv cuda-{_CudaOs(vm.OS_TYPE)}.pin ' '/etc/apt/preferences.d/cuda-repository-pin-600' ) - vm.RemoteCommand(f'wget -q {toolkit}') - vm.RemoteCommand(f'sudo dpkg -i {basename}') + _DownloadCuda(vm, toolkit_fmt) EnrollSigningKey(vm) vm.AptUpdate() vm.InstallPackages( @@ -333,15 +337,12 @@ def _InstallCuda11Generic(vm, toolkit_fmt, version_dash): toolkit_fmt: format string to use for the toolkit name version_dash: Version (ie 11-1) to install """ - toolkit = toolkit_fmt.format(os=_CudaOs(vm.OS_TYPE), cpu_arch=_GetCpuArch(vm)) - basename = posixpath.basename(toolkit) vm.RemoteCommand( f'wget -q {CUDA_PIN.format(os=_CudaOs(vm.OS_TYPE), cpu_arch=GetCpuArchPath(vm))}' ) vm.RemoteCommand(f'sudo mv cuda-{_CudaOs(vm.OS_TYPE)}.pin ' '/etc/apt/preferences.d/cuda-repository-pin-600') - vm.RemoteCommand(f'wget -q {toolkit}') - vm.RemoteCommand(f'sudo dpkg -i {basename}') + _DownloadCuda(vm, toolkit_fmt) EnrollSigningKey(vm) vm.AptUpdate() vm.InstallPackages(f'cuda-toolkit-{version_dash} ' diff --git a/perfkitbenchmarker/linux_packages/dpdk.py b/perfkitbenchmarker/linux_packages/dpdk.py index 28fcd73a4d..29a988debc 100644 --- a/perfkitbenchmarker/linux_packages/dpdk.py +++ b/perfkitbenchmarker/linux_packages/dpdk.py @@ -113,7 +113,9 @@ def _InstallDPDK(vm): ) # Build and Install - vm.RobustRemoteCommand('cd dpdk && sudo meson setup -Dexamples=all build') + vm.RobustRemoteCommand( + 'cd dpdk && sudo meson setup -Dexamples=l3fwd,l2fwd build' + ) vm.RobustRemoteCommand( 'cd dpdk && sudo ninja install -C build && sudo ldconfig' ) diff --git a/perfkitbenchmarker/linux_packages/linux_boot.py b/perfkitbenchmarker/linux_packages/linux_boot.py index 96da98efe7..e7afadbc62 100644 --- a/perfkitbenchmarker/linux_packages/linux_boot.py +++ b/perfkitbenchmarker/linux_packages/linux_boot.py @@ -324,6 +324,8 @@ def _ParseSeconds(formatted_time: str) -> float: secs += float(part[0 : len(part) - 3]) * 60 elif part.endswith('ms'): secs += float(part[0 : len(part) - 2]) / 1000 + elif part.endswith('us'): + secs += float(part[0 : len(part) - 2]) / 1000 / 1000 elif part.endswith('s'): secs += float(part[0 : len(part) - 1]) else: diff --git a/perfkitbenchmarker/linux_packages/lmbench.py b/perfkitbenchmarker/linux_packages/lmbench.py index 1b1b90a718..d91a167582 100644 --- a/perfkitbenchmarker/linux_packages/lmbench.py +++ b/perfkitbenchmarker/linux_packages/lmbench.py @@ -19,7 +19,7 @@ LMBENCH_DIR = posixpath.join(linux_packages.INSTALL_DIR, 'lmbench') GIT = 'https://github.com/intel/lmbench.git' -COMMIT = '4e4efa113b244b70a1faafd13744578b4edeaeb3' +COMMIT = '701c6c35b0270d4634fb1dc5272721340322b8ed' def _Install(vm): diff --git a/perfkitbenchmarker/linux_packages/memtier.py b/perfkitbenchmarker/linux_packages/memtier.py index 4c5f6b157a..121e4bf532 100644 --- a/perfkitbenchmarker/linux_packages/memtier.py +++ b/perfkitbenchmarker/linux_packages/memtier.py @@ -13,26 +13,32 @@ # limitations under the License. 
"""Module containing memtier installation, utilization and cleanup functions.""" +import abc import collections import copy import dataclasses import json -import logging import math import os import pathlib import random import re +import statistics import time from typing import Any, Dict, List, Optional, Text, Tuple, Union from absl import flags +from absl import logging +import matplotlib.pyplot as plt +import numpy as np from perfkitbenchmarker import background_tasks from perfkitbenchmarker import errors from perfkitbenchmarker import flag_util from perfkitbenchmarker import linux_packages from perfkitbenchmarker import sample +from perfkitbenchmarker import virtual_machine from perfkitbenchmarker import vm_util +import seaborn as sns GIT_REPO = 'https://github.com/RedisLabs/memtier_benchmark' GIT_TAG = '1.4.0' @@ -55,9 +61,7 @@ # upper limit to pipelines when binary searching for latency-capped throughput. # arbitrarily chosen for large latency. MAX_PIPELINES_COUNT = 5000 -# upper limit to clients when binary searching for latency-capped throughput -# arbitrarily chosen for large latency. -MAX_CLIENTS_COUNT = 1000 +MAX_CLIENTS_COUNT = 30 MemtierHistogram = List[Dict[str, Union[float, int]]] @@ -157,6 +161,28 @@ class MemtierMode(object): ' memtier_latency_cap. Defaults to False. ' ), ) +MEMTIER_DISTRIBUTION_ITERATIONS = flags.DEFINE_integer( + 'memtier_distribution_iterations', + None, + ( + 'If set, measures the distribution of latency capped throughput across' + ' multiple iterations. Will run a set number of iterations for the' + ' benchmark test and calculate mean/stddev for metrics. Note that this' + ' is different from memtier_run_count which is a passthrough to the' + ' actual memtier benchmark tool which reports different aggregate' + ' stats.' + ), +) +MEMTIER_DISTRIBUTION_BINARY_SEARCH = flags.DEFINE_bool( + 'memtier_distribution_binary_search', + True, + ( + 'If true, uses a binary search to measure the optimal client and thread' + ' count needed for max throughput under latency cap. Else, uses' + ' --memtier_clients, --memtier_threads, and --memtier_pipelines for the' + ' iterations.' + ), +) MEMTIER_LATENCY_CAP = flags.DEFINE_float( 'memtier_latency_cap', 1.0, @@ -227,6 +253,7 @@ class MemtierMode(object): ' uniform.' 
), ) +MEMTIER_TLS = flags.DEFINE_bool('memtier_tls', False, 'Whether to enable TLS.') class BuildFailureError(Exception): @@ -302,6 +329,8 @@ def BuildMemtierCommand( outfile: Optional[pathlib.PosixPath] = None, password: Optional[str] = None, cluster_mode: Optional[bool] = None, + shard_addresses: Optional[str] = None, + tls: Optional[bool] = None, json_out_file: Optional[pathlib.PosixPath] = None, ) -> str: """Returns command arguments used to run memtier.""" @@ -325,11 +354,20 @@ def BuildMemtierCommand( 'out-file': outfile, 'json-out-file': json_out_file, 'print-percentile': '50,90,95,99,99.5,99.9,99.95,99.99', + 'shard-addresses': shard_addresses, } # Arguments passed without a parameter - no_param_args = {'random-data': random_data, 'cluster-mode': cluster_mode} + no_param_args = { + 'random-data': random_data, + 'cluster-mode': cluster_mode, + 'tls': tls, + 'tls-skip-verify': tls, + } # Build the command - cmd = ['memtier_benchmark'] + cmd = [] + if cluster_mode: + cmd += ['ulimit -n 32758 &&'] + cmd += ['memtier_benchmark'] for arg, value in args.items(): if value is not None: cmd.extend([f'--{arg}', str(value)]) @@ -365,6 +403,7 @@ def Load( requests='allkeys', cluster_mode=MEMTIER_CLUSTER_MODE.value, password=server_password, + tls=MEMTIER_TLS.value, ) _IssueRetryableCommand(client_vm, cmd) @@ -456,120 +495,389 @@ def RunOverAllThreadsPipelinesAndClients( class MemtierBinarySearchParameters: """Parameters to aid binary search of memtier.""" - lower_bound: float - upper_bound: float - pipelines: int - threads: int - clients: int + lower_bound: float = 0 + upper_bound: float = math.inf + pipelines: int = 1 + threads: int = 1 + clients: int = 1 -def MeasureLatencyCappedThroughput( - client_vm, +@dataclasses.dataclass(frozen=True) +class MemtierConnection: + """Parameters mapping client to server endpoint.""" + + client_vm: virtual_machine.BaseVirtualMachine + address: str + port: int + + +def _RunParallelConnections( + connections: list[MemtierConnection], server_ip: str, server_port: int, + threads: int, + clients: int, + pipelines: int, password: Optional[str] = None, -) -> List[sample.Sample]: - """Runs memtier to find the maximum throughput under a latency cap.""" - samples = [] +) -> list['MemtierResult']: + """Runs memtier in parallel with the given connections.""" + run_args = [] + base_args = { + 'server_ip': server_ip, + 'server_port': server_port, + 'threads': threads, + 'clients': clients, + 'pipeline': pipelines, + 'password': password, + } + + connections_by_vm = collections.defaultdict(list) + for conn in connections: + connections_by_vm[conn.client_vm].append(conn) + + # Currently more than one client VM will cause shards to be distributed + # evenly between them. This behavior could be customized later with a flag. 
+  if len(connections_by_vm) > 1:
+    for vm, conns in connections_by_vm.items():
+      shard_addresses = ','.join(
+          f'{conn.address}:{conn.port}' for conn in conns
+      )
+      args = copy.deepcopy(base_args)
+      args.update({
+          'vm': vm,
+          'shard_addresses': shard_addresses,
+      })
+      run_args.append(((), args))
+  else:
+    for connection in connections:
+      args = copy.deepcopy(base_args)
+      args.update({
+          'vm': connection.client_vm,
+      })
+      run_args.append(((), args))
+  logging.info('Connections: %s', connections_by_vm)
+  logging.info('Running with args: %s', run_args)
+  return background_tasks.RunThreaded(_Run, run_args)
+
+
+class _LoadModifier(abc.ABC):
+  """Base class for load modification in binary search."""
+
+  @abc.abstractmethod
+  def GetInitialParameters(self) -> MemtierBinarySearchParameters:
+    """Returns the initial parameters used in the binary search."""
+
+  @abc.abstractmethod
+  def ModifyLoad(
+      self, parameters: MemtierBinarySearchParameters, latency: float
+  ) -> MemtierBinarySearchParameters:
+    """Returns new search parameters."""
 
-  for modify_load_func in [_ModifyPipelines, _ModifyClients]:
-    parameters = MemtierBinarySearchParameters(
-        lower_bound=0, upper_bound=math.inf, pipelines=1, threads=1, clients=1
+
+class _PipelineModifier(_LoadModifier):
+  """Modifies pipelines in single-client binary search."""
+
+  def GetInitialParameters(self) -> MemtierBinarySearchParameters:
+    return MemtierBinarySearchParameters(
+        upper_bound=MAX_PIPELINES_COUNT, pipelines=MAX_PIPELINES_COUNT // 2
+    )
+
+  def ModifyLoad(
+      self, parameters: MemtierBinarySearchParameters, latency: float
+  ) -> MemtierBinarySearchParameters:
+    if latency <= MEMTIER_LATENCY_CAP.value:
+      lower_bound = parameters.pipelines
+      upper_bound = min(parameters.upper_bound, MAX_PIPELINES_COUNT)
+    else:
+      lower_bound = parameters.lower_bound
+      upper_bound = parameters.pipelines
+
+    pipelines = lower_bound + math.ceil((upper_bound - lower_bound) / 2)
+    return MemtierBinarySearchParameters(
+        lower_bound=lower_bound,
+        upper_bound=upper_bound,
+        pipelines=pipelines,
+        threads=1,
+        clients=1,
+    )
+
+
+def _FindFactor(number: int, max_threads: int, max_clients: int) -> int:
+  """Find a factor of the given number (or close to it if it's prime)."""
+  for i in reversed(range(1, max_threads + 1)):
+    if number % i == 0 and number // i <= max_clients:
+      return i
+  return _FindFactor(number - 1, max_threads, max_clients)
+
+
+@dataclasses.dataclass
+class _ClientModifier(_LoadModifier):
+  """Modifies clients in single-pipeline binary search."""
+
+  max_clients: int
+  max_threads: int
+
+  def GetInitialParameters(self) -> MemtierBinarySearchParameters:
+    return MemtierBinarySearchParameters(
+        upper_bound=self.max_clients * self.max_threads,
+        threads=max(self.max_threads // 2, 1),
+        clients=self.max_clients,
+    )
+
+  def ModifyLoad(
+      self, parameters: MemtierBinarySearchParameters, latency: float
+  ) -> MemtierBinarySearchParameters:
+    if latency <= MEMTIER_LATENCY_CAP.value:
+      lower_bound = parameters.clients * parameters.threads + 1
+      upper_bound = min(
+          parameters.upper_bound, self.max_clients * self.max_threads
+      )
+    else:
+      lower_bound = parameters.lower_bound
+      upper_bound = parameters.clients * parameters.threads - 1
+
+    total_clients = lower_bound + math.ceil((upper_bound - lower_bound) / 2)
+    threads = _FindFactor(total_clients, self.max_threads, self.max_clients)
+    clients = total_clients // threads
+    return MemtierBinarySearchParameters(
+        lower_bound=lower_bound,
+        upper_bound=upper_bound,
+        pipelines=1,
+        threads=threads,
+
clients=clients, ) + + +def _CombineResults(results: list['MemtierResult']) -> 'MemtierResult': + """Combines multiple MemtierResults into a single aggregate.""" + ops_per_sec = sum([result.ops_per_sec for result in results]) + kb_per_sec = sum([result.kb_per_sec for result in results]) + latency_ms = sum([result.latency_ms for result in results]) / len(results) + latency_dic = collections.defaultdict(int) + for result in results: + for k, v in result.latency_dic.items(): + latency_dic[k] += v + for k in latency_dic: + latency_dic[k] /= len(results) + return MemtierResult( + ops_per_sec=ops_per_sec, + kb_per_sec=kb_per_sec, + latency_ms=latency_ms, + latency_dic=latency_dic, + metadata=results[0].metadata, + parameters=results[0].parameters, + ) + + +def _BinarySearchForLatencyCappedThroughput( + connections: list[MemtierConnection], + load_modifiers: list[_LoadModifier], + server_ip: str, + server_port: int, + password: Optional[str] = None, +) -> list['MemtierResult']: + """Runs memtier to find the maximum throughput under a latency cap.""" + results = [] + for modifier in load_modifiers: + parameters = modifier.GetInitialParameters() current_max_result = MemtierResult( - 0, 0, 0, {'50': 0, '90': 0, '95': 0, '99': 0, '99.5': 0, '99.9': 0, - '99.950': 0, '99.990': 0}, [], [], [], [], {}, {}) - current_metadata = None + latency_dic={ + '50': 0, + '90': 0, + '95': 0, + '99': 0, + '99.5': 0, + '99.9': 0, + '99.950': 0, + '99.990': 0, + }, + ) while parameters.lower_bound < (parameters.upper_bound - 1): - result = _Run( - vm=client_vm, - server_ip=server_ip, - server_port=server_port, - threads=parameters.threads, - pipeline=parameters.pipelines, - clients=parameters.clients, - password=password, + parallel_results = _RunParallelConnections( + connections, + server_ip, + server_port, + parameters.threads, + parameters.clients, + parameters.pipelines, + password, ) + result = _CombineResults(parallel_results) logging.info( ( - 'Binary search for latency capped throughput.\n' - '\tMemtier ops throughput: %s' - '\tmemtier 95th percentile latency: %s' - '\tlower bound: %s' - '\tupper bound: %s' + 'Binary search for latency capped throughput.' 
+ '\nMemtier ops throughput: %s qps' + '\nmemtier 95th percentile latency: %s ms' + '\n%s' ), result.ops_per_sec, result.latency_dic['95'], - parameters.lower_bound, - parameters.upper_bound, + parameters, ) if ( result.ops_per_sec > current_max_result.ops_per_sec and result.latency_dic['95'] <= MEMTIER_LATENCY_CAP.value ): current_max_result = result - current_metadata = GetMetadata( - clients=parameters.clients, - threads=parameters.threads, - pipeline=parameters.pipelines, + current_max_result.parameters = parameters + current_max_result.metadata.update( + GetMetadata( + clients=parameters.clients, + threads=parameters.threads, + pipeline=parameters.pipelines, + ) ) # 95 percentile used to decide latency cap - parameters = modify_load_func(parameters, result.latency_dic['95']) - samples.extend(current_max_result.GetSamples(current_metadata)) + parameters = modifier.ModifyLoad(parameters, result.latency_dic['95']) + results.append(current_max_result) + logging.info( + 'Found optimal parameters %s for throughput %s and p95 latency %s', + current_max_result.parameters, + current_max_result.ops_per_sec, + current_max_result.latency_dic['95'], + ) + return results + + +def MeasureLatencyCappedThroughput( + client_vm: virtual_machine.VirtualMachine, + server_shard_count: int, + server_ip: str, + server_port: int, + password: Optional[str] = None, +) -> List[sample.Sample]: + """Runs memtier to find the maximum throughput under a latency cap.""" + max_threads = client_vm.NumCpusForBenchmark(report_only_physical_cpus=True) + max_clients = MAX_CLIENTS_COUNT // server_shard_count + samples = [] + for result in _BinarySearchForLatencyCappedThroughput( + [MemtierConnection(client_vm, server_ip, server_port)], + [_PipelineModifier(), _ClientModifier(max_clients, max_threads)], + server_ip, + server_port, + password, + ): + samples.extend(result.GetSamples()) return samples -def _ModifyPipelines( - current_parameters: 'MemtierBinarySearchParameters', latency: float -) -> 'MemtierBinarySearchParameters': - """Modify pipelines count for next iteration of binary search.""" - if latency <= MEMTIER_LATENCY_CAP.value: - lower_bound = current_parameters.pipelines - upper_bound = min(current_parameters.upper_bound, MAX_PIPELINES_COUNT) - else: - lower_bound = current_parameters.lower_bound - upper_bound = current_parameters.pipelines - - pipelines = lower_bound + math.ceil((upper_bound - lower_bound) / 2) - return MemtierBinarySearchParameters( - lower_bound=lower_bound, - upper_bound=upper_bound, - pipelines=pipelines, - threads=1, - clients=1, - ) +def _CalculateMode(values: list[float]) -> float: + """Calculates the mode of a distribution using kernel density estimation.""" + plt.clf() + ax = sns.histplot(values, kde=True) + kdeline = ax.lines[0] + xs = kdeline.get_xdata() + ys = kdeline.get_ydata() + mode_idx = np.argmax(ys) + mode = xs[mode_idx] + return mode -def _ModifyClients( - current_parameters: 'MemtierBinarySearchParameters', latency: float -) -> 'MemtierBinarySearchParameters': - """Modify clients count for next iteration of binary search.""" - if latency <= MEMTIER_LATENCY_CAP.value: - lower_bound = current_parameters.clients * current_parameters.threads - upper_bound = min(current_parameters.upper_bound, MAX_CLIENTS_COUNT) - else: - lower_bound = current_parameters.lower_bound - upper_bound = current_parameters.clients * current_parameters.threads - - total_clients = lower_bound + math.ceil((upper_bound - lower_bound) / 2) - threads = _FindFactor(total_clients) - clients = total_clients // 
threads - return MemtierBinarySearchParameters( - lower_bound=lower_bound, - upper_bound=upper_bound, - pipelines=1, - threads=threads, - clients=clients, +def MeasureLatencyCappedThroughputDistribution( + connections: list[MemtierConnection], + server_ip: str, + server_port: int, + client_vms: list[virtual_machine.VirtualMachine], + server_shard_count: int, + password: Optional[str] = None, +) -> list[sample.Sample]: + """Measures distribution of throughput across several iterations. + + In particular, this function will first find the optimal number of threads and + clients per thread, and then run the test with those parameters for the + specified number of iterations. The reported samples will include mean and + stdev of QPS and latency across the series of runs. + + Args: + connections: list of connections from client to server. + server_ip: Ip address of the server. + server_port: Port of the server. + client_vms: A list of client vms. + server_shard_count: Number of shards in the redis cluster. + password: Password of the server. + + Returns: + A list of throughput and latency samples. + """ + parameters_for_test = MemtierBinarySearchParameters( + pipelines=FLAGS.memtier_pipeline[0], + clients=FLAGS.memtier_clients[0], + threads=FLAGS.memtier_threads[0], ) + if MEMTIER_DISTRIBUTION_BINARY_SEARCH.value: + max_threads = client_vms[0].NumCpusForBenchmark( + report_only_physical_cpus=True + ) + shards_per_client = server_shard_count / len(client_vms) + max_clients = int(MAX_CLIENTS_COUNT // shards_per_client) + result = _BinarySearchForLatencyCappedThroughput( + connections, + [_ClientModifier(max_clients, max_threads)], + server_ip, + server_port, + password, + )[0] + parameters_for_test = result.parameters + logging.info( + 'Starting test iterations with parameters %s', parameters_for_test + ) + results = [] + for _ in range(MEMTIER_DISTRIBUTION_ITERATIONS.value): + results_for_run = _RunParallelConnections( + connections, + server_ip, + server_port, + parameters_for_test.threads, + parameters_for_test.clients, + parameters_for_test.pipelines, + password, + ) + results.extend(results_for_run) -def _FindFactor(number): - """Find any factor of the given number. 
Returns 1 for primes.""" - i = round(math.sqrt(number)) - while i > 0: - if number % i == 0: - return i - i -= 1 + samples = [] + metrics = { + 'ops_per_sec': 'ops/s', + 'kb_per_sec': 'KB/s', + 'latency_ms': 'ms', + '90': 'ms', + '95': 'ms', + '99': 'ms', + } + metadata = { + 'distribution_iterations': MEMTIER_DISTRIBUTION_ITERATIONS.value, + 'threads': parameters_for_test.threads, + 'clients': parameters_for_test.clients, + 'pipelines': parameters_for_test.pipelines, + } + for metric, units in metrics.items(): + is_latency = metric.replace('.', '', 1).isdigit() + values = ( + [result.latency_dic[metric] for result in results] + if is_latency + else [getattr(result, metric) for result in results] + ) + if is_latency: + metric = f'p{metric} latency' + samples.extend([ + sample.Sample( + f'Mean {metric}', statistics.mean(values), units, metadata + ), + sample.Sample( + f'Stdev {metric}', + statistics.stdev(values), + units, + metadata, + ), + sample.Sample( + f'Mode {metric}', + _CalculateMode(values), + units, + metadata, + ), + ]) + + return samples def RunGetLatencyAtCpu(cloud_instance, client_vms): @@ -722,6 +1030,7 @@ def _Run( clients: int, password: Optional[str] = None, unique_id: Optional[str] = None, + shard_addresses: Optional[str] = None, ) -> 'MemtierResult': """Runs the memtier benchmark on the vm.""" logging.info( @@ -780,7 +1089,9 @@ def _Run( password=password, outfile=memtier_results_file, cluster_mode=MEMTIER_CLUSTER_MODE.value, + shard_addresses=shard_addresses, json_out_file=json_results_file, + tls=MEMTIER_TLS.value, ) _IssueRetryableCommand(vm, cmd) @@ -812,6 +1123,7 @@ def _Run( with open(output_path, 'r') as output: summary_data = output.read() + logging.info(summary_data) return MemtierResult.Parse(summary_data, time_series_json) @@ -846,16 +1158,25 @@ def GetMetadata(clients: int, threads: int, pipeline: int) -> Dict[str, Any]: class MemtierResult: """Class that represents memtier results.""" - ops_per_sec: float - kb_per_sec: float - latency_ms: float - latency_dic: Dict[str, float] - get_latency_histogram: MemtierHistogram - set_latency_histogram: MemtierHistogram - timestamps: List[int] - ops_series: List[int] - latency_series: Dict[str, List[int]] - runtime_info: Dict[Text, Text] + ops_per_sec: float = 0.0 + kb_per_sec: float = 0.0 + + latency_ms: float = 0.0 + latency_dic: Dict[str, float] = dataclasses.field(default_factory=dict) + get_latency_histogram: MemtierHistogram = dataclasses.field( + default_factory=list + ) + set_latency_histogram: MemtierHistogram = dataclasses.field( + default_factory=list + ) + + timestamps: List[int] = dataclasses.field(default_factory=list) + ops_series: List[int] = dataclasses.field(default_factory=list) + latency_series: Dict[str, List[int]] = dataclasses.field(default_factory=dict) + + runtime_info: Dict[Text, Text] = dataclasses.field(default_factory=dict) + metadata: Dict[str, Any] = dataclasses.field(default_factory=dict) + parameters: MemtierBinarySearchParameters = MemtierBinarySearchParameters() @classmethod def Parse( @@ -920,21 +1241,27 @@ def Parse( runtime_info=runtime_info, ) - def GetSamples(self, metadata: Dict[str, Any]) -> List[sample.Sample]: + def GetSamples( + self, metadata: Optional[Dict[str, Any]] = None + ) -> List[sample.Sample]: """Return this result as a list of samples.""" - metadata['avg_latency'] = self.latency_ms + if metadata: + self.metadata.update(copy.deepcopy(metadata)) + self.metadata['avg_latency'] = self.latency_ms for key, value in self.latency_dic.items(): - metadata[f'p{key}_latency'] 
= value + self.metadata[f'p{key}_latency'] = value samples = [ - sample.Sample('Ops Throughput', self.ops_per_sec, 'ops/s', metadata), - sample.Sample('KB Throughput', self.kb_per_sec, 'KB/s', metadata), - sample.Sample('Latency', self.latency_ms, 'ms', metadata), + sample.Sample( + 'Ops Throughput', self.ops_per_sec, 'ops/s', self.metadata + ), + sample.Sample('KB Throughput', self.kb_per_sec, 'KB/s', self.metadata), + sample.Sample('Latency', self.latency_ms, 'ms', self.metadata), ] for name, histogram in [ ('get', self.get_latency_histogram), ('set', self.set_latency_histogram), ]: - hist_meta = copy.deepcopy(metadata) + hist_meta = copy.deepcopy(self.metadata) hist_meta.update({'histogram': json.dumps(histogram)}) samples.append( sample.Sample(f'{name} latency histogram', 0, '', hist_meta) @@ -1092,6 +1419,30 @@ def AggregateMemtierResults( additional_metadata=metadata, ) ) + individual_latencies = collections.defaultdict(list) + for metric, latency_at_timestamp in latency_series.items(): + for client_latency in latency_at_timestamp: + for client, latency in enumerate(client_latency): + if len(individual_latencies[metric]) <= client: + individual_latencies[metric].append([]) + individual_latencies[metric][client].append(latency) + + for metric, client_latencies in individual_latencies.items(): + for client, latencies in enumerate(client_latencies): + additional_metadata = {} + additional_metadata.update(metadata) + additional_metadata['client'] = client + additional_metadata[sample.DISABLE_CONSOLE_LOG] = True + samples.append( + sample.CreateTimeSeriesSample( + latencies, + timestamps[0 : len(latencies)], + f'{metric}_time_series', + 'ms', + 1, + additional_metadata=additional_metadata, + ) + ) return samples @@ -1190,7 +1541,11 @@ def _ParseLine( if not re.match(pattern, line): return last_total - _, msec, percent = line.split() + # Skip cases where we have an incomplete line (not enough values to unpack). + try: + _, msec, percent = line.split() + except ValueError: + return last_total counts = _ConvertPercentToAbsolute(approx_total, float(percent)) bucket_counts = int(round(counts - last_total)) if bucket_counts > 0: diff --git a/perfkitbenchmarker/linux_packages/nccl.py b/perfkitbenchmarker/linux_packages/nccl.py index 7ccce1bd88..1c4ee5710e 100644 --- a/perfkitbenchmarker/linux_packages/nccl.py +++ b/perfkitbenchmarker/linux_packages/nccl.py @@ -20,8 +20,10 @@ from perfkitbenchmarker.linux_packages import cuda_toolkit flags.DEFINE_string( - 'nccl_version', 'v2.12.12-1', 'NCCL version to install. ' - 'Input "None" to bypass installation.') + 'nccl_version', + 'v2.18.1-1', + 'NCCL version to install. 
Input "None" to bypass installation.', +) flags.DEFINE_string('nccl_net_plugin', None, 'NCCL network plugin name') flags.DEFINE_string('nccl_mpi', '/usr/bin/mpirun', 'MPI binary path') flags.DEFINE_string('nccl_mpi_home', '/usr/lib/x86_64-linux-gnu/openmpi', diff --git a/perfkitbenchmarker/linux_packages/nvidia_docker.py b/perfkitbenchmarker/linux_packages/nvidia_docker.py index 723d868b53..07001dd22e 100644 --- a/perfkitbenchmarker/linux_packages/nvidia_docker.py +++ b/perfkitbenchmarker/linux_packages/nvidia_docker.py @@ -22,8 +22,17 @@ 'The version of nvidia docker to install.') +def CheckNvidiaDockerExists(vm): + resp, _ = vm.RemoteHostCommand( + 'command -v nvidia-docker', ignore_failure=True + ) + return bool(resp.rstrip()) + + def AptInstall(vm): """Installs the nvidia-docker package on the VM.""" + if CheckNvidiaDockerExists(vm): + return vm.Install('docker') vm.RemoteCommand('curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey ' '| sudo apt-key add -') @@ -39,6 +48,8 @@ def AptInstall(vm): def YumInstall(vm): """Installs the nvidia-docker package on the VM.""" + if CheckNvidiaDockerExists(vm): + return vm.Install('docker') vm.RemoteCommand('curl -s -L https://nvidia.github.io/' 'nvidia-container-runtime/' diff --git a/perfkitbenchmarker/linux_packages/nvidia_driver.py b/perfkitbenchmarker/linux_packages/nvidia_driver.py index cf66c40c35..13c2bd15e9 100644 --- a/perfkitbenchmarker/linux_packages/nvidia_driver.py +++ b/perfkitbenchmarker/linux_packages/nvidia_driver.py @@ -32,7 +32,7 @@ NVIDIA_TESLA_P100 = 'p100' NVIDIA_TESLA_V100 = 'v100' NVIDIA_TESLA_T4 = 't4' -NVIDIA_TESLA_L4 = 'l4' +NVIDIA_L4 = 'l4' NVIDIA_TESLA_A100 = 'a100' NVIDIA_TESLA_A10 = 'a10' @@ -201,7 +201,7 @@ def GetGpuType(vm): elif 'T4' in gpu_types[0]: return NVIDIA_TESLA_T4 elif 'L4' in gpu_types[0]: - return NVIDIA_TESLA_L4 + return NVIDIA_L4 elif 'A100' in gpu_types[0]: return NVIDIA_TESLA_A100 elif 'A10' in gpu_types[0]: @@ -474,8 +474,11 @@ def Install(vm): vm.Install('wget') tokens = re.split('/', location) filename = tokens[-1] - vm.RemoteCommand('wget {location} && chmod 755 {filename} ' - .format(location=location, filename=filename)) + vm.RemoteCommand( + 'wget --tries=3 {location} && chmod 755 {filename} '.format( + location=location, filename=filename + ) + ) vm.RemoteCommand( 'sudo ./{filename} -q -x-module-path={x_module_path} ' '--ui=none -x-library-path={x_library_path}'.format( diff --git a/perfkitbenchmarker/linux_packages/redis_cli.py b/perfkitbenchmarker/linux_packages/redis_cli.py new file mode 100644 index 0000000000..08a439b6a9 --- /dev/null +++ b/perfkitbenchmarker/linux_packages/redis_cli.py @@ -0,0 +1,41 @@ +# Copyright 2023 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Module containing redis cli installation and cleanup functions.""" + + +def _Install(vm) -> None: + """Installs the redis package on the VM.""" + vm.RemoteCommand( + 'curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o' + ' /usr/share/keyrings/redis-archive-keyring.gpg' + ) + vm.RemoteCommand( + 'echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg]' + ' https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee' + ' /etc/apt/sources.list.d/redis.list' + ) + vm.RemoteCommand('sudo apt-get update') + vm.RemoteCommand('sudo apt-get install -y redis') + + +def AptInstall(vm) -> None: + """Installs the redis package on the VM.""" + _Install(vm) + + +def YumInstall(vm) -> None: + """Installs the redis package on the VM.""" + del vm # unused + raise NotImplementedError() + diff --git a/perfkitbenchmarker/linux_packages/stress_ng.py b/perfkitbenchmarker/linux_packages/stress_ng.py deleted file mode 100644 index b827521c1b..0000000000 --- a/perfkitbenchmarker/linux_packages/stress_ng.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright 2021 PerfKitBenchmarker Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Module containing stress-ng installation and cleanup functions.""" - -from absl import flags - -FLAGS = flags.FLAGS - -GIT_REPO = 'https://github.com/ColinIanKing/stress-ng' -GIT_TAG_MAP = { - '0.05.23': '54722768329c9f8184c1c98db63435f201377df1', # ubuntu1604 - '0.09.25': '2db2812edf99ec80c08edf98ee88806a3662031c', # ubuntu1804 -} -STRESS_NG_DIR = '~/stress_ng' - - -def AptInstall(vm): - """Installs stress-ng.""" - vm.InstallPackages( - 'build-essential libaio-dev libapparmor-dev libattr1-dev libbsd-dev ' - 'libcap-dev libgcrypt11-dev libkeyutils-dev libsctp-dev zlib1g-dev' - ) - vm.RemoteCommand('git clone {0} {1}'.format(GIT_REPO, STRESS_NG_DIR)) - vm.RemoteCommand('cd {0} && git checkout {1}'.format( - STRESS_NG_DIR, GIT_TAG_MAP[FLAGS.stress_ng_version])) - vm.RemoteCommand('cd {0} && make && sudo make install'.format(STRESS_NG_DIR)) - - -def AptUninstall(vm): - """Uninstalls stress-ng.""" - vm.RemoteCommand('cd {0} && sudo make uninstall'.format(STRESS_NG_DIR)) diff --git a/perfkitbenchmarker/linux_packages/ycsb.py b/perfkitbenchmarker/linux_packages/ycsb.py index cef793a632..5988894a49 100644 --- a/perfkitbenchmarker/linux_packages/ycsb.py +++ b/perfkitbenchmarker/linux_packages/ycsb.py @@ -35,18 +35,10 @@ Each workload runs for at most 30 minutes. 
""" -import bisect -import collections -from collections.abc import Iterable, Mapping, Sequence +from collections.abc import Mapping, Sequence import copy -import csv -import dataclasses import io -import itertools -import json import logging -import math -import operator import os import posixpath import re @@ -62,190 +54,250 @@ from perfkitbenchmarker import virtual_machine from perfkitbenchmarker import vm_util from perfkitbenchmarker.linux_packages import maven +from perfkitbenchmarker.linux_packages import ycsb_stats FLAGS = flags.FLAGS -YCSB_URL_TEMPLATE = ('https://github.com/brianfrankcooper/YCSB/releases/' - 'download/{0}/ycsb-{0}.tar.gz') +YCSB_URL_TEMPLATE = ( + 'https://github.com/brianfrankcooper/YCSB/releases/' + 'download/{0}/ycsb-{0}.tar.gz' +) YCSB_DIR = posixpath.join(linux_packages.INSTALL_DIR, 'ycsb') YCSB_EXE = posixpath.join(YCSB_DIR, 'bin', 'ycsb') HDRHISTOGRAM_DIR = posixpath.join(linux_packages.INSTALL_DIR, 'hdrhistogram') -HDRHISTOGRAM_TAR_URL = ('https://github.com/HdrHistogram/HdrHistogram/archive/' - 'HdrHistogram-2.1.10.tar.gz') -HDRHISTOGRAM_GROUPS = ['READ', 'UPDATE'] - -_DEFAULT_PERCENTILES = 50, 75, 90, 95, 99, 99.9 - -HISTOGRAM = 'histogram' -HDRHISTOGRAM = 'hdrhistogram' -TIMESERIES = 'timeseries' -YCSB_MEASUREMENT_TYPES = [HISTOGRAM, HDRHISTOGRAM, TIMESERIES] - -# Binary operators to aggregate reported statistics. -# Statistics with operator 'None' will be dropped. -AGGREGATE_OPERATORS = { - 'Operations': operator.add, - 'RunTime(ms)': max, - 'Return=0': operator.add, - 'Return=-1': operator.add, - 'Return=-2': operator.add, - 'Return=-3': operator.add, - 'Return=OK': operator.add, - 'Return=ERROR': operator.add, - 'Return=NOT_FOUND': operator.add, - 'LatencyVariance(ms)': None, - 'AverageLatency(ms)': None, # Requires both average and # of ops. - 'Throughput(ops/sec)': operator.add, - '95thPercentileLatency(ms)': None, # Calculated across clients. - '99thPercentileLatency(ms)': None, # Calculated across clients. - 'MinLatency(ms)': min, - 'MaxLatency(ms)': max -} - -flags.DEFINE_string('ycsb_version', '0.17.0', - 'YCSB version to use. Defaults to version 0.17.0.') +HDRHISTOGRAM_TAR_URL = ( + 'https://github.com/HdrHistogram/HdrHistogram/archive/' + 'HdrHistogram-2.1.10.tar.gz' +) + +flags.DEFINE_string( + 'ycsb_version', '0.17.0', 'YCSB version to use. Defaults to version 0.17.0.' +) flags.DEFINE_string( - 'ycsb_tar_url', None, 'URL to a YCSB tarball to use ' - 'instead of the releases located on github.') -flags.DEFINE_enum('ycsb_measurement_type', HISTOGRAM, YCSB_MEASUREMENT_TYPES, - 'Measurement type to use for ycsb. Defaults to histogram.') -flags.DEFINE_enum('ycsb_measurement_interval', 'op', ['op', 'intended', 'both'], - 'Measurement interval to use for ycsb. Defaults to op.') + 'ycsb_tar_url', + None, + 'URL to a YCSB tarball to use instead of the releases located on github.', +) +flags.DEFINE_enum( + 'ycsb_measurement_type', + ycsb_stats.HISTOGRAM, + ycsb_stats.YCSB_MEASUREMENT_TYPES, + 'Measurement type to use for ycsb. Defaults to histogram.', +) +flags.DEFINE_enum( + 'ycsb_measurement_interval', + 'op', + ['op', 'intended', 'both'], + 'Measurement interval to use for ycsb. 
Defaults to op.',
+)
 flags.DEFINE_boolean(
-    'ycsb_histogram', False, 'Include individual '
+    'ycsb_histogram',
+    False,
+    'Include individual '
     'histogram results from YCSB (will increase sample '
-    'count).')
-flags.DEFINE_boolean('ycsb_load_samples', True, 'Include samples '
-                     'from pre-populating database.')
+    'count).',
+)
+flags.DEFINE_boolean(
+    'ycsb_load_samples', True, 'Include samples from pre-populating database.'
+)
 flags.DEFINE_boolean(
-    'ycsb_skip_load_stage', False, 'If True, skip the data '
+    'ycsb_skip_load_stage',
+    False,
+    'If True, skip the data '
     'loading stage. It can be used when the database target '
-    'already exists with pre-populated data.')
+    'already exists with pre-populated data.',
+)
 flags.DEFINE_boolean(
-    'ycsb_skip_run_stage', False, 'If True, skip the workload '
+    'ycsb_skip_run_stage',
+    False,
+    'If True, skip the workload '
     'running stage. It can be used when you want to '
-    'pre-populate a database target.')
+    'pre-populate a database target.',
+)
 flags.DEFINE_boolean(
-    'ycsb_include_individual_results', False,
-    'Include results from each client VM, rather than just '
-    'combined results.')
+    'ycsb_include_individual_results',
+    False,
+    'Include results from each client VM, rather than just combined results.',
+)
 flags.DEFINE_boolean(
-    'ycsb_reload_database', True, 'Reload database, otherwise skip load stage. '
+    'ycsb_reload_database',
+    True,
+    'Reload database, otherwise skip load stage. '
     'Note, this flag is only used if the database '
-    'is already loaded.')
+    'is already loaded.',
+)
 flags.DEFINE_integer('ycsb_client_vms', 1, 'Number of YCSB client VMs.')
 flags.DEFINE_list(
-    'ycsb_workload_files', ['workloada', 'workloadb'],
+    'ycsb_workload_files',
+    ['workloada', 'workloadb'],
     'Path to YCSB workload file to use during *run* '
-    'stage only. Comma-separated list')
+    'stage only. Comma-separated list.',
+)
 flags.DEFINE_list(
-    'ycsb_load_parameters', [],
+    'ycsb_load_parameters',
+    [],
     'Passed to YCSB during the load stage. Comma-separated list '
-    'of "key=value" pairs.')
+    'of "key=value" pairs.',
+)
 flags.DEFINE_list(
-    'ycsb_run_parameters', [],
+    'ycsb_run_parameters',
+    [],
     'Passed to YCSB during the run stage. Comma-separated list '
-    'of "key=value" pairs.')
+    'of "key=value" pairs.',
+)
 _THROUGHPUT_TIME_SERIES = flags.DEFINE_bool(
-    'ycsb_throughput_time_series', False,
+    'ycsb_throughput_time_series',
+    False,
     'If true, run prints status which includes a throughput time series (1s '
-    'granularity), and includes the results in the samples.')
+    'granularity), and includes the results in the samples.',
+)
 flags.DEFINE_list(
-    'ycsb_threads_per_client', ['32'], 'Number of threads per '
+    'ycsb_threads_per_client',
+    ['32'],
+    'Number of threads per '
     'loader during the benchmark run. Specify a list to vary the '
     'number of clients. For each thread count, optionally supply '
-    'target qps per client, which cause ycsb to self-throttle.')
+    'target qps per client, which causes ycsb to self-throttle.',
+)
 flags.DEFINE_integer(
-    'ycsb_preload_threads', None, 'Number of threads per '
+    'ycsb_preload_threads',
+    None,
+    'Number of threads per '
     'loader during the initial data population stage. '
-    'Default value depends on the target DB.')
+    'Default value depends on the target DB.',
+)
 flags.DEFINE_integer(
-    'ycsb_record_count', None, 'Pre-load with a total '
+    'ycsb_record_count',
+    None,
+    'Pre-load with a total '
     'dataset of records total. Overrides recordcount value in '
     'all workloads of this run. Defaults to None, where '
     'recordcount value in each workload is used. If neither '
-    'is not set, ycsb default of 0 is used.')
-flags.DEFINE_integer('ycsb_operation_count', None, 'Number of operations '
-                     '*per client VM*.')
+    'is set, the ycsb default of 0 is used.',
+)
 flags.DEFINE_integer(
-    'ycsb_timelimit', 1800, 'Maximum amount of time to run '
+    'ycsb_operation_count', None, 'Number of operations *per client VM*.'
+)
+flags.DEFINE_integer(
+    'ycsb_timelimit',
+    1800,
+    'Maximum amount of time to run '
     'each workload / client count combination in seconds. '
-    'Set to 0 for unlimited time.')
+    'Set to 0 for unlimited time.',
+)
 flags.DEFINE_integer(
-    'ycsb_field_count', 10, 'Number of fields in a record. '
-    'Defaults to 10, which is the default in ycsb v0.17.0.')
+    'ycsb_field_count',
+    10,
+    'Number of fields in a record. '
+    'Defaults to 10, which is the default in ycsb v0.17.0.',
+)
 flags.DEFINE_integer(
-    'ycsb_field_length', None, 'Size of each field. Defaults '
-    'to None which uses the ycsb default of 100.')
+    'ycsb_field_length',
+    None,
+    'Size of each field. Defaults to None, which uses the ycsb default of 100.',
+)
 flags.DEFINE_enum(
-    'ycsb_requestdistribution', None, ['uniform', 'zipfian', 'latest'],
+    'ycsb_requestdistribution',
+    None,
+    ['uniform', 'zipfian', 'latest'],
     'Type of request distribution. '
-    'This will overwrite workload file parameter')
+    'This will overwrite the workload file parameter.',
+)
 flags.DEFINE_float(
-    'ycsb_readproportion', None, 'The read proportion, '
-    'Default is 0.5 in workloada and 0.95 in YCSB.')
+    'ycsb_readproportion',
+    None,
+    'The read proportion. Default is 0.5 in workloada and 0.95 in YCSB.',
+)
 flags.DEFINE_float(
-    'ycsb_updateproportion', None, 'The update proportion, '
-    'Default is 0.5 in workloada and 0.05 in YCSB.')
+    'ycsb_updateproportion',
+    None,
+    'The update proportion. Default is 0.5 in workloada and 0.05 in YCSB.',
+)
 flags.DEFINE_float(
-    'ycsb_scanproportion', None, 'The scan proportion, '
-    'Default is 0 in workloada and 0 in YCSB.')
+    'ycsb_scanproportion',
+    None,
+    'The scan proportion. Default is 0 in workloada and 0 in YCSB.',
+)
 flags.DEFINE_boolean(
-    'ycsb_dynamic_load', False,
+    'ycsb_dynamic_load',
+    False,
     'Apply dynamic load to system under test and find out '
     'maximum sustained throughput (test length controlled by '
     'ycsb_operation_count and ycsb_timelimit) the '
-    'system capable of handling. ')
+    'system is capable of handling.',
+)
 flags.DEFINE_integer(
-    'ycsb_dynamic_load_throughput_lower_bound', None,
+    'ycsb_dynamic_load_throughput_lower_bound',
+    None,
     'Apply dynamic load to system under test. '
     'If not supplied, test will halt once reaching '
     'sustained load, otherwise, will keep running until '
-    'reaching lower bound.')
+    'reaching lower bound.',
+)
 flags.DEFINE_float(
-    'ycsb_dynamic_load_sustain_throughput_ratio', 0.95,
+    'ycsb_dynamic_load_sustain_throughput_ratio',
+    0.95,
     'To consider throughput sustainable when applying '
     'dynamic load, the actual overall throughput measured '
     'divided by target throughput applied should exceed '
     'this ratio. If not, we will lower target throughput and '
-    'retry.')
+    'retry.',
+)
 flags.DEFINE_integer(
-    'ycsb_dynamic_load_sustain_timelimit', 300,
+    'ycsb_dynamic_load_sustain_timelimit',
+    300,
     'Run duration in seconds for each throughput target '
-    'if we have already reached sustained throughput.')
-flags.DEFINE_integer('ycsb_sleep_after_load_in_sec', 0,
-                     'Sleep duration in seconds between load and run stage.')
+    'if we have already reached sustained throughput.',
+)
+flags.DEFINE_integer(
+    'ycsb_sleep_after_load_in_sec',
+    0,
+    'Sleep duration in seconds between load and run stage.',
+)
 _BURST_LOAD_MULTIPLIER = flags.DEFINE_integer(
-    'ycsb_burst_load', None,
+    'ycsb_burst_load',
+    None,
     'If set, applies burst load to the system, by running YCSB once, and then '
     'immediately running again with --ycsb_burst_load times the '
     'amount of load specified by the `target` parameter. Set to -1 for '
-    'the max throughput from the client.')
+    'the max throughput from the client.',
+)
 _INCREMENTAL_TARGET_QPS = flags.DEFINE_integer(
-    'ycsb_incremental_load', None,
+    'ycsb_incremental_load',
+    None,
     'If set, applies an incrementally increasing load until the target QPS is '
     'reached. This should be the aggregate load for all VMs. Running with '
     'this flag requires that there is not a QPS target passed in through '
-    '--ycsb_run_parameters.')
+    '--ycsb_run_parameters.',
+)
 _SHOULD_RECORD_COMMAND_LINE = flags.DEFINE_boolean(
-    'ycsb_record_command_line', True,
+    'ycsb_record_command_line',
+    True,
     'Whether to record the command line used for kicking off the runs as part '
     'of metadata. When there are many VMs, this can get long and clutter the '
-    'PKB log.')
+    'PKB log.',
+)
 _SHOULD_FAIL_ON_INCOMPLETE_LOADING = flags.DEFINE_boolean(
-    'ycsb_fail_on_incomplete_loading', False,
+    'ycsb_fail_on_incomplete_loading',
+    False,
     'Whether to fail the benchmarking if loading is not complete, '
-    'e.g., there are insert failures.')
+    'e.g., there are insert failures.',
+)
 _INCOMPLETE_LOADING_METRIC = flags.DEFINE_string(
-    'ycsb_insert_error_metric', 'insert Return=ERROR',
+    'ycsb_insert_error_metric',
+    'insert Return=ERROR',
     'Used with --ycsb_fail_on_incomplete_loading. Will fail the benchmark if '
-    'this metric\'s value is non-zero. This metric should be an indicator of '
+    "this metric's value is non-zero. This metric should be an indicator of "
    'incomplete table loading. If insertion retries are enabled via '
     'core_workload_insertion_retry_limit, then the default metric may be '
-    'non-zero even though the retried insertion eventually succeeded.')
+    'non-zero even though the retried insertion eventually succeeded.',
+)
 _ERROR_RATE_THRESHOLD = flags.DEFINE_float(
-    'ycsb_max_error_rate', 1.00, 'The maximum error rate allowed for the run. '
-    'By default, this allows any number of errors.')
+    'ycsb_max_error_rate',
+    1.00,
+    'The maximum error rate allowed for the run. '
+    'By default, this allows any number of errors.',
+)

 # Status line pattern
 _STATUS_PATTERN = r'(\d+) sec: \d+ operations; (\d+.\d+) current ops\/sec'
@@ -364,28 +416,33 @@ def CheckPrerequisites():
   # Following flags are mutually exclusive.
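  # Illustrative usage (the threads:qps form is an assumption inferred from
  # the ':' check below): a target QPS can come from at most one of
  #   --ycsb_run_parameters=target=5000
  #   --ycsb_threads_per_client=32:1000
  #   --ycsb_dynamic_load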
  run_target = 'target' in run_params
  per_thread_target = any(
-      [':' in thread_qps for thread_qps in FLAGS.ycsb_threads_per_client])
+      [':' in thread_qps for thread_qps in FLAGS.ycsb_threads_per_client]
+  )
  dynamic_load = FLAGS.ycsb_dynamic_load
  if run_target + per_thread_target + dynamic_load > 1:
    raise errors.Config.InvalidValue(
        'Setting YCSB target in ycsb_threads_per_client '
        'or ycsb_run_parameters or applying ycsb_dynamic_load_* flags'
-        ' are mutally exclusive.')
+        ' are mutually exclusive.'
+    )
  if FLAGS.ycsb_dynamic_load_throughput_lower_bound and not dynamic_load:
    raise errors.Config.InvalidValue(
-        'To apply dynamic load, set --ycsb_dynamic_load.')
+        'To apply dynamic load, set --ycsb_dynamic_load.'
+    )
  if _BURST_LOAD_MULTIPLIER.value and not run_target:
    raise errors.Config.InvalidValue(
        'Running in burst mode requires setting a target QPS using '
-        '--ycsb_run_parameters=target=qps. Got None.')
+        '--ycsb_run_parameters=target=qps. Got None.'
+    )
  if _INCREMENTAL_TARGET_QPS.value and run_target:
    raise errors.Config.InvalidValue(
        'Running in incremental mode requires setting a target QPS using '
-        '--ycsb_incremental_load=target and not --ycsb_run_parameters.')
+        '--ycsb_incremental_load=target and not --ycsb_run_parameters.'
+    )


 @vm_util.Retry(poll_interval=1)
@@ -397,8 +454,10 @@ def Install(vm):
   vm.Install('python')
   vm.InstallPackages('curl')
   ycsb_url = (
-      _ycsb_tar_url or FLAGS.ycsb_tar_url or
-      YCSB_URL_TEMPLATE.format(FLAGS.ycsb_version))
+      _ycsb_tar_url
+      or FLAGS.ycsb_tar_url
+      or YCSB_URL_TEMPLATE.format(FLAGS.ycsb_version)
+  )
   install_cmd = (
       'mkdir -p {0} && curl -L {1} | '
       'tar -C {0} --strip-components=1 -xzf - '
@@ -409,7 +468,8 @@ def Install(vm):
       # After https://github.com/brianfrankcooper/YCSB/pull/1583 is merged and
       # released, this will not be necessary.
       # TODO(user): Update minimum YCSB version and remove.
-      "--exclude='**/log4j-core-2*.jar' ")
+      "--exclude='**/log4j-core-2*.jar' "
+  )
   vm.RemoteCommand(install_cmd.format(YCSB_DIR, ycsb_url))
   if _GetVersion(FLAGS.ycsb_version) >= 11:
     vm.Install('maven')
@@ -426,557 +486,6 @@ def Install(vm):
   )


-@dataclasses.dataclass
-class _OpResult:
-  """Individual results for a single operation.
-
-  Attributes:
-    group: group name (e.g., update, insert, overall)
-    statistics: dict mapping from statistic name to value
-    data_type: Corresponds to --ycsb_measurement_type.
-    data:
-      For HISTOGRAM/HDRHISTOGRAM: list of (ms_lower_bound, count) tuples, e.g.
-      [(0, 530), (19, 1)] indicates that 530 ops took between 0ms and 1ms,
-      and 1 took between 19ms and 20ms. Empty bins are not reported.
-      For TIMESERIES: list of (time, latency us) tuples.
-  """
-  group: str = ''
-  data_type: str = ''
-  data: list[tuple[int, float]] = dataclasses.field(default_factory=list)
-  statistics: dict[str, float] = dataclasses.field(default_factory=dict)
-
-
-@dataclasses.dataclass
-class YcsbResult:
-  """Aggregate results for the YCSB run.
-
-  Attributes:
-    client: Contains YCSB version information.
-    command_line: Command line executed.
-    throughput_time_series: Time series of throughputs (interval, QPS).
-    groups: dict of operation group name to results for that operation.
-  """
-  client: str = ''
-  command_line: str = ''
-  throughput_time_series: _ThroughputTimeSeries = dataclasses.field(
-      default_factory=dict)
-  groups: dict[str, _OpResult] = dataclasses.field(default_factory=dict)
-
-
-def ParseResults(ycsb_result_string: str,
-                 data_type: str = 'histogram') -> 'YcsbResult':
-  """Parse YCSB results.
- - Example input for histogram datatype: - - YCSB Client 0.1 - Command line: -db com.yahoo.ycsb.db.HBaseClient -P /tmp/pkb/workloada - [OVERALL], RunTime(ms), 1800413.0 - [OVERALL], Throughput(ops/sec), 2740.503428935472 - [UPDATE], Operations, 2468054 - [UPDATE], AverageLatency(us), 2218.8513395574005 - [UPDATE], MinLatency(us), 554 - [UPDATE], MaxLatency(us), 352634 - [UPDATE], 95thPercentileLatency(ms), 4 - [UPDATE], 99thPercentileLatency(ms), 7 - [UPDATE], Return=0, 2468054 - [UPDATE], 0, 398998 - [UPDATE], 1, 1015682 - [UPDATE], 2, 532078 - ... - - Example input for hdrhistogram datatype: - - YCSB Client 0.17.0 - Command line: -db com.yahoo.ycsb.db.RedisClient -P /opt/pkb/workloadb - [OVERALL], RunTime(ms), 29770.0 - [OVERALL], Throughput(ops/sec), 33590.86328518643 - [UPDATE], Operations, 49856.0 - [UPDATE], AverageLatency(us), 1478.0115532734276 - [UPDATE], MinLatency(us), 312.0 - [UPDATE], MaxLatency(us), 24623.0 - [UPDATE], 95thPercentileLatency(us), 3501.0 - [UPDATE], 99thPercentileLatency(us), 6747.0 - [UPDATE], Return=OK, 49856 - ... - - Example input for ycsb version 0.17.0+: - - ... - Command line: -db com.yahoo.ycsb.db.HBaseClient10 ... -load - YCSB Client 0.17.0 - - Loading workload... - Starting test. - ... - [OVERALL], RunTime(ms), 11411 - [OVERALL], Throughput(ops/sec), 8763.473841030585 - [INSERT], Operations, 100000 - [INSERT], AverageLatency(us), 74.92 - [INSERT], MinLatency(us), 5 - [INSERT], MaxLatency(us), 98495 - [INSERT], 95thPercentileLatency(us), 42 - [INSERT], 99thPercentileLatency(us), 1411 - [INSERT], Return=OK, 100000 - ... - - Example input for timeseries datatype: - - ... - [OVERALL], RunTime(ms), 240007.0 - [OVERALL], Throughput(ops/sec), 10664.605615669543 - ... - [READ], Operations, 1279253 - [READ], AverageLatency(us), 3002.7057071587874 - [READ], MinLatency(us), 63 - [READ], MaxLatency(us), 93584 - [READ], Return=OK, 1279281 - [READ], 0, 528.6142757498257 - [READ], 500, 360.95347448674966 - [READ], 1000, 667.7379547689283 - [READ], 1500, 731.5389357265888 - [READ], 2000, 778.7992281717318 - ... - - Args: - ycsb_result_string: str. Text output from YCSB. - data_type: Either 'histogram' or 'timeseries' or 'hdrhistogram'. 'histogram' - and 'hdrhistogram' datasets are in the same format, with the difference - being lacking the (millisec, count) histogram component. Hence are parsed - similarly. - - Returns: - A YcsbResult object that contains the results from parsing YCSB output. - Raises: - IOError: If the results contained unexpected lines. - """ - if ('redis.clients.jedis.exceptions.JedisConnectionException' - in ycsb_result_string): - # This error is cause by ycsb using an old version of redis client 2.9.0 - # https://github.com/xetorthio/jedis/issues/1977 - raise errors.Benchmarks.KnownIntermittentError( - 'errors.Benchmarks.KnownIntermittentError') - - lines = [] - client_string = 'YCSB' - command_line = 'unknown' - throughput_time_series = {} - fp = io.StringIO(ycsb_result_string) - result_string = next(fp).strip() - - def IsHeadOfResults(line): - return line.startswith('[OVERALL]') - - while not IsHeadOfResults(result_string): - if result_string.startswith('YCSB Client 0.'): - client_string = result_string - if result_string.startswith('Command line:'): - command_line = result_string - # Look for status lines which include throughput on a 1-sec basis. - match = re.search(_STATUS_PATTERN, result_string) - if match is not None: - timestamp, qps = int(match.group(1)), float(match.group(2)) - # Repeats in the printed status are erroneous, ignore. 
- if timestamp not in throughput_time_series: - throughput_time_series[timestamp] = qps - try: - result_string = next(fp).strip() - except StopIteration: - raise IOError( - f'Could not parse YCSB output: {ycsb_result_string}') from None - - if result_string.startswith('[OVERALL]'): # YCSB > 0.7.0. - lines.append(result_string) - else: - # Received unexpected header - raise IOError(f'Unexpected header: {client_string}') - - # Some databases print additional output to stdout. - # YCSB results start with []; - # filter to just those lines. - def LineFilter(line): - return re.search(r'^\[[A-Z]+\]', line) is not None - - lines = itertools.chain(lines, filter(LineFilter, fp)) - - r = csv.reader(lines) - - by_operation = itertools.groupby(r, operator.itemgetter(0)) - - result = YcsbResult(client=client_string, - command_line=command_line, - throughput_time_series=throughput_time_series) - - for operation, lines in by_operation: - operation = operation[1:-1].lower() - - if operation == 'cleanup': - continue - - op_result = _OpResult(group=operation, - data_type=data_type) - latency_unit = 'ms' - for _, name, val in lines: - name = name.strip() - val = val.strip() - # Drop ">" from ">1000" - if name.startswith('>'): - name = name[1:] - val = float(val) if '.' in val or 'nan' in val.lower() else int(val) - if name.isdigit(): - if val: - if data_type == TIMESERIES and latency_unit == 'us': - val /= 1000.0 - op_result.data.append((int(name), val)) - else: - if '(us)' in name: - name = name.replace('(us)', '(ms)') - val /= 1000.0 - latency_unit = 'us' - op_result.statistics[name] = val - - result.groups[operation] = op_result - _ValidateErrorRate(result) - return result - - -def _ValidateErrorRate(result: YcsbResult) -> None: - """Raises an error if results contains entries with too high error rate. - - Computes the error rate for each operation, example output looks like: - - [INSERT], Operations, 100 - [INSERT], AverageLatency(us), 74.92 - [INSERT], MinLatency(us), 5 - [INSERT], MaxLatency(us), 98495 - [INSERT], 95thPercentileLatency(us), 42 - [INSERT], 99thPercentileLatency(us), 1411 - [INSERT], Return=OK, 90 - [INSERT], Return=ERROR, 10 - - This function will then compute 10/100 = 0.1 error rate. - - Args: - result: The result of running ParseResults() - - Raises: - errors.Benchmarks.RunError: If the computed error rate is higher than the - threshold. - """ - for operation in result.groups.values(): - name, stats = operation.group, operation.statistics - # The operation count can be 0 - count = stats.get('Operations', 0) - if count == 0: - continue - # These keys may be missing from the output. - error_rate = stats.get('Return=ERROR', 0) / count - if error_rate > _ERROR_RATE_THRESHOLD.value: - raise errors.Benchmarks.RunError( - f'YCSB had a {error_rate} error rate for {name}, higher than ' - f'threshold {_ERROR_RATE_THRESHOLD.value}') - - -def ParseHdrLogFile(logfile: str) -> list[_HdrHistogramTuple]: - """Parse a hdrhistogram log file into a list of (percentile, latency, count). - - Example decrypted hdrhistogram logfile (value measures latency in microsec): - - #[StartTime: 1523565997 (seconds since epoch), Thu Apr 12 20:46:37 UTC 2018] - Value Percentile TotalCount 1/(1-Percentile) - - 314.000 0.000000000000 2 1.00 - 853.000 0.100000000000 49955 1.11 - 949.000 0.200000000000 100351 1.25 - 1033.000 0.300000000000 150110 1.43 - ... 
- 134271.000 0.999998664856 1000008 748982.86 - 134271.000 0.999998855591 1000008 873813.33 - 201983.000 0.999999046326 1000009 1048576.00 - #[Mean = 1287.159, StdDeviation = 667.560] - #[Max = 201983.000, Total count = 1000009] - #[Buckets = 8, SubBuckets = 2048] - - Example of output: - [(0, 0.314, 2), (10, 0.853, 49953), (20, 0.949, 50396), ...] - - Args: - logfile: Hdrhistogram log file. - - Returns: - List of (percentile, value, count) tuples - """ - result = [] - last_percent_value = -1 - prev_total_count = 0 - for row in logfile.split('\n'): - if re.match(r'( *)(\d|\.)( *)', row): - row_vals = row.split() - # convert percentile to 100 based and round up to 3 decimal places - percentile = math.floor(float(row_vals[1]) * 100000) / 1000.0 - current_total_count = int(row_vals[2]) - if (percentile > last_percent_value and - current_total_count > prev_total_count): - # convert latency to millisec based and percentile to 100 based. - latency = float(row_vals[0]) / 1000 - count = current_total_count - prev_total_count - result.append((percentile, latency, count)) - last_percent_value = percentile - prev_total_count = current_total_count - return result - - -def ParseHdrLogs( - hdrlogs: Mapping[str, str]) -> dict[str, list[_HdrHistogramTuple]]: - """Parse a dict of group to hdr logs into a dict of group to histogram tuples. - - Args: - hdrlogs: Dict of group (read or update) to hdr logs for that group. - - Returns: - Dict of group to histogram tuples of reportable percentile values. - """ - parsed_hdr_histograms = {} - for group, logfile in hdrlogs.items(): - values = ParseHdrLogFile(logfile) - parsed_hdr_histograms[group] = values - return parsed_hdr_histograms - - -def _CumulativeSum(xs): - total = 0 - for x in xs: - total += x - yield total - - -def _WeightedQuantile(x, weights, p): - """Weighted quantile measurement for an ordered list. - - This method interpolates to the higher value when the quantile is not a direct - member of the list. This works well for YCSB, since latencies are floored. - - Args: - x: List of values. - weights: List of numeric weights. - p: float. Desired quantile in the interval [0, 1]. - - Returns: - float. - - Raises: - ValueError: When 'x' and 'weights' are not the same length, or 'p' is not in - the interval [0, 1]. - """ - if len(x) != len(weights): - raise ValueError('Lengths do not match: {0} != {1}'.format( - len(x), len(weights))) - if p < 0 or p > 1: - raise ValueError('Invalid quantile: {0}'.format(p)) - n = sum(weights) - target = n * float(p) - cumulative = list(_CumulativeSum(weights)) - - # Find the first cumulative weight >= target - i = bisect.bisect_left(cumulative, target) - if i == len(x): - return x[-1] - else: - return x[i] - - -def _PercentilesFromHistogram(ycsb_histogram, percentiles=_DEFAULT_PERCENTILES): - """Calculate percentiles for from a YCSB histogram. - - Args: - ycsb_histogram: List of (time_ms, frequency) tuples. - percentiles: iterable of floats, in the interval [0, 100]. - - Returns: - dict, mapping from percentile to value. - Raises: - ValueError: If one or more percentiles are outside [0, 100]. 
- """ - result = collections.OrderedDict() - histogram = sorted(ycsb_histogram) - for percentile in percentiles: - if percentile < 0 or percentile > 100: - raise ValueError('Invalid percentile: {0}'.format(percentile)) - if math.modf(percentile)[0] < 1e-7: - percentile = int(percentile) - label = 'p{0}'.format(percentile) - latencies, freqs = list(zip(*histogram)) - time_ms = _WeightedQuantile(latencies, freqs, percentile * 0.01) - result[label] = time_ms - return result - - -def _CombineResults(result_list: Iterable[YcsbResult], - measurement_type: str, - combined_hdr: Mapping[str, list[_HdrHistogramTuple]]): - """Combine results from multiple YCSB clients. - - Reduces a list of YCSB results (the output of ParseResults) - into a single result. Histogram bin counts, operation counts, and throughput - are summed; RunTime is replaced by the maximum runtime of any result. - - Args: - result_list: Iterable of ParseResults outputs. - measurement_type: Measurement type used. If measurement type is histogram, - histogram bins are summed across results. If measurement type is - hdrhistogram, an aggregated hdrhistogram (combined_hdr) is expected. - combined_hdr: Dict of already aggregated histogram. - - Returns: - A dictionary, as returned by ParseResults. - """ - - def DropUnaggregated(result: YcsbResult) -> None: - """Remove statistics which 'operators' specify should not be combined.""" - drop_keys = {k for k, v in AGGREGATE_OPERATORS.items() if v is None} - for group in result.groups.values(): - for k in drop_keys: - group.statistics.pop(k, None) - - def CombineHistograms(hist1, hist2): - h1 = dict(hist1) - h2 = dict(hist2) - keys = sorted(frozenset(h1) | frozenset(h2)) - result = [] - for k in keys: - result.append((k, h1.get(k, 0) + h2.get(k, 0))) - return result - - combined_weights = {} - - def _CombineLatencyTimeSeries( - combined_series: list[tuple[int, float]], - individual_series: list[tuple[int, float]]) -> list[tuple[int, float]]: - """Combines two timeseries of average latencies. - - Args: - combined_series: A list representing the timeseries with which the - individual series is being merged. - individual_series: A list representing the timeseries being merged with - the combined series. - - Returns: - A list representing the new combined series. - - Note that this assumes that each individual timeseries spent an equal - amount of time executing requests for each timeslice. This should hold for - runs without -target where each client has an equal number of threads, but - may not hold otherwise. - """ - combined_series = dict(combined_series) - individual_series = dict(individual_series) - timestamps = set(combined_series) | set(individual_series) - - result = [] - for timestamp in sorted(timestamps): - if timestamp not in individual_series: - continue - if timestamp not in combined_weights: - combined_weights[timestamp] = 1.0 - if timestamp not in combined_series: - result.append((timestamp, individual_series[timestamp])) - continue - - # This computes a new combined average latency by dividing the sum of - # request latencies by the sum of request counts for the time period. - # The sum of latencies for an individual series is assumed to be "1", - # so the sum of latencies for the combined series is the total number of - # series i.e. "combined_weight". - # The request count for an individual series is 1 / average latency. - # This means the request count for the combined series is - # combined_weight * 1 / average latency. 
- combined_weight = combined_weights[timestamp] - average_latency = (combined_weight + 1.0) / ( - (combined_weight / combined_series[timestamp]) + - (1.0 / individual_series[timestamp])) - result.append((timestamp, average_latency)) - combined_weights[timestamp] += 1.0 - return result - - def _CombineThroughputTimeSeries( - series1: _ThroughputTimeSeries, - series2: _ThroughputTimeSeries) -> _ThroughputTimeSeries: - """Returns a combined dict of [timestamp, total QPS] from the two series.""" - timestamps1 = set(series1) - timestamps2 = set(series2) - all_timestamps = timestamps1 | timestamps2 - diff_timestamps = timestamps1 ^ timestamps2 - if diff_timestamps: - # This case is rare but does happen occassionally, so log a warning - # instead of raising an exception. - logging.warning( - 'Expected combined timestamps to be the same, got different ' - 'timestamps: %s', diff_timestamps) - result = {} - for timestamp in all_timestamps: - result[timestamp] = ( - series1.get(timestamp, 0) + - series2.get(timestamp, 0)) - return result - - result_list = list(result_list) - result = copy.deepcopy(result_list[0]) - DropUnaggregated(result) - - for indiv in result_list[1:]: - for group_name, group in indiv.groups.items(): - if group_name not in result.groups: - logging.warning( - 'Found result group "%s" in individual YCSB result, ' - 'but not in accumulator.', group_name) - result.groups[group_name] = copy.deepcopy(group) - continue - - # Combine reported statistics. - # If no combining operator is defined, the statistic is skipped. - # Otherwise, the aggregated value is either: - # * The value in 'indiv', if the statistic is not present in 'result' or - # * AGGREGATE_OPERATORS[statistic](result_value, indiv_value) - for k, v in group.statistics.items(): - if k not in AGGREGATE_OPERATORS: - logging.warning('No operator for "%s". Skipping aggregation.', k) - continue - elif AGGREGATE_OPERATORS[k] is None: # Drop - result.groups[group_name].statistics.pop(k, None) - continue - elif k not in result.groups[group_name].statistics: - logging.warning( - 'Found statistic "%s.%s" in individual YCSB result, ' - 'but not in accumulator.', group_name, k) - result.groups[group_name].statistics[k] = copy.deepcopy(v) - continue - - op = AGGREGATE_OPERATORS[k] - result.groups[group_name].statistics[k] = ( - op(result.groups[group_name].statistics[k], v)) - - if measurement_type == HISTOGRAM: - result.groups[group_name].data = CombineHistograms( - result.groups[group_name].data, group.data) - elif measurement_type == TIMESERIES: - result.groups[group_name].data = _CombineLatencyTimeSeries( - result.groups[group_name].data, group.data) - result.client = ' '.join((result.client, indiv.client)) - result.command_line = ';'.join( - (result.command_line, indiv.command_line)) - - if _THROUGHPUT_TIME_SERIES.value: - result.throughput_time_series = _CombineThroughputTimeSeries( - result.throughput_time_series, indiv.throughput_time_series) - - if measurement_type == HDRHISTOGRAM: - for group_name in combined_hdr: - if group_name in result.groups: - result.groups[group_name].data = combined_hdr[group_name] - - return result - - def ParseWorkload(contents): """Parse a YCSB workload file. 
@@ -995,8 +504,11 @@ def ParseWorkload(contents): fp = io.StringIO(contents) result = {} for line in fp: - if (line.strip() and not line.lstrip().startswith('#') and - not line.lstrip().startswith('!')): + if ( + line.strip() + and not line.lstrip().startswith('#') + and not line.lstrip().startswith('!') + ): k, v = re.split(r'\s*[:=]\s*', line, maxsplit=1) result[k] = v.strip() return result @@ -1010,93 +522,6 @@ def PushWorkload(vm, workload_file, remote_path): vm.PushFile(workload_file, remote_path) -def _CreateSamples(ycsb_result: YcsbResult, - include_histogram: bool = False, - **kwargs) -> list[sample.Sample]: - """Create PKB samples from a YCSB result. - - Args: - ycsb_result: dict. Result of ParseResults. - include_histogram: bool. If True, include records for each histogram bin. - Note that this will increase the output volume significantly. - **kwargs: Base metadata for each sample. - - Yields: - List of sample.Sample objects. - """ - command_line = ycsb_result.command_line - stage = 'load' if command_line.endswith('-load') else 'run' - base_metadata = { - 'stage': stage, - 'ycsb_tar_url': _ycsb_tar_url, - 'ycsb_version': FLAGS.ycsb_version - } - if _SHOULD_RECORD_COMMAND_LINE.value: - base_metadata['command_line'] = command_line - base_metadata.update(kwargs) - - throughput_time_series = ycsb_result.throughput_time_series - if throughput_time_series: - yield sample.Sample( - 'Throughput Time Series', 0, '', - {'throughput_time_series': sorted(throughput_time_series.items())}) - - for group_name, group in ycsb_result.groups.items(): - meta = base_metadata.copy() - meta['operation'] = group_name - for statistic, value in group.statistics.items(): - if value is None: - continue - - unit = '' - m = re.match(r'^(.*) *\((us|ms|ops/sec)\)$', statistic) - if m: - statistic = m.group(1) - unit = m.group(2) - yield sample.Sample(' '.join([group_name, statistic]), value, unit, meta) - - if group.data and group.data_type == HISTOGRAM: - percentiles = _PercentilesFromHistogram(group.data) - for label, value in percentiles.items(): - yield sample.Sample(' '.join([group_name, label, 'latency']), value, - 'ms', meta) - if include_histogram: - for time_ms, count in group.data: - yield sample.Sample( - '{0}_latency_histogram_{1}_ms'.format(group_name, time_ms), count, - 'count', meta) - - if group.data and group.data_type == HDRHISTOGRAM: - # Strip percentile from the three-element tuples. - histogram = [value_count[-2:] for value_count in group.data] - percentiles = _PercentilesFromHistogram(histogram) - for label, value in percentiles.items(): - yield sample.Sample(' '.join([group_name, label, 'latency']), value, - 'ms', meta) - if include_histogram: - histogram = [] - for _, value, bucket_count in group.data: - histogram.append({ - 'microsec_latency': int(value * 1000), - 'count': bucket_count - }) - hist_meta = meta.copy() - hist_meta.update({'histogram': json.dumps(histogram)}) - yield sample.Sample('{0} latency histogram'.format(group_name), 0, '', - hist_meta) - - if group.data and group.data_type == TIMESERIES: - for sample_time, average_latency in group.data: - timeseries_meta = meta.copy() - timeseries_meta['sample_time'] = sample_time - yield sample.Sample( - ' '.join([group_name, 'AverageLatency (timeseries)']), - average_latency, 'ms', timeseries_meta) - yield sample.Sample('Average Latency Time Series', 0, '', { - 'latency_time_series': group.data - }) - - class YCSBExecutor: """Load data and run benchmarks using YCSB. 
@@ -1190,7 +615,9 @@ def _Load(self, vm, **kwargs): kwargs[param] = value command = self._BuildCommand('load', **kwargs) stdout, stderr = vm.RobustRemoteCommand(command) - return ParseResults(str(stderr + stdout), self.measurement_type) + return ycsb_stats.ParseResults( + str(stderr + stdout), self.measurement_type, _ERROR_RATE_THRESHOLD.value + ) def _LoadThreaded(self, vms, workload_file, **kwargs): """Runs "Load" in parallel for each VM in VMs. @@ -1222,7 +649,8 @@ def _LoadThreaded(self, vms, workload_file, **kwargs): stage='load', clients=len(vms) * kwargs['threads'], threads_per_client_vm=kwargs['threads'], - workload_name=os.path.basename(workload_file)) + workload_name=os.path.basename(workload_file), + ) self.workload_meta = workload_meta record_count = int(workload_meta.get('recordcount', '1000')) n_per_client = int(record_count) // len(vms) @@ -1231,8 +659,9 @@ def _LoadThreaded(self, vms, workload_file, **kwargs): for i in range(len(vms)) ] - remote_path = posixpath.join(linux_packages.INSTALL_DIR, - os.path.basename(workload_file)) + remote_path = posixpath.join( + linux_packages.INSTALL_DIR, os.path.basename(workload_file) + ) args = [((vm, workload_file, remote_path), {}) for vm in dict.fromkeys(vms)] background_tasks.RunThreaded(PushWorkload, args) @@ -1255,31 +684,42 @@ def _Load(loader_index): event='load', start_timestamp=start, end_timestamp=time.time(), - metadata=copy.deepcopy(kwargs)) + metadata=copy.deepcopy(kwargs), + ) if len(results) != len(vms): - raise IOError('Missing results: only {0}/{1} reported\n{2}'.format( - len(results), len(vms), results)) + raise IOError( + 'Missing results: only {0}/{1} reported\n{2}'.format( + len(results), len(vms), results + ) + ) samples = [] if FLAGS.ycsb_include_individual_results and len(results) > 1: for i, result in enumerate(results): samples.extend( - _CreateSamples( - result, + ycsb_stats.CreateSamples( + ycsb_result=result, + ycsb_version=FLAGS.ycsb_version, + include_command_line=_SHOULD_RECORD_COMMAND_LINE.value, result_type='individual', result_index=i, - include_histogram=FLAGS.ycsb_histogram, - **workload_meta)) + **workload_meta, + ) + ) # hdr histograms not collected upon load, only upon run - combined = _CombineResults(results, self.measurement_type, {}) + combined = ycsb_stats.CombineResults(results, self.measurement_type, {}) samples.extend( - _CreateSamples( - combined, - result_type='combined', + ycsb_stats.CreateSamples( + ycsb_result=combined, + ycsb_version=FLAGS.ycsb_version, include_histogram=FLAGS.ycsb_histogram, - **workload_meta)) + include_command_line=_SHOULD_RECORD_COMMAND_LINE.value, + result_type='combined', + **workload_meta, + ) + ) return samples @@ -1296,7 +736,9 @@ def _Run(self, vm, **kwargs): if hdr_files_dir: vm.RemoteCommand('mkdir -p {0}'.format(hdr_files_dir)) stdout, stderr = vm.RobustRemoteCommand(command) - return ParseResults(str(stderr + stdout), self.measurement_type) + return ycsb_stats.ParseResults( + str(stderr + stdout), self.measurement_type, _ERROR_RATE_THRESHOLD.value + ) def _RunThreaded(self, vms, **kwargs): """Run a single workload using `vms`.""" @@ -1337,8 +779,11 @@ def _Run(loader_index): background_tasks.RunThreaded(_Run, list(range(len(vms)))) if len(results) != len(vms): - raise IOError('Missing results: only {0}/{1} reported\n{2}'.format( - len(results), len(vms), results)) + raise IOError( + 'Missing results: only {0}/{1} reported\n{2}'.format( + len(results), len(vms), results + ) + ) return results @@ -1364,9 +809,11 @@ def _GetRunLoadTarget(self, 
current_load, is_sustained=False): lower_bound = FLAGS.ycsb_dynamic_load_throughput_lower_bound step = (1 - FLAGS.ycsb_dynamic_load_sustain_throughput_ratio) * 2 - if (not bool(lower_bound) and - is_sustained) or (lower_bound and - current_load < lower_bound) or (current_load is None): + if ( + (not bool(lower_bound) and is_sustained) + or (lower_bound and current_load < lower_bound) + or (current_load is None) + ): return None elif is_sustained: return current_load * (1 - step) @@ -1402,7 +849,7 @@ def RunStaircaseLoads(self, vms, workloads, **kwargs): if FLAGS.ycsb_timelimit: parameters['maxexecutiontime'] = FLAGS.ycsb_timelimit hdr_files_dir = posixpath.join(self.hdr_dir, str(workload_index)) - if FLAGS.ycsb_measurement_type == HDRHISTOGRAM: + if FLAGS.ycsb_measurement_type == ycsb_stats.HDRHISTOGRAM: parameters['hdrhistogram.fileoutput'] = True parameters['hdrhistogram.output.path'] = hdr_files_dir if FLAGS.ycsb_requestdistribution: @@ -1414,8 +861,9 @@ def RunStaircaseLoads(self, vms, workloads, **kwargs): if FLAGS.ycsb_scanproportion is not None: parameters['scanproportion'] = FLAGS.ycsb_scanproportion parameters.update(kwargs) - remote_path = posixpath.join(linux_packages.INSTALL_DIR, - os.path.basename(workload_file)) + remote_path = posixpath.join( + linux_packages.INSTALL_DIR, os.path.basename(workload_file) + ) with open(workload_file) as fp: workload_meta = ParseWorkload(fp.read()) @@ -1423,7 +871,8 @@ def RunStaircaseLoads(self, vms, workloads, **kwargs): workload_meta.update( workload_name=os.path.basename(workload_file), workload_index=workload_index, - stage='run') + stage='run', + ) args = [ ((vm, workload_file, remote_path), {}) for vm in dict.fromkeys(vms) @@ -1436,16 +885,16 @@ def RunStaircaseLoads(self, vms, workloads, **kwargs): # if no target is passed via flags. for client_count, target_qps_per_vm in _GetThreadsQpsPerLoaderList(): - def _DoRunStairCaseLoad(client_count, - target_qps_per_vm, - workload_meta, - is_sustained=False): + def _DoRunStairCaseLoad( + client_count, target_qps_per_vm, workload_meta, is_sustained=False + ): parameters['threads'] = client_count if target_qps_per_vm: parameters['target'] = int(target_qps_per_vm * len(vms)) if is_sustained: parameters['maxexecutiontime'] = ( - FLAGS.ycsb_dynamic_load_sustain_timelimit) + FLAGS.ycsb_dynamic_load_sustain_timelimit + ) start = time.time() results = self._RunThreaded(vms, **parameters) events.record_event.send( @@ -1453,12 +902,14 @@ def _DoRunStairCaseLoad(client_count, event='run', start_timestamp=start, end_timestamp=time.time(), - metadata=copy.deepcopy(parameters)) + metadata=copy.deepcopy(parameters), + ) client_meta = workload_meta.copy() client_meta.update(parameters) client_meta.update( clients=len(vms) * client_count, - threads_per_client_vm=client_count) + threads_per_client_vm=client_count, + ) # Values passed in via this flag do not get recorded in metadata. # The target passed in is applied to each client VM, so multiply by # len(vms). 
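        # Worked example (illustrative): with 4 client VMs and a per-VM target
        # of 250 QPS, parameters['target'] becomes int(250 * 4) = 1000, the
        # aggregate target across all clients.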
@@ -1471,27 +922,39 @@ def _DoRunStairCaseLoad(client_count, if FLAGS.ycsb_include_individual_results and len(results) > 1: for i, result in enumerate(results): all_results.extend( - _CreateSamples( - result, + ycsb_stats.CreateSamples( + ycsb_result=result, + ycsb_version=FLAGS.ycsb_version, + include_histogram=FLAGS.ycsb_histogram, + include_command_line=_SHOULD_RECORD_COMMAND_LINE.value, result_type='individual', result_index=i, - include_histogram=FLAGS.ycsb_histogram, - **client_meta)) - - if self.measurement_type == HDRHISTOGRAM: - combined_log = self.CombineHdrHistogramLogFiles( - parameters['hdrhistogram.output.path'], vms) - parsed_hdr = ParseHdrLogs(combined_log) - combined = _CombineResults(results, self.measurement_type, - parsed_hdr) + **client_meta, + ) + ) + + if self.measurement_type == ycsb_stats.HDRHISTOGRAM: + combined_log = ycsb_stats.CombineHdrHistogramLogFiles( + self.hdr_dir, parameters['hdrhistogram.output.path'], vms + ) + parsed_hdr = ycsb_stats.ParseHdrLogs(combined_log) + combined = ycsb_stats.CombineResults( + results, self.measurement_type, parsed_hdr + ) else: - combined = _CombineResults(results, self.measurement_type, {}) + combined = ycsb_stats.CombineResults( + results, self.measurement_type, {} + ) run_samples = list( - _CreateSamples( - combined, - result_type='combined', + ycsb_stats.CreateSamples( + ycsb_result=combined, + ycsb_version=FLAGS.ycsb_version, + include_command_line=_SHOULD_RECORD_COMMAND_LINE.value, include_histogram=FLAGS.ycsb_histogram, - **client_meta)) + result_type='combined', + **client_meta, + ) + ) overall_throughput = 0 for s in run_samples: @@ -1500,7 +963,8 @@ def _DoRunStairCaseLoad(client_count, return overall_throughput, run_samples target_throughput, run_samples = _DoRunStairCaseLoad( - client_count, target_qps_per_vm, workload_meta) + client_count, target_qps_per_vm, workload_meta + ) # Uses 5 * unthrottled throughput as starting point. target_throughput *= 5 @@ -1508,71 +972,25 @@ def _DoRunStairCaseLoad(client_count, is_sustained = False while FLAGS.ycsb_dynamic_load: actual_throughput, run_samples = _DoRunStairCaseLoad( - client_count, target_throughput // len(vms), workload_meta, - is_sustained) + client_count, + target_throughput // len(vms), + workload_meta, + is_sustained, + ) is_sustained = FLAGS.ycsb_dynamic_load_sustain_throughput_ratio < ( - actual_throughput / target_throughput) + actual_throughput / target_throughput + ) for s in run_samples: s.metadata['sustained'] = is_sustained all_results.extend(run_samples) - target_throughput = self._GetRunLoadTarget(actual_throughput, - is_sustained) + target_throughput = self._GetRunLoadTarget( + actual_throughput, is_sustained + ) if target_throughput is None: break return all_results - def CombineHdrHistogramLogFiles(self, - hdr_files_dir: str, - vms: Iterable[virtual_machine.VirtualMachine] - ) -> dict[str, str]: - """Combine multiple hdr histograms by group type. - - Combine multiple hdr histograms in hdr log files format into 1 human - readable hdr histogram log file. - This is done by - 1) copying hdrhistogram log files to a single file on a worker vm; - 2) aggregating file containing multiple %-tile histogram into - a single %-tile histogram using HistogramLogProcessor from the - hdrhistogram package that is installed on the vms. Refer to https:// - github.com/HdrHistogram/HdrHistogram/blob/master/HistogramLogProcessor - - Args: - hdr_files_dir: directory on the remote vms where hdr files are stored. 
- vms: remote vms - - Returns: - dict of hdrhistograms keyed by group type - """ - vms = list(vms) - hdrhistograms = {} - for grouptype in HDRHISTOGRAM_GROUPS: - - def _GetHdrHistogramLog(vm, group=grouptype): - filename = f'{hdr_files_dir}{group}.hdr' - return vm.RemoteCommand(f'touch {filename} && tail -1 {filename}')[0] - - results = background_tasks.RunThreaded(_GetHdrHistogramLog, vms) - - # It's possible that there is no result for certain group, e.g., read - # only, update only. - if not all(results): - continue - - worker_vm = vms[0] - for hdr in results[1:]: - worker_vm.RemoteCommand( - 'sudo chmod 755 {1}{2}.hdr && echo "{0}" >> {1}{2}.hdr'.format( - hdr[:-1], hdr_files_dir, grouptype)) - hdrhistogram, _ = worker_vm.RemoteCommand( - 'cd {0} && ./HistogramLogProcessor -i {1}{2}.hdr' - ' -outputValueUnitRatio 1'.format( - self.hdr_dir, hdr_files_dir, grouptype - ) - ) - hdrhistograms[grouptype.lower()] = hdrhistogram - return hdrhistograms - def Load(self, vms, workloads=None, load_kwargs=None): """Load data using YCSB.""" if FLAGS.ycsb_skip_load_stage: @@ -1590,16 +1008,21 @@ def _HasInsertFailures(result_samples): if FLAGS.ycsb_reload_database or not self.loaded: load_samples += list( - self._LoadThreaded(vms, workloads[0], **(load_kwargs or {}))) - if (_SHOULD_FAIL_ON_INCOMPLETE_LOADING.value and - _HasInsertFailures(load_samples)): + self._LoadThreaded(vms, workloads[0], **(load_kwargs or {})) + ) + if _SHOULD_FAIL_ON_INCOMPLETE_LOADING.value and _HasInsertFailures( + load_samples + ): raise errors.Benchmarks.RunError( - 'There are insert failures, so the table loading is incomplete') + 'There are insert failures, so the table loading is incomplete' + ) self.loaded = True if FLAGS.ycsb_sleep_after_load_in_sec > 0: - logging.info('Sleeping %s seconds after load stage.', - FLAGS.ycsb_sleep_after_load_in_sec) + logging.info( + 'Sleeping %s seconds after load stage.', + FLAGS.ycsb_sleep_after_load_in_sec, + ) time.sleep(FLAGS.ycsb_sleep_after_load_in_sec) if FLAGS.ycsb_load_samples: return load_samples @@ -1620,11 +1043,14 @@ def Run(self, vms, workloads=None, run_kwargs=None) -> list[sample.Sample]: samples = self._RunIncrementalMode(vms, workloads, run_kwargs) else: samples = list(self.RunStaircaseLoads(vms, workloads, **run_kwargs)) - if (FLAGS.ycsb_sleep_after_load_in_sec > 0 and - not FLAGS.ycsb_skip_load_stage): + if ( + FLAGS.ycsb_sleep_after_load_in_sec > 0 + and not FLAGS.ycsb_skip_load_stage + ): for s in samples: - s.metadata[ - 'sleep_after_load_in_sec'] = FLAGS.ycsb_sleep_after_load_in_sec + s.metadata['sleep_after_load_in_sec'] = ( + FLAGS.ycsb_sleep_after_load_in_sec + ) return samples def _SetRunParameters(self, params: Mapping[str, Any]) -> None: @@ -1632,8 +1058,7 @@ def _SetRunParameters(self, params: Mapping[str, Any]) -> None: # Ideally YCSB should be refactored to include a function that just takes # commands for a run, but that will be a large refactor. 
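    # Illustrative round-trip: params={'target': 2000, 'threads': 32} is
    # re-parsed below as the flag list ['target=2000', 'threads=32'].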
FLAGS['ycsb_run_parameters'].unparse() - FLAGS['ycsb_run_parameters'].parse( - [f'{k}={v}' for k, v in params.items()]) + FLAGS['ycsb_run_parameters'].parse([f'{k}={v}' for k, v in params.items()]) def _RunBurstMode(self, vms, workloads, run_kwargs=None): """Runs YCSB in burst mode, where the second run has increased QPS.""" @@ -1736,7 +1161,8 @@ def LoadAndRun(self, vms, workloads=None, load_kwargs=None, run_kwargs=None): load_samples = [] if not FLAGS.ycsb_skip_load_stage: load_samples = self.Load( - vms, workloads=workloads, load_kwargs=load_kwargs) + vms, workloads=workloads, load_kwargs=load_kwargs + ) run_samples = [] if not FLAGS.ycsb_skip_run_stage: run_samples = self.Run(vms, workloads=workloads, run_kwargs=run_kwargs) diff --git a/perfkitbenchmarker/linux_packages/ycsb_stats.py b/perfkitbenchmarker/linux_packages/ycsb_stats.py new file mode 100644 index 0000000000..d48f679d55 --- /dev/null +++ b/perfkitbenchmarker/linux_packages/ycsb_stats.py @@ -0,0 +1,843 @@ +# Copyright 2023 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parsing results from YCSB output into samples.""" + +import bisect +import collections +from collections.abc import Iterable, Mapping +import copy +import csv +import dataclasses +import io +import itertools +import json +import logging +import math +import operator +import posixpath +import re +from absl import flags +from perfkitbenchmarker import background_tasks +from perfkitbenchmarker import errors +from perfkitbenchmarker import linux_packages +from perfkitbenchmarker import sample +from perfkitbenchmarker import virtual_machine + +FLAGS = flags.FLAGS + +YCSB_URL_TEMPLATE = ( + 'https://github.com/brianfrankcooper/YCSB/releases/' + 'download/{0}/ycsb-{0}.tar.gz' +) +YCSB_DIR = posixpath.join(linux_packages.INSTALL_DIR, 'ycsb') +YCSB_EXE = posixpath.join(YCSB_DIR, 'bin', 'ycsb') +HDRHISTOGRAM_DIR = posixpath.join(linux_packages.INSTALL_DIR, 'hdrhistogram') +HDRHISTOGRAM_TAR_URL = ( + 'https://github.com/HdrHistogram/HdrHistogram/archive/' + 'HdrHistogram-2.1.10.tar.gz' +) +HDRHISTOGRAM_GROUPS = ['READ', 'UPDATE'] + +_DEFAULT_PERCENTILES = 50, 75, 90, 95, 99, 99.9 + +HISTOGRAM = 'histogram' +HDRHISTOGRAM = 'hdrhistogram' +TIMESERIES = 'timeseries' +YCSB_MEASUREMENT_TYPES = [HISTOGRAM, HDRHISTOGRAM, TIMESERIES] + +# Binary operators to aggregate reported statistics. +# Statistics with operator 'None' will be dropped. +AGGREGATE_OPERATORS = { + 'Operations': operator.add, + 'RunTime(ms)': max, + 'Return=0': operator.add, + 'Return=-1': operator.add, + 'Return=-2': operator.add, + 'Return=-3': operator.add, + 'Return=OK': operator.add, + 'Return=ERROR': operator.add, + 'Return=NOT_FOUND': operator.add, + 'LatencyVariance(ms)': None, + 'AverageLatency(ms)': None, # Requires both average and # of ops. + 'Throughput(ops/sec)': operator.add, + '95thPercentileLatency(ms)': None, # Calculated across clients. + '99thPercentileLatency(ms)': None, # Calculated across clients. 
+    'MinLatency(ms)': min,
+    'MaxLatency(ms)': max,
+}
+
+# Status line pattern
+_STATUS_PATTERN = r'(\d+) sec: \d+ operations; (\d+.\d+) current ops\/sec'
+# Status interval default is 10 sec, change to 1 sec.
+_STATUS_INTERVAL_SEC = 1
+
+# Default loading thread count for non-batching backends.
+DEFAULT_PRELOAD_THREADS = 32
+
+# Custom YCSB tar url. If not set, the official YCSB release will be used.
+_ycsb_tar_url = None
+
+# Parameters for incremental workload. Can be made into flags in the future.
+_INCREMENTAL_STARTING_QPS = 500
+_INCREMENTAL_TIMELIMIT_SEC = 60 * 5
+
+_ThroughputTimeSeries = dict[int, float]
+# Tuple of (percentile, latency, count)
+_HdrHistogramTuple = tuple[float, float, int]
+
+
+@dataclasses.dataclass
+class _OpResult:
+  """Individual results for a single operation.
+
+  Attributes:
+    group: group name (e.g., update, insert, overall)
+    statistics: dict mapping from statistic name to value
+    data_type: Corresponds to --ycsb_measurement_type.
+    data: For HISTOGRAM/HDRHISTOGRAM: list of (ms_lower_bound, count) tuples,
+      e.g. [(0, 530), (19, 1)] indicates that 530 ops took between 0ms and 1ms,
+      and 1 took between 19ms and 20ms. Empty bins are not reported. For
+      TIMESERIES: list of (time, latency us) tuples.
+  """
+
+  group: str = ''
+  data_type: str = ''
+  data: list[tuple[int, float]] = dataclasses.field(default_factory=list)
+  statistics: dict[str, float] = dataclasses.field(default_factory=dict)
+
+
+@dataclasses.dataclass
+class YcsbResult:
+  """Aggregate results for the YCSB run.
+
+  Attributes:
+    client: Contains YCSB version information.
+    command_line: Command line executed.
+    throughput_time_series: Time series of throughputs (interval, QPS).
+    groups: dict of operation group name to results for that operation.
+  """
+
+  client: str = ''
+  command_line: str = ''
+  throughput_time_series: _ThroughputTimeSeries = dataclasses.field(
+      default_factory=dict
+  )
+  groups: dict[str, _OpResult] = dataclasses.field(default_factory=dict)
+
+
+def _ValidateErrorRate(result: YcsbResult, threshold: float) -> None:
+  """Raises an error if the result contains entries with too high an error rate.
+
+  Computes the error rate for each operation; example output looks like:
+
+  [INSERT], Operations, 100
+  [INSERT], AverageLatency(us), 74.92
+  [INSERT], MinLatency(us), 5
+  [INSERT], MaxLatency(us), 98495
+  [INSERT], 95thPercentileLatency(us), 42
+  [INSERT], 99thPercentileLatency(us), 1411
+  [INSERT], Return=OK, 90
+  [INSERT], Return=ERROR, 10
+
+  This function will then compute 10/100 = 0.1 error rate.
+
+  Args:
+    result: The result of running ParseResults().
+    threshold: The error rate above which an exception is thrown. 1.0 means no
+      exception will be thrown; 0.0 means an exception is thrown for any error.
+
+  Raises:
+    errors.Benchmarks.RunError: If the computed error rate is higher than the
+      threshold.
+  """
+  for operation in result.groups.values():
+    name, stats = operation.group, operation.statistics
+    # The operation count can be 0.
+    count = stats.get('Operations', 0)
+    if count == 0:
+      continue
+    # These keys may be missing from the output.
+    error_rate = stats.get('Return=ERROR', 0) / count
+    if error_rate > threshold:
+      raise errors.Benchmarks.RunError(
+          f'YCSB had a {error_rate} error rate for {name}, higher than '
+          f'threshold {threshold}'
+      )
+
+
+def ParseResults(
+    ycsb_result_string: str,
+    data_type: str = 'histogram',
+    error_rate_threshold: float = 1.0,
+) -> 'YcsbResult':
+  """Parse YCSB results.
+
+  Example input for histogram datatype:
+
+    YCSB Client 0.1
+    Command line: -db com.yahoo.ycsb.db.HBaseClient -P /tmp/pkb/workloada
+    [OVERALL], RunTime(ms), 1800413.0
+    [OVERALL], Throughput(ops/sec), 2740.503428935472
+    [UPDATE], Operations, 2468054
+    [UPDATE], AverageLatency(us), 2218.8513395574005
+    [UPDATE], MinLatency(us), 554
+    [UPDATE], MaxLatency(us), 352634
+    [UPDATE], 95thPercentileLatency(ms), 4
+    [UPDATE], 99thPercentileLatency(ms), 7
+    [UPDATE], Return=0, 2468054
+    [UPDATE], 0, 398998
+    [UPDATE], 1, 1015682
+    [UPDATE], 2, 532078
+    ...
+
+  Example input for hdrhistogram datatype:
+
+    YCSB Client 0.17.0
+    Command line: -db com.yahoo.ycsb.db.RedisClient -P /opt/pkb/workloadb
+    [OVERALL], RunTime(ms), 29770.0
+    [OVERALL], Throughput(ops/sec), 33590.86328518643
+    [UPDATE], Operations, 49856.0
+    [UPDATE], AverageLatency(us), 1478.0115532734276
+    [UPDATE], MinLatency(us), 312.0
+    [UPDATE], MaxLatency(us), 24623.0
+    [UPDATE], 95thPercentileLatency(us), 3501.0
+    [UPDATE], 99thPercentileLatency(us), 6747.0
+    [UPDATE], Return=OK, 49856
+    ...
+
+  Example input for ycsb version 0.17.0+:
+
+    ...
+    Command line: -db com.yahoo.ycsb.db.HBaseClient10 ... -load
+    YCSB Client 0.17.0
+
+    Loading workload...
+    Starting test.
+    ...
+    [OVERALL], RunTime(ms), 11411
+    [OVERALL], Throughput(ops/sec), 8763.473841030585
+    [INSERT], Operations, 100000
+    [INSERT], AverageLatency(us), 74.92
+    [INSERT], MinLatency(us), 5
+    [INSERT], MaxLatency(us), 98495
+    [INSERT], 95thPercentileLatency(us), 42
+    [INSERT], 99thPercentileLatency(us), 1411
+    [INSERT], Return=OK, 100000
+    ...
+
+  Example input for timeseries datatype:
+
+    ...
+    [OVERALL], RunTime(ms), 240007.0
+    [OVERALL], Throughput(ops/sec), 10664.605615669543
+    ...
+    [READ], Operations, 1279253
+    [READ], AverageLatency(us), 3002.7057071587874
+    [READ], MinLatency(us), 63
+    [READ], MaxLatency(us), 93584
+    [READ], Return=OK, 1279281
+    [READ], 0, 528.6142757498257
+    [READ], 500, 360.95347448674966
+    [READ], 1000, 667.7379547689283
+    [READ], 1500, 731.5389357265888
+    [READ], 2000, 778.7992281717318
+    ...
+
+  Args:
+    ycsb_result_string: str. Text output from YCSB.
+    data_type: Either 'histogram', 'timeseries', or 'hdrhistogram'. 'histogram'
+      and 'hdrhistogram' datasets are in the same format, with the difference
+      being that 'hdrhistogram' lacks the (millisec, count) histogram
+      component; hence they are parsed similarly.
+    error_rate_threshold: Error statistics in the output should not exceed this
+      ratio.
+
+  Returns:
+    A YcsbResult object that contains the results from parsing YCSB output.
+
+  Raises:
+    IOError: If the results contained unexpected lines.
+  """
+  if (
+      'redis.clients.jedis.exceptions.JedisConnectionException'
+      in ycsb_result_string
+  ):
+    # This error is caused by ycsb using an old version of redis client 2.9.0
+    # https://github.com/xetorthio/jedis/issues/1977
+    raise errors.Benchmarks.KnownIntermittentError(
+        'errors.Benchmarks.KnownIntermittentError'
+    )
+
+  lines = []
+  client_string = 'YCSB'
+  command_line = 'unknown'
+  throughput_time_series = {}
+  fp = io.StringIO(ycsb_result_string)
+  result_string = next(fp).strip()
+
+  def IsHeadOfResults(line):
+    return line.startswith('[OVERALL]')
+
+  while not IsHeadOfResults(result_string):
+    if result_string.startswith('YCSB Client 0.'):
+      client_string = result_string
+    if result_string.startswith('Command line:'):
+      command_line = result_string
+    # Look for status lines which include throughput on a 1-sec basis.
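+    # Illustrative match (sample values, not from a real run): the status line
+    # '10 sec: 5000 operations; 501.2 current ops/sec' yields
+    # timestamp=10 and qps=501.2 below.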
+    match = re.search(_STATUS_PATTERN, result_string)
+    if match is not None:
+      timestamp, qps = int(match.group(1)), float(match.group(2))
+      # Repeats in the printed status are erroneous, ignore.
+      if timestamp not in throughput_time_series:
+        throughput_time_series[timestamp] = qps
+    try:
+      result_string = next(fp).strip()
+    except StopIteration:
+      raise IOError(
+          f'Could not parse YCSB output: {ycsb_result_string}'
+      ) from None
+
+  if result_string.startswith('[OVERALL]'):  # YCSB > 0.7.0.
+    lines.append(result_string)
+  else:
+    # Received unexpected header
+    raise IOError(f'Unexpected header: {client_string}')
+
+  # Some databases print additional output to stdout.
+  # YCSB results start with [];
+  # filter to just those lines.
+  def LineFilter(line):
+    return re.search(r'^\[[A-Z]+\]', line) is not None
+
+  lines = itertools.chain(lines, filter(LineFilter, fp))
+
+  r = csv.reader(lines)
+
+  by_operation = itertools.groupby(r, operator.itemgetter(0))
+
+  result = YcsbResult(
+      client=client_string,
+      command_line=command_line,
+      throughput_time_series=throughput_time_series,
+  )
+
+  for operation, lines in by_operation:
+    operation = operation[1:-1].lower()
+
+    if operation == 'cleanup':
+      continue
+
+    op_result = _OpResult(group=operation, data_type=data_type)
+    latency_unit = 'ms'
+    for _, name, val in lines:
+      name = name.strip()
+      val = val.strip()
+      # Drop ">" from ">1000".
+      if name.startswith('>'):
+        name = name[1:]
+      val = float(val) if '.' in val or 'nan' in val.lower() else int(val)
+      if name.isdigit():
+        if val:
+          if data_type == TIMESERIES and latency_unit == 'us':
+            val /= 1000.0
+          op_result.data.append((int(name), val))
+      else:
+        if '(us)' in name:
+          name = name.replace('(us)', '(ms)')
+          val /= 1000.0
+          latency_unit = 'us'
+        op_result.statistics[name] = val
+
+    result.groups[operation] = op_result
+  _ValidateErrorRate(result, error_rate_threshold)
+  return result
+
+
+def ParseHdrLogFile(logfile: str) -> list[_HdrHistogramTuple]:
+  """Parse an hdrhistogram log file into a list of (percentile, latency, count).
+
+  Example decoded hdrhistogram logfile (value measures latency in microsec):
+
+  #[StartTime: 1523565997 (seconds since epoch), Thu Apr 12 20:46:37 UTC 2018]
+       Value     Percentile TotalCount 1/(1-Percentile)
+
+     314.000 0.000000000000          2           1.00
+     853.000 0.100000000000      49955           1.11
+     949.000 0.200000000000     100351           1.25
+    1033.000 0.300000000000     150110           1.43
+    ...
+  134271.000 0.999998664856    1000008      748982.86
+  134271.000 0.999998855591    1000008      873813.33
+  201983.000 0.999999046326    1000009     1048576.00
+  #[Mean = 1287.159, StdDeviation = 667.560]
+  #[Max = 201983.000, Total count = 1000009]
+  #[Buckets = 8, SubBuckets = 2048]
+
+  Example of output:
+    [(0, 0.314, 2), (10, 0.853, 49953), (20, 0.949, 50396), ...]
+
+  Args:
+    logfile: Hdrhistogram log file.
+
+  Returns:
+    List of (percentile, value, count) tuples.
+  """
+  result = []
+  last_percent_value = -1
+  prev_total_count = 0
+  for row in logfile.split('\n'):
+    if re.match(r'( *)(\d|\.)( *)', row):
+      row_vals = row.split()
+      # Convert percentile to 100-based and round down to 3 decimal places.
+      percentile = math.floor(float(row_vals[1]) * 100000) / 1000.0
+      current_total_count = int(row_vals[2])
+      if (
+          percentile > last_percent_value
+          and current_total_count > prev_total_count
+      ):
+        # Convert latency to millisec-based and percentile to 100-based.
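+        # Illustrative trace using the docstring example: the row
+        # '853.000 0.100000000000 49955 1.11' gives percentile=10.0,
+        # latency=0.853 ms, and count=49955 - 2 = 49953.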
+        latency = float(row_vals[0]) / 1000
+        count = current_total_count - prev_total_count
+        result.append((percentile, latency, count))
+        last_percent_value = percentile
+        prev_total_count = current_total_count
+  return result
+
+
+def ParseHdrLogs(
+    hdrlogs: Mapping[str, str]
+) -> dict[str, list[_HdrHistogramTuple]]:
+  """Parse a dict of group to hdr logs into a dict of group to histogram tuples.
+
+  Args:
+    hdrlogs: Dict of group (read or update) to hdr logs for that group.
+
+  Returns:
+    Dict of group to histogram tuples of reportable percentile values.
+  """
+  parsed_hdr_histograms = {}
+  for group, logfile in hdrlogs.items():
+    values = ParseHdrLogFile(logfile)
+    parsed_hdr_histograms[group] = values
+  return parsed_hdr_histograms
+
+
+def _CumulativeSum(xs):
+  total = 0
+  for x in xs:
+    total += x
+    yield total
+
+
+def _WeightedQuantile(x, weights, p):
+  """Weighted quantile measurement for an ordered list.
+
+  This method interpolates to the higher value when the quantile is not a
+  direct member of the list. This works well for YCSB, since latencies are
+  floored.
+
+  Args:
+    x: List of values.
+    weights: List of numeric weights.
+    p: float. Desired quantile in the interval [0, 1].
+
+  Returns:
+    float.
+
+  Raises:
+    ValueError: When 'x' and 'weights' are not the same length, or 'p' is not
+      in the interval [0, 1].
+  """
+  if len(x) != len(weights):
+    raise ValueError(
+        'Lengths do not match: {0} != {1}'.format(len(x), len(weights))
+    )
+  if p < 0 or p > 1:
+    raise ValueError('Invalid quantile: {0}'.format(p))
+  n = sum(weights)
+  target = n * float(p)
+  cumulative = list(_CumulativeSum(weights))
+
+  # Find the first cumulative weight >= target.
+  i = bisect.bisect_left(cumulative, target)
+  if i == len(x):
+    return x[-1]
+  else:
+    return x[i]
+
+
+def _PercentilesFromHistogram(ycsb_histogram, percentiles=_DEFAULT_PERCENTILES):
+  """Calculate percentiles from a YCSB histogram.
+
+  Args:
+    ycsb_histogram: List of (time_ms, frequency) tuples.
+    percentiles: iterable of floats, in the interval [0, 100].
+
+  Returns:
+    dict, mapping from percentile to value.
+
+  Raises:
+    ValueError: If one or more percentiles are outside [0, 100].
+  """
+  result = collections.OrderedDict()
+  histogram = sorted(ycsb_histogram)
+  for percentile in percentiles:
+    if percentile < 0 or percentile > 100:
+      raise ValueError('Invalid percentile: {0}'.format(percentile))
+    if math.modf(percentile)[0] < 1e-7:
+      percentile = int(percentile)
+    label = 'p{0}'.format(percentile)
+    latencies, freqs = list(zip(*histogram))
+    time_ms = _WeightedQuantile(latencies, freqs, percentile * 0.01)
+    result[label] = time_ms
+  return result
+
+
+def CombineResults(
+    result_list: Iterable[YcsbResult],
+    measurement_type: str,
+    combined_hdr: Mapping[str, list[_HdrHistogramTuple]],
+):
+  """Combine results from multiple YCSB clients.
+
+  Reduces a list of YCSB results (the output of ParseResults)
+  into a single result. Histogram bin counts, operation counts, and throughput
+  are summed; RunTime is replaced by the maximum runtime of any result.
+
+  Args:
+    result_list: Iterable of ParseResults outputs.
+    measurement_type: Measurement type used. If measurement type is histogram,
+      histogram bins are summed across results. If measurement type is
+      hdrhistogram, an aggregated hdrhistogram (combined_hdr) is expected.
+    combined_hdr: Dict of already aggregated histograms.
+
+  Returns:
+    A YcsbResult, as returned by ParseResults.
+ """ + + def DropUnaggregated(result: YcsbResult) -> None: + """Remove statistics which 'operators' specify should not be combined.""" + drop_keys = {k for k, v in AGGREGATE_OPERATORS.items() if v is None} + for group in result.groups.values(): + for k in drop_keys: + group.statistics.pop(k, None) + + def CombineHistograms(hist1, hist2): + h1 = dict(hist1) + h2 = dict(hist2) + keys = sorted(frozenset(h1) | frozenset(h2)) + result = [] + for k in keys: + result.append((k, h1.get(k, 0) + h2.get(k, 0))) + return result + + combined_weights = {} + + def _CombineLatencyTimeSeries( + combined_series: list[tuple[int, float]], + individual_series: list[tuple[int, float]], + ) -> list[tuple[int, float]]: + """Combines two timeseries of average latencies. + + Args: + combined_series: A list representing the timeseries with which the + individual series is being merged. + individual_series: A list representing the timeseries being merged with + the combined series. + + Returns: + A list representing the new combined series. + + Note that this assumes that each individual timeseries spent an equal + amount of time executing requests for each timeslice. This should hold for + runs without -target where each client has an equal number of threads, but + may not hold otherwise. + """ + combined_series = dict(combined_series) + individual_series = dict(individual_series) + timestamps = set(combined_series) | set(individual_series) + + result = [] + for timestamp in sorted(timestamps): + if timestamp not in individual_series: + continue + if timestamp not in combined_weights: + combined_weights[timestamp] = 1.0 + if timestamp not in combined_series: + result.append((timestamp, individual_series[timestamp])) + continue + + # This computes a new combined average latency by dividing the sum of + # request latencies by the sum of request counts for the time period. + # The sum of latencies for an individual series is assumed to be "1", + # so the sum of latencies for the combined series is the total number of + # series i.e. "combined_weight". + # The request count for an individual series is 1 / average latency. + # This means the request count for the combined series is + # combined_weight * 1 / average latency. + combined_weight = combined_weights[timestamp] + average_latency = (combined_weight + 1.0) / ( + (combined_weight / combined_series[timestamp]) + + (1.0 / individual_series[timestamp]) + ) + result.append((timestamp, average_latency)) + combined_weights[timestamp] += 1.0 + return result + + def _CombineThroughputTimeSeries( + series1: _ThroughputTimeSeries, series2: _ThroughputTimeSeries + ) -> _ThroughputTimeSeries: + """Returns a combined dict of [timestamp, total QPS] from the two series.""" + timestamps1 = set(series1) + timestamps2 = set(series2) + all_timestamps = timestamps1 | timestamps2 + diff_timestamps = timestamps1 ^ timestamps2 + if diff_timestamps: + # This case is rare but does happen occassionally, so log a warning + # instead of raising an exception. 
+ logging.warning( + 'Expected combined timestamps to be the same, got different ' + 'timestamps: %s', + diff_timestamps, + ) + result = {} + for timestamp in all_timestamps: + result[timestamp] = series1.get(timestamp, 0) + series2.get(timestamp, 0) + return result + + result_list = list(result_list) + result = copy.deepcopy(result_list[0]) + DropUnaggregated(result) + + for indiv in result_list[1:]: + for group_name, group in indiv.groups.items(): + if group_name not in result.groups: + logging.warning( + 'Found result group "%s" in individual YCSB result, ' + 'but not in accumulator.', + group_name, + ) + result.groups[group_name] = copy.deepcopy(group) + continue + + # Combine reported statistics. + # If no combining operator is defined, the statistic is skipped. + # Otherwise, the aggregated value is either: + # * The value in 'indiv', if the statistic is not present in 'result' or + # * AGGREGATE_OPERATORS[statistic](result_value, indiv_value) + for k, v in group.statistics.items(): + if k not in AGGREGATE_OPERATORS: + logging.warning('No operator for "%s". Skipping aggregation.', k) + continue + elif AGGREGATE_OPERATORS[k] is None: # Drop + result.groups[group_name].statistics.pop(k, None) + continue + elif k not in result.groups[group_name].statistics: + logging.warning( + 'Found statistic "%s.%s" in individual YCSB result, ' + 'but not in accumulator.', + group_name, + k, + ) + result.groups[group_name].statistics[k] = copy.deepcopy(v) + continue + + op = AGGREGATE_OPERATORS[k] + result.groups[group_name].statistics[k] = op( + result.groups[group_name].statistics[k], v + ) + + if measurement_type == HISTOGRAM: + result.groups[group_name].data = CombineHistograms( + result.groups[group_name].data, group.data + ) + elif measurement_type == TIMESERIES: + result.groups[group_name].data = _CombineLatencyTimeSeries( + result.groups[group_name].data, group.data + ) + result.client = ' '.join((result.client, indiv.client)) + result.command_line = ';'.join((result.command_line, indiv.command_line)) + + # if _THROUGHPUT_TIME_SERIES.value: + result.throughput_time_series = _CombineThroughputTimeSeries( + result.throughput_time_series, indiv.throughput_time_series + ) + + if measurement_type == HDRHISTOGRAM: + for group_name in combined_hdr: + if group_name in result.groups: + result.groups[group_name].data = combined_hdr[group_name] + + return result + + +def CombineHdrHistogramLogFiles( + hdr_install_dir: str, + hdr_files_dir: str, + vms: Iterable[virtual_machine.VirtualMachine], +) -> dict[str, str]: + """Combine multiple hdr histograms by group type. + + Combine multiple hdr histograms in hdr log files format into 1 human + readable hdr histogram log file. + This is done by + 1) copying hdrhistogram log files to a single file on a worker vm; + 2) aggregating file containing multiple %-tile histogram into + a single %-tile histogram using HistogramLogProcessor from the + hdrhistogram package that is installed on the vms. Refer to https:// + github.com/HdrHistogram/HdrHistogram/blob/master/HistogramLogProcessor + + Args: + hdr_install_dir: directory where HistogramLogProcessor is located. + hdr_files_dir: directory on the remote vms where hdr files are stored. 
+ vms: remote vms + + Returns: + dict of hdrhistograms keyed by group type + """ + vms = list(vms) + hdrhistograms = {} + for grouptype in HDRHISTOGRAM_GROUPS: + + def _GetHdrHistogramLog(vm, group=grouptype): + filename = f'{hdr_files_dir}{group}.hdr' + return vm.RemoteCommand(f'touch {filename} && tail -1 {filename}')[0] + + results = background_tasks.RunThreaded(_GetHdrHistogramLog, vms) + + # It's possible that there is no result for certain group, e.g., read + # only, update only. + if not all(results): + continue + + worker_vm = vms[0] + for hdr in results[1:]: + worker_vm.RemoteCommand( + 'sudo chmod 755 {1}{2}.hdr && echo "{0}" >> {1}{2}.hdr'.format( + hdr[:-1], hdr_files_dir, grouptype + ) + ) + hdrhistogram, _ = worker_vm.RemoteCommand( + 'cd {0} && ./HistogramLogProcessor -i {1}{2}.hdr' + ' -outputValueUnitRatio 1'.format( + hdr_install_dir, hdr_files_dir, grouptype + ) + ) + hdrhistograms[grouptype.lower()] = hdrhistogram + return hdrhistograms + + +def CreateSamples( + ycsb_result: YcsbResult, + ycsb_version: str, + include_histogram: bool = False, + include_command_line=True, + **kwargs, +) -> list[sample.Sample]: + """Create PKB samples from a YCSB result. + + Args: + ycsb_result: Result of ParseResults. + ycsb_version: The version of YCSB used to run the tests. + include_histogram: If True, include records for each histogram bin. Note + that this will increase the output volume significantly. + include_command_line: If True, include command line in metadata. Note that + this makes sample output much longer if there are multiple client VMs. + **kwargs: Base metadata for each sample. + + Yields: + List of sample.Sample objects. + """ + command_line = ycsb_result.command_line + stage = 'load' if command_line.endswith('-load') else 'run' + base_metadata = { + 'stage': stage, + 'ycsb_tar_url': _ycsb_tar_url, + 'ycsb_version': ycsb_version, + } + if include_command_line: + base_metadata['command_line'] = command_line + base_metadata.update(kwargs) + + throughput_time_series = ycsb_result.throughput_time_series + if throughput_time_series: + yield sample.Sample( + 'Throughput Time Series', + 0, + '', + {'throughput_time_series': sorted(throughput_time_series.items())}, + ) + + for group_name, group in ycsb_result.groups.items(): + meta = base_metadata.copy() + meta['operation'] = group_name + for statistic, value in group.statistics.items(): + if value is None: + continue + + unit = '' + m = re.match(r'^(.*) *\((us|ms|ops/sec)\)$', statistic) + if m: + statistic = m.group(1) + unit = m.group(2) + yield sample.Sample(' '.join([group_name, statistic]), value, unit, meta) + + if group.data and group.data_type == HISTOGRAM: + percentiles = _PercentilesFromHistogram(group.data) + for label, value in percentiles.items(): + yield sample.Sample( + ' '.join([group_name, label, 'latency']), value, 'ms', meta + ) + if include_histogram: + for time_ms, count in group.data: + yield sample.Sample( + '{0}_latency_histogram_{1}_ms'.format(group_name, time_ms), + count, + 'count', + meta, + ) + + if group.data and group.data_type == HDRHISTOGRAM: + # Strip percentile from the three-element tuples. 
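+      # Editor's note (illustrative): a tuple like (99.9, 1.2, 500) becomes
+      # (1.2, 500), i.e. a (latency_ms, count) pair in the shape that
+      # _PercentilesFromHistogram expects.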
+      histogram = [value_count[-2:] for value_count in group.data]
+      percentiles = _PercentilesFromHistogram(histogram)
+      for label, value in percentiles.items():
+        yield sample.Sample(
+            ' '.join([group_name, label, 'latency']), value, 'ms', meta
+        )
+      if include_histogram:
+        histogram = []
+        for _, value, bucket_count in group.data:
+          histogram.append(
+              {'microsec_latency': int(value * 1000), 'count': bucket_count}
+          )
+        hist_meta = meta.copy()
+        hist_meta.update({'histogram': json.dumps(histogram)})
+        yield sample.Sample(
+            '{0} latency histogram'.format(group_name), 0, '', hist_meta
+        )
+
+    if group.data and group.data_type == TIMESERIES:
+      for sample_time, average_latency in group.data:
+        timeseries_meta = meta.copy()
+        timeseries_meta['sample_time'] = sample_time
+        yield sample.Sample(
+            ' '.join([group_name, 'AverageLatency (timeseries)']),
+            average_latency,
+            'ms',
+            timeseries_meta,
+        )
+      yield sample.Sample(
+          'Average Latency Time Series',
+          0,
+          '',
+          {'latency_time_series': group.data},
+      )
diff --git a/perfkitbenchmarker/linux_virtual_machine.py b/perfkitbenchmarker/linux_virtual_machine.py
index 3274d50f8c..ed73ccb16b 100644
--- a/perfkitbenchmarker/linux_virtual_machine.py
+++ b/perfkitbenchmarker/linux_virtual_machine.py
@@ -60,6 +60,7 @@
 OS_PRETTY_NAME_REGEXP = r'PRETTY_NAME="(.*)"'
 _EPEL_URL = 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-{}.noarch.rpm'
+_ORACLE_EPEL_URL = 'oracle-epel-release-el{}'
 CLEAR_BUILD_REGEXP = r'Installed version:\s*(.*)\s*'
 UPDATE_RETRIES = 5
 DEFAULT_SSH_PORT = 22
@@ -206,7 +207,7 @@ class CpuVulnerabilities:
-  """The 3 different vulnerablity statuses from vm.cpu_vulernabilities.
+  """The 3 different vulnerability statuses from vm.cpu_vulernabilities.
 
   Example input:
   /sys/devices/system/cpu/vulnerabilities/itlb_multihit:KVM: Vulnerable
@@ -1861,7 +1862,7 @@ class BaseRhelMixin(BaseLinuxMixin):
   """Class holding RHEL/CentOS specific VM methods and attributes."""
 
   # In all RHEL 8+ based distros yum is an alias to dnf.
-  # dnf is backwards compatibile with yum, but has some additional capabilities
+  # dnf is backwards compatible with yum, but has some additional capabilities
   # For CentOS and RHEL 7 we override this to yum and do not pass dnf-only flags
   # The commands are similar enough that forking whole methods seemed unnecessary.
   # This can be removed when CentOS and RHEL 7 are no longer supported by PKB.
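Editor's note: the Oracle Linux support added in the next hunk follows the existing RHEL-mixin pattern. A minimal sketch of that pattern, with a hypothetical `Example8Mixin` (it assumes the surrounding module's `BaseRhelMixin` and reuses an existing os_type constant purely for illustration):

```python
from perfkitbenchmarker import os_types


class Example8Mixin(BaseRhelMixin):  # BaseRhelMixin is defined in this module
  """Class holding a hypothetical RHEL-8-derivative's methods and attributes."""

  OS_TYPE = os_types.RHEL8  # a real mixin would register its own constant
  PYTHON_2_PACKAGE = None  # RHEL 8+ derivatives ship no python2 package

  def SetupPackageManager(self):
    """Install EPEL (or the distro's EPEL-equivalent release package)."""
    self.RemoteCommand('sudo dnf install -y epel-release')
```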
@@ -2057,6 +2058,23 @@ def SetupPackageManager(self): # https://docs.fedoraproject.org/en-US/epel/#_rhel_9 self.RemoteCommand(f'sudo dnf install -y {_EPEL_URL.format(9)}') +class Oracle8Mixin(BaseRhelMixin): + """Class holding Oracle Linux 8 specific VM methods and attributes.""" + OS_TYPE = os_types.ORACLE8 + PYTHON_2_PACKAGE = None + + def SetupPackageManager(self): + """Install EPEL.""" + self.RemoteCommand(f'sudo dnf install -y {_ORACLE_EPEL_URL.format(8)}') + +class Oracle9Mixin(BaseRhelMixin): + """Class holding Oracle Linux 9 specific VM methods and attributes.""" + OS_TYPE = os_types.ORACLE9 + PYTHON_2_PACKAGE = None + + def SetupPackageManager(self): + """Install EPEL.""" + self.RemoteCommand(f'sudo dnf install -y {_ORACLE_EPEL_URL.format(9)}') class Fedora36Mixin(BaseRhelMixin): """Class holding Fedora36 specific methods and attributes.""" @@ -2387,11 +2405,13 @@ class Ubuntu1604Mixin(BaseUbuntuMixin, virtual_machine.DeprecatedOsMixin): ALTERNATIVE_OS = os_types.UBUNTU1804 -class Ubuntu1804Mixin(BaseUbuntuMixin): +class Ubuntu1804Mixin(BaseUbuntuMixin, virtual_machine.DeprecatedOsMixin): """Class holding Ubuntu1804 specific VM methods and attributes.""" OS_TYPE = os_types.UBUNTU1804 # https://packages.ubuntu.com/bionic/python PYTHON_2_PACKAGE = 'python' + END_OF_LIFE = '2023-05-31' + ALTERNATIVE_OS = os_types.UBUNTU2004 def UpdateEnvironmentPath(self): """Add /snap/bin to default search path for Ubuntu1804. @@ -2409,15 +2429,22 @@ class Ubuntu1804EfaMixin(Ubuntu1804Mixin): OS_TYPE = os_types.UBUNTU1804_EFA -# Inherit Ubuntu 18's idiosyncracies. -# Note https://bugs.launchpad.net/snappy/+bug/1659719 is also marked not fix in -# focal. -class Ubuntu2004Mixin(Ubuntu1804Mixin): +class Ubuntu2004Mixin(BaseUbuntuMixin): """Class holding Ubuntu2004 specific VM methods and attributes.""" OS_TYPE = os_types.UBUNTU2004 # https://packages.ubuntu.com/focal/python2 PYTHON_2_PACKAGE = 'python2' + def UpdateEnvironmentPath(self): + """Add /snap/bin to default search path for Ubuntu2004. + + See https://bugs.launchpad.net/snappy/+bug/1659719. + """ + self.RemoteCommand( + r'sudo sed -i "1 i\export PATH=$PATH:/snap/bin" ~/.bashrc') + self.RemoteCommand( + r'sudo sed -i "1 i\export PATH=$PATH:/snap/bin" /etc/bash.bashrc') + class Ubuntu2004EfaMixin(Ubuntu2004Mixin): """Class holding EFA specific VM methods and attributes.""" diff --git a/perfkitbenchmarker/managed_memory_store.py b/perfkitbenchmarker/managed_memory_store.py index 0798c2c913..44dc0f7420 100644 --- a/perfkitbenchmarker/managed_memory_store.py +++ b/perfkitbenchmarker/managed_memory_store.py @@ -14,15 +14,20 @@ """Module containing class for cloud managed memory stores.""" import abc -import logging +import dataclasses +import re from typing import Optional from absl import flags +from absl import logging from perfkitbenchmarker import resource +from perfkitbenchmarker import virtual_machine # List of memory store types REDIS = 'REDIS' MEMCACHED = 'MEMCACHED' +_REDIS_SHARDS_REGEX = r'(?s)slots\n(\d+)\n(\d+).+?port\n(\d+)\nip\n(\S+)' + FLAGS = flags.FLAGS @@ -71,6 +76,19 @@ class Failover(object): False, 'If True, provisions a cluster instead of a standalone instance.', ) +_NODE_COUNT = flags.DEFINE_integer( + 'managed_memory_store_node_count', + 1, + ( + 'Number of cache nodes (shards) to use. Only used if ' + 'managed_memory_store_cluster is True.' 
+ ), +) +_ZONES = flags.DEFINE_list( + 'cloud_redis_zones', + [], + 'If using cluster mode, the zones to distribute shards between.', +) flags.DEFINE_string( 'cloud_redis_region', 'us-central1', @@ -79,6 +97,9 @@ class Failover(object): 'Defaults to the GCP region of us-central1.' ), ) +_TLS = flags.DEFINE_bool( + 'cloud_redis_tls', False, 'Whether to enable TLS on the instance.' +) MEMCACHED_NODE_COUNT = 1 @@ -124,6 +145,23 @@ def ParseReadableVersion(version: str) -> str: return '.'.join(version.split('.', 2)[:2]) +@dataclasses.dataclass +class RedisShard: + """An object representing a Redis shard. + + Attributes: + slots: formatted like 2731-5461 + ip: address of the redis shard + port: port of the redis shard + zone: location where the shard is located + """ + + slots: str + ip: str + port: int + zone: Optional[str] = None + + class BaseManagedMemoryStore(resource.BaseResource): """Object representing a cloud managed memory store.""" @@ -143,8 +181,13 @@ def __init__(self, spec): self._port: int = None self._password: str = None self._clustered: bool = _MANAGED_MEMORY_STORE_CLUSTER.value + self.node_count = _NODE_COUNT.value if self._clustered else 1 + self.zones = _ZONES.value if self._clustered else [] + self.enable_tls = _TLS.value self.metadata['clustered'] = self._clustered + self.metadata['node_count'] = self.node_count + self.metadata['enable_tls'] = self.enable_tls def GetMemoryStoreIp(self) -> str: """Returns the Ip address of the managed memory store.""" @@ -158,6 +201,30 @@ def GetMemoryStorePort(self) -> int: self._PopulateEndpoint() return self._port + def GetShardEndpoints( + self, client: virtual_machine.BaseVirtualMachine + ) -> list[RedisShard]: + """Returns shard endpoints for the cluster. + + The format of the `cluster shards` command can be found here: + https://redis.io/commands/cluster-shards/. + + Args: + client: VM that has access to the redis cluster. + + Returns: + A list of redis shards. 
+ """ + result, _ = client.RemoteCommand( + f'redis-cli -h {self.GetMemoryStoreIp()} -p' + f' {self.GetMemoryStorePort()} cluster shards' + ) + shards = re.findall(_REDIS_SHARDS_REGEX, result) + return [ + RedisShard(slots=f'{slot_begin}-{slot_end}', ip=ip, port=int(port)) + for slot_begin, slot_end, port, ip in shards + ] + @abc.abstractmethod def _PopulateEndpoint(self) -> None: """Populates the endpoint information for the managed memory store.""" diff --git a/perfkitbenchmarker/network.py b/perfkitbenchmarker/network.py index 331828b3df..3a1a380124 100644 --- a/perfkitbenchmarker/network.py +++ b/perfkitbenchmarker/network.py @@ -105,6 +105,7 @@ def __init__(self, zone=None, cidr=None, machine_type=None): self.zone = zone self.cidr = cidr self.machine_type = machine_type + self.subnet_name = None def __repr__(self): return '%s(%r)' % (self.__class__, self.__dict__) diff --git a/perfkitbenchmarker/os_types.py b/perfkitbenchmarker/os_types.py index 6da6bfaa9f..568a55fafe 100644 --- a/perfkitbenchmarker/os_types.py +++ b/perfkitbenchmarker/os_types.py @@ -38,6 +38,8 @@ FEDORA36 = 'fedora36' FEDORA37 = 'fedora37' JUJU = 'juju' +ORACLE8 = 'oracle8' +ORACLE9 = 'oracle9' RHEL7 = 'rhel7' RHEL8 = 'rhel8' RHEL9 = 'rhel9' @@ -48,7 +50,7 @@ UBUNTU_CONTAINER = 'ubuntu_container' UBUNTU1604 = 'ubuntu1604' # deprecated UBUNTU1604_CUDA9 = 'ubuntu1604_cuda9' -UBUNTU1804 = 'ubuntu1804' +UBUNTU1804 = 'ubuntu1804' # deprecated UBUNTU1804_EFA = 'ubuntu1804_efa' UBUNTU2004 = 'ubuntu2004' UBUNTU2004_EFA = 'ubuntu2004_efa' @@ -104,6 +106,8 @@ FEDORA36, FEDORA37, JUJU, + ORACLE8, + ORACLE9, RHEL7, RHEL8, RHEL9, @@ -114,7 +118,7 @@ UBUNTU_CONTAINER, UBUNTU1604, # deprecated UBUNTU1604_CUDA9, - UBUNTU1804, + UBUNTU1804, # deprecated UBUNTU1804_EFA, UBUNTU2004, UBUNTU2004_EFA, @@ -153,6 +157,6 @@ BASE_OS_TYPES = [CLEAR, CORE_OS, DEBIAN, RHEL, WINDOWS] # May change from time to time. -DEFAULT = UBUNTU1804 +DEFAULT = UBUNTU2004 flags.DEFINE_enum('os_type', DEFAULT, ALL, 'The VM\'s OS type.') diff --git a/perfkitbenchmarker/pkb.py b/perfkitbenchmarker/pkb.py index fb70b8d863..4f29413ab5 100644 --- a/perfkitbenchmarker/pkb.py +++ b/perfkitbenchmarker/pkb.py @@ -1481,6 +1481,13 @@ def Main(): if FLAGS.helpmatchmd: _PrintHelpMD(FLAGS.helpmatchmd) return 0 + + if not FLAGS.accept_licenses: + logging.warning( + 'Please run with the --accept_licenses flag to ' + 'acknowledge PKB may install software on your behalf.' 
+ ) + CheckVersionFlag() SetUpPKB() return RunBenchmarks() diff --git a/perfkitbenchmarker/provider_info.py b/perfkitbenchmarker/provider_info.py index ab727874d4..cc0635366f 100644 --- a/perfkitbenchmarker/provider_info.py +++ b/perfkitbenchmarker/provider_info.py @@ -36,9 +36,10 @@ # Though Docker is not a cloud provider, it's inclusion is useful # for performing on premise to cloud benchmarks DOCKER = 'Docker' +OCI = 'OCI' VALID_CLOUDS = (GCP, AZURE, AWS, IBMCLOUD, DIGITALOCEAN, KUBERNETES, OPENSTACK, - RACKSPACE, CLOUDSTACK, ALICLOUD, MESOS, PROFITBRICKS, DOCKER) + RACKSPACE, CLOUDSTACK, ALICLOUD, MESOS, PROFITBRICKS, DOCKER, OCI) _PROVIDER_INFO_REGISTRY = {} diff --git a/perfkitbenchmarker/providers/aws/aws_elasticache_redis.py b/perfkitbenchmarker/providers/aws/aws_elasticache_redis.py index f229552499..96f16bfd59 100644 --- a/perfkitbenchmarker/providers/aws/aws_elasticache_redis.py +++ b/perfkitbenchmarker/providers/aws/aws_elasticache_redis.py @@ -22,6 +22,7 @@ from perfkitbenchmarker import errors from perfkitbenchmarker import managed_memory_store from perfkitbenchmarker import provider_info +from perfkitbenchmarker import virtual_machine from perfkitbenchmarker import vm_util from perfkitbenchmarker.providers.aws import aws_network from perfkitbenchmarker.providers.aws import util @@ -47,14 +48,13 @@ def __init__(self, spec): self.subnet_group_name = 'subnet-%s' % self.name self.version = REDIS_VERSION_MAPPING[spec.config.cloud_redis.redis_version] self.node_type = FLAGS.elasticache_node_type - self.node_count = ( - FLAGS.elasticache_node_count if self._clustered else None - ) self.redis_region = FLAGS.cloud_redis_region self.failover_zone = FLAGS.elasticache_failover_zone self.failover_subnet = None self.failover_style = FLAGS.redis_failover_style + self.subnets = [] + @staticmethod def CheckPrerequisites(benchmark_config): if ( @@ -131,6 +131,16 @@ def _CreateDependencies(self): self.failover_subnet.Create() cmd += [self.failover_subnet.id] + # Subnets determine where shards can be placed. 
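+    # Editor's note (illustrative): with managed_memory_store_cluster=True and
+    # --cloud_redis_zones=us-east-1a,us-east-1b, the loop below carves one new
+    # subnet per zone out of the client VM's VPC so ElastiCache can spread
+    # shards across those zones.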
+ regional_network = self.spec.vms[0].network.regional_network + vpc_id = regional_network.vpc.id + for zone in self.zones: + cidr = regional_network.vpc.NextSubnetCidrBlock() + subnet = aws_network.AwsSubnet(zone, vpc_id, cidr_block=cidr) + subnet.Create() + cmd += [subnet.id] + self.subnets.append(subnet) + vm_util.IssueCommand(cmd) def _DeleteDependencies(self): @@ -147,6 +157,9 @@ def _DeleteDependencies(self): if self.failover_subnet: self.failover_subnet.Delete() + for subnet in self.subnets: + subnet.Delete() + def _Create(self): """Creates the cluster.""" cmd = [ @@ -186,6 +199,13 @@ def _Create(self): else: cmd += ['--num-node-groups', str(self.node_count)] + if self.enable_tls: + cmd += [ + '--transit-encryption-enabled', + '--transit-encryption-mode', + 'required', + ] + cmd += ['--tags'] cmd += util.MakeFormattedDefaultTags() _, stderr, _ = vm_util.IssueCommand(cmd, raise_on_failure=False) @@ -267,3 +287,22 @@ def _PopulateEndpoint(self): primary_endpoint = cluster_info['NodeGroups'][0]['PrimaryEndpoint'] self._ip = primary_endpoint['Address'] self._port = primary_endpoint['Port'] + + def GetShardEndpoints( + self, client: virtual_machine.BaseVirtualMachine + ) -> list[managed_memory_store.RedisShard]: + """See base class.""" + shards = super().GetShardEndpoints(client) + shards_by_slots = {shard.slots: shard for shard in shards} + + cluster_info = self.DescribeInstance() + # See data/elasticache_describe_cluster.txt for an example + node_groups = cluster_info['NodeGroups'] + zones_by_slots = { + node['Slots']: node['NodeGroupMembers'][0]['PreferredAvailabilityZone'] + for node in node_groups + } + for slot in zones_by_slots: + shards_by_slots[slot].zone = zones_by_slots[slot] + + return list(shards_by_slots.values()) diff --git a/perfkitbenchmarker/providers/aws/aws_elasticached_memcached.py b/perfkitbenchmarker/providers/aws/aws_elasticached_memcached.py index d10634246e..ac54c2e6e0 100644 --- a/perfkitbenchmarker/providers/aws/aws_elasticached_memcached.py +++ b/perfkitbenchmarker/providers/aws/aws_elasticached_memcached.py @@ -22,6 +22,7 @@ from perfkitbenchmarker import managed_memory_store from perfkitbenchmarker import provider_info from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import flags as aws_flags from perfkitbenchmarker.providers.aws import util @@ -40,7 +41,7 @@ def __init__(self, spec): self.subnet_group_name = 'subnet-%s' % self.name self.zone = self.spec.vms[0].zone self.region = util.GetRegionFromZone(self.zone) - self.node_type = FLAGS.cache_node_type + self.node_type = aws_flags.ELASTICACHE_NODE_TYPE.value self.version = FLAGS.managed_memory_store_version @staticmethod diff --git a/perfkitbenchmarker/providers/aws/aws_relational_db.py b/perfkitbenchmarker/providers/aws/aws_relational_db.py index b289f8e648..a7cd17149e 100644 --- a/perfkitbenchmarker/providers/aws/aws_relational_db.py +++ b/perfkitbenchmarker/providers/aws/aws_relational_db.py @@ -320,6 +320,10 @@ def _IsInstanceReady(self, instance_id, timeout=IS_READY_TIMEOUT): if waiting_param: logging.info('Applying parameter') + if state == 'insufficient-capacity': + raise errors.Benchmarks.InsufficientCapacityCloudFailure( + 'Insufficient capacity to provision this db.' 
+ ) if state == 'available' and not pending_values and not waiting_param: break diff --git a/perfkitbenchmarker/providers/aws/flags.py b/perfkitbenchmarker/providers/aws/flags.py index 414d8aefc3..96e73ae301 100644 --- a/perfkitbenchmarker/providers/aws/flags.py +++ b/perfkitbenchmarker/providers/aws/flags.py @@ -56,19 +56,11 @@ 'IMAGE_NAME_REGEX.') flags.DEFINE_string('aws_preprovisioned_data_bucket', None, 'AWS bucket where pre-provisioned data has been copied.') -flags.DEFINE_string( +ELASTICACHE_NODE_TYPE = flags.DEFINE_string( 'elasticache_node_type', 'cache.m4.large', 'The AWS cache node type to use for elasticache clusters.', ) -flags.DEFINE_integer( - 'elasticache_node_count', - 1, - ( - 'Number of cache nodes (shards) to use. Only used if ' - 'managed_memory_store_cluster is True.' - ), -) flags.DEFINE_string( 'elasticache_failover_zone', None, 'AWS elasticache failover zone' ) diff --git a/perfkitbenchmarker/providers/azure/azure_flexible_server.py b/perfkitbenchmarker/providers/azure/azure_flexible_server.py index 759d09061b..768c04b824 100644 --- a/perfkitbenchmarker/providers/azure/azure_flexible_server.py +++ b/perfkitbenchmarker/providers/azure/azure_flexible_server.py @@ -31,7 +31,7 @@ FLAGS = flags.FLAGS DISABLE_HA = 'Disabled' -ENABLE_HA = 'Enabled' +ENABLE_HA = 'SameZone' DEFAULT_MYSQL_VERSION = '8.0' DEFAULT_POSTGRES_VERSION = '13' diff --git a/perfkitbenchmarker/providers/gcp/flags.py b/perfkitbenchmarker/providers/gcp/flags.py index fa99cc2dc0..61d236ca38 100644 --- a/perfkitbenchmarker/providers/gcp/flags.py +++ b/perfkitbenchmarker/providers/gcp/flags.py @@ -133,7 +133,11 @@ flags.DEFINE_boolean('gke_enable_alpha', False, 'Whether to enable alpha kubernetes clusters.') flags.DEFINE_boolean('gke_enable_gvnic', True, - 'Whether to use google vitrual interface on GKE nodes.') + 'Whether to use google virtual network interface on GKE ' + 'nodes.') +GKE_NCCL_FAST_SOCKET = flags.DEFINE_boolean( + 'gke_enable_nccl_fast_socket', False, + 'Whether to enable NCCL fast socket on GKE.') flags.DEFINE_string('gcp_dataproc_subnet', None, 'Specifies the subnet that the cluster will be part of.') flags.DEFINE_multi_string('gcp_dataproc_property', [], diff --git a/perfkitbenchmarker/providers/gcp/gce_disk.py b/perfkitbenchmarker/providers/gcp/gce_disk.py index 6336e78280..15484e3b22 100644 --- a/perfkitbenchmarker/providers/gcp/gce_disk.py +++ b/perfkitbenchmarker/providers/gcp/gce_disk.py @@ -26,6 +26,7 @@ from perfkitbenchmarker import resource from perfkitbenchmarker import vm_util from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.providers.gcp import flags as gcp_flags from perfkitbenchmarker.providers.gcp import util FLAGS = flags.FLAGS @@ -92,7 +93,7 @@ def PdDriveIsNvme(vm): # such as confidential VMs on Milan. # this is not robust, but can get refactored when # there is more clarity on what groups of VMs are NVMe. - if family in ['n2d', 'c2d'] and 'confidential' in vm.OS_TYPE: + if gcp_flags.GCE_CONFIDENTIAL_COMPUTE.value: return True return False @@ -296,10 +297,7 @@ def Detach(self): def GetDevicePath(self): """Returns the path to the device inside the VM.""" - if self.disk_type == disk.LOCAL and self.interface == NVME: - return '/dev/%s' % self.name - else: - if self.disk_type in GCE_REMOTE_DISK_TYPES and self.interface == NVME: - return self.name - # by default, returns this name id. 
- return '/dev/disk/by-id/google-%s' % self.name + if self.disk_type in GCE_REMOTE_DISK_TYPES and self.interface == NVME: + return self.name + # by default, returns this name id. + return f'/dev/disk/by-id/google-{self.name}' diff --git a/perfkitbenchmarker/providers/gcp/gce_network.py b/perfkitbenchmarker/providers/gcp/gce_network.py index 8a5aef5488..524d54bf08 100644 --- a/perfkitbenchmarker/providers/gcp/gce_network.py +++ b/perfkitbenchmarker/providers/gcp/gce_network.py @@ -45,7 +45,8 @@ ALLOW_ALL = 'tcp:1-65535,udp:1-65535,icmp' _PLACEMENT_GROUP_PREFIXES = frozenset( - ['c2', 'c3', 'n2', 'n2d', 'c2d', 'a2']) + ['c2', 'c3', 'n2', 'n2d', 'c2d', 'c3d', 'a2', 'g2'] +) class GceVpnGateway(network.BaseVpnGateway): diff --git a/perfkitbenchmarker/providers/gcp/gce_virtual_machine.py b/perfkitbenchmarker/providers/gcp/gce_virtual_machine.py index 1bdae41993..742259e89d 100644 --- a/perfkitbenchmarker/providers/gcp/gce_virtual_machine.py +++ b/perfkitbenchmarker/providers/gcp/gce_virtual_machine.py @@ -82,7 +82,7 @@ # Gcloud operations are complete when their 'status' is 'DONE'. OPERATION_DONE = 'DONE' -# 2h timeout for LM notificaiton +# 2h timeout for LM notification LM_NOTIFICATION_TIMEOUT_SECONDS = 60 * 60 * 2 NVME = 'NVME' @@ -94,7 +94,8 @@ 'Instance failed to start due to preemption.' ) _GCE_VM_CREATE_TIMEOUT = 1200 -_GCE_NVIDIA_GPU_PREFIX = 'nvidia-tesla-' +_GCE_NVIDIA_GPU_PREFIX = 'nvidia-' +_GCE_NVIDIA_TESLA_GPU_PREFIX = 'nvidia-tesla-' _SHUTDOWN_SCRIPT = 'su "{user}" -c "echo | gsutil cp - {preempt_marker}"' METADATA_PREEMPT_URI = ( 'http://metadata.google.internal/computeMetadata/v1/instance/preempted' @@ -134,8 +135,8 @@ class GceVmSpec(virtual_machine.BaseVmSpec): preemptible: boolean. True if the VM should be preemptible, False otherwise. project: string or None. The project to create the VM in. image_family: string or None. The image family used to locate the image. - image_project: string or None. The image project used to locate the specifed - image. + image_project: string or None. The image project used to locate the + specified image. boot_disk_size: None or int. The size of the boot disk in GB. boot_disk_type: string or None. The type of the boot disk. """ @@ -176,24 +177,6 @@ def __init__(self, *args, **kwargs): self.cpus = None self.memory = None - # The A2 machine family, unlike other GCP offerings has a preset number of - # GPUs, so we set them directly from the machine_type - # https://cloud.google.com/blog/products/compute/announcing-google-cloud-a2-vm-family-based-on-nvidia-a100-gpu - if self.machine_type and self.machine_type.startswith('a2-'): - a2_lookup = { - 'a2-highgpu-1g': 1, - 'a2-highgpu-2g': 2, - 'a2-highgpu-4g': 4, - 'a2-highgpu-8g': 8, - 'a2-megagpu-16g': 16, - 'a2-ultragpu-1g': 1, - 'a2-ultragpu-2g': 2, - 'a2-ultragpu-4g': 4, - 'a2-ultragpu-8g': 8, - } - self.gpu_count = a2_lookup[self.machine_type] - self.gpu_type = virtual_machine.GPU_A100 - @classmethod def _ApplyFlags(cls, config_values, flag_values): """Modifies config options based on runtime flag values. @@ -419,9 +402,13 @@ def GenerateAcceleratorSpecString(accelerator_type, accelerator_count): String to be used by gcloud to attach accelerators to a VM. Must be prepended by the flag '--accelerator'. 
""" - gce_accelerator_type = ( - FLAGS.gce_accelerator_type_override - or _GCE_NVIDIA_GPU_PREFIX + accelerator_type + gce_accelerator_type = FLAGS.gce_accelerator_type_override or ( + ( + _GCE_NVIDIA_TESLA_GPU_PREFIX + if accelerator_type in virtual_machine.TESLA_GPU_TYPES + else _GCE_NVIDIA_GPU_PREFIX + ) + + accelerator_type ) return 'type={0},count={1}'.format(gce_accelerator_type, accelerator_count) @@ -501,6 +488,26 @@ def __init__(self, vm_spec): self.gce_tags = vm_spec.gce_tags self.gce_network_tier = FLAGS.gce_network_tier self.gce_nic_type = FLAGS.gce_nic_type + + # The A2 machine family, unlike other GCP offerings has a preset number of + # GPUs, so we set them directly from the machine_type + # https://cloud.google.com/blog/products/compute/announcing-google-cloud-a2-vm-family-based-on-nvidia-a100-gpu + # machine_type is always defined when running, but not in unit tests. + if self.machine_type and self.machine_type.startswith('a2-'): + a2_lookup = { + 'a2-highgpu-1g': 1, + 'a2-highgpu-2g': 2, + 'a2-highgpu-4g': 4, + 'a2-highgpu-8g': 8, + 'a2-megagpu-16g': 16, + 'a2-ultragpu-1g': 1, + 'a2-ultragpu-2g': 2, + 'a2-ultragpu-4g': 4, + 'a2-ultragpu-8g': 8, + } + self.gpu_count = a2_lookup[self.machine_type] + self.gpu_type = virtual_machine.GPU_A100 + if not self.SupportGVNIC(): logging.warning('Changing gce_nic_type to VIRTIO_NET') self.gce_nic_type = 'VIRTIO_NET' @@ -1068,11 +1075,7 @@ def CreateScratchDisk(self, disk_spec_id, disk_spec): name = 'local-ssd-%d' % self.local_disk_counter disk_number = self.local_disk_counter + 1 elif self.ssd_interface == NVME: - # Device can either be /dev/nvme0n1 or /dev/nvme1n1. Find out which. - name, _ = self.RemoteCommand( - 'find /dev/nvme*n%d' % (self.local_disk_counter + 1) - ) - name = name.strip().split('/')[-1] + name = f'local-nvme-ssd-{self.local_disk_counter}' disk_number = self.local_disk_counter + self.NVME_START_INDEX else: raise errors.Error('Unknown Local SSD Interface.') @@ -1342,7 +1345,7 @@ def CollectLMNotificationsTime(self): } lm_times = self._ReadLMNoticeContents() if not lm_times: - return events_dict + raise ValueError('Cannot collect lm times. 
Live Migration might have failed.')
+
     # Result may contain errors captured, so we need to skip them
     for event_info in lm_times.splitlines():
diff --git a/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py b/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py
index c5aeb87f09..541803c270 100644
--- a/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py
+++ b/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py
@@ -425,6 +425,7 @@ def __init__(self, dpb_service_spec):
     super().__init__(dpb_service_spec)
     self._job_counter = 0
     self.batch_name = f'{self.cluster_id}-{self._job_counter}'
+    self.dpb_hdfs_type = 'HDD'
 
   def SubmitJob(self,
                 jarfile=None,
@@ -548,12 +549,23 @@ def GetJobProperties(self) -> Dict[str, str]:
       result['spark.dynamicAllocation.maxExecutors'] = (
           self.spec.dataproc_serverless_max_executors)
     if self.spec.worker_group.disk_spec.disk_size:
-      result['spark.dataproc.driver.disk_size'] = (
+      result['spark.dataproc.driver.disk.size'] = (
           f'{self.spec.worker_group.disk_spec.disk_size}g'
       )
-      result['spark.dataproc.executor.disk_size'] = (
+      result['spark.dataproc.executor.disk.size'] = (
           f'{self.spec.worker_group.disk_spec.disk_size}g'
       )
+    if self.spec.dataproc_serverless_memory:
+      result['spark.driver.memory'] = f'{self.spec.dataproc_serverless_memory}m'
+      result['spark.executor.memory'] = (
+          f'{self.spec.dataproc_serverless_memory}m')
+    if self.spec.dataproc_serverless_memory_overhead:
+      result['spark.driver.memoryOverhead'] = (
+          f'{self.spec.dataproc_serverless_memory_overhead}m'
+      )
+      result['spark.executor.memoryOverhead'] = (
+          f'{self.spec.dataproc_serverless_memory_overhead}m'
+      )
     result.update(super().GetJobProperties())
     return result
@@ -585,7 +597,11 @@ def GetMetadata(self):
         'dpb_cluster_max_executors': max_executors,
         'dpb_cluster_initial_executors': initial_executors,
         'dpb_cores_per_node': self.spec.dataproc_serverless_core_count,
-        'dpb_hdfs_type': 'default-disk',
+        'dpb_memory_per_node':
+            self.spec.dataproc_serverless_memory or 'default',
+        'dpb_memory_overhead_per_node':
+            self.spec.dataproc_serverless_memory_overhead or 'default',
+        'dpb_hdfs_type': basic_data['dpb_hdfs_type'],
         'dpb_disk_size': basic_data['dpb_disk_size'],
         'dpb_service_zone': basic_data['dpb_service_zone'],
         'dpb_job_properties': basic_data['dpb_job_properties'],
diff --git a/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc_serverless_prices.py b/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc_serverless_prices.py
index f20170ec44..6de0396c25 100644
--- a/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc_serverless_prices.py
+++ b/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc_serverless_prices.py
@@ -8,138 +8,138 @@
 DATAPROC_SERVERLESS_PRICES = {
     'us-west1': {
         'usd_per_milli_dcu_sec': 0.06 / 1000 / 3600,
-        'usd_per_shuffle_storage_gb_sec': 0.04 / 744 / 3600,
+        'usd_per_shuffle_storage_gb_sec': 0.04 / 720 / 3600,
     },
     'us-west2': {
         'usd_per_milli_dcu_sec': 0.072071 / 1000 / 3600,
-        'usd_per_shuffle_storage_gb_sec': 0.048 / 744 / 3600,
+        'usd_per_shuffle_storage_gb_sec': 0.048 / 720 / 3600,
     },
     'us-west3': {
         'usd_per_milli_dcu_sec': 0.072071 / 1000 / 3600,
-        'usd_per_shuffle_storage_gb_sec': 0.048 / 744 / 3600,
+        'usd_per_shuffle_storage_gb_sec': 0.048 / 720 / 3600,
     },
     'us-west4': {
         'usd_per_milli_dcu_sec': 0.067572 / 1000 / 3600,
-        'usd_per_shuffle_storage_gb_sec': 0.044 / 744 / 3600,
+        'usd_per_shuffle_storage_gb_sec': 0.044 / 720 / 3600,
     },
     'us-east1': {
         'usd_per_milli_dcu_sec': 0.06 / 1000 / 3600,
-        'usd_per_shuffle_storage_gb_sec': 0.04 / 744 / 3600,
+        'usd_per_shuffle_storage_gb_sec': 0.04 / 720 / 
3600, }, 'us-east4': { 'usd_per_milli_dcu_sec': 0.067572 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.044 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.044 / 720 / 3600, }, 'us-east5': { 'usd_per_milli_dcu_sec': 0.06 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.04 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.04 / 720 / 3600, }, 'us-central1': { 'usd_per_milli_dcu_sec': 0.06 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.04 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.04 / 720 / 3600, }, 'us-south1': { 'usd_per_milli_dcu_sec': 0.0708 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.0472 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.0472 / 720 / 3600, }, 'europe-north1': { 'usd_per_milli_dcu_sec': 0.066062 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.044 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.044 / 720 / 3600, }, 'europe-west1': { 'usd_per_milli_dcu_sec': 0.066007 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.04 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.04 / 720 / 3600, }, 'europe-west2': { 'usd_per_milli_dcu_sec': 0.077307 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.048 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.048 / 720 / 3600, }, 'europe-west3': { 'usd_per_milli_dcu_sec': 0.077307 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.048 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.048 / 720 / 3600, }, 'europe-west4': { 'usd_per_milli_dcu_sec': 0.066057 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.044 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.044 / 720 / 3600, }, 'europe-west6': { 'usd_per_milli_dcu_sec': 0.083955 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.052 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.052 / 720 / 3600, }, 'europe-west8': { 'usd_per_milli_dcu_sec': 0.0696 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.0464 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.0464 / 720 / 3600, }, 'europe-west9': { 'usd_per_milli_dcu_sec': 0.0696 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.0464 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.0464 / 720 / 3600, }, 'europe-central2': { 'usd_per_milli_dcu_sec': 0.077307 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.048 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.048 / 720 / 3600, }, 'europe-southwest1': { 'usd_per_milli_dcu_sec': 0.0708 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.047 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.047 / 720 / 3600, }, 'asia-northeast1': { 'usd_per_milli_dcu_sec': 0.076976 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.052 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.052 / 720 / 3600, }, 'asia-northeast2': { 'usd_per_milli_dcu_sec': 0.076976 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.052 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.052 / 720 / 3600, }, 'asia-northeast3': { 'usd_per_milli_dcu_sec': 0.076976 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.052 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.052 / 720 / 3600, }, 'asia-east1': { 'usd_per_milli_dcu_sec': 0.069477 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.04 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.04 / 720 / 3600, }, 'asia-east2': { 'usd_per_milli_dcu_sec': 0.083955 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.05 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.05 / 720 / 3600, }, 'asia-south1': { 'usd_per_milli_dcu_sec': 0.072071 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 
0.048 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.048 / 720 / 3600, }, 'asia-south2': { 'usd_per_milli_dcu_sec': 0.072071 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.048 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.048 / 720 / 3600, }, 'asia-southeast1': { 'usd_per_milli_dcu_sec': 0.074015 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.044 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.044 / 720 / 3600, }, 'asia-southeast2': { 'usd_per_milli_dcu_sec': 0.080674 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.052 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.052 / 720 / 3600, }, 'australia-southeast1': { 'usd_per_milli_dcu_sec': 0.085135 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.054 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.054 / 720 / 3600, }, 'australia-southeast2': { 'usd_per_milli_dcu_sec': 0.085135 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.054 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.054 / 720 / 3600, }, 'northamerica-northeast1': { 'usd_per_milli_dcu_sec': 0.066057 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.044 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.044 / 720 / 3600, }, 'northamerica-northeast2': { 'usd_per_milli_dcu_sec': 0.066057 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.044 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.044 / 720 / 3600, }, 'southamerica-east1': { 'usd_per_milli_dcu_sec': 0.095246 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.06 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.06 / 720 / 3600, }, 'southamerica-west1': { 'usd_per_milli_dcu_sec': 0.08581 / 1000 / 3600, - 'usd_per_shuffle_storage_gb_sec': 0.057 / 744 / 3600, + 'usd_per_shuffle_storage_gb_sec': 0.057 / 720 / 3600, }, } diff --git a/perfkitbenchmarker/providers/gcp/gcp_spanner.py b/perfkitbenchmarker/providers/gcp/gcp_spanner.py index c45c75b623..0cc31362b3 100644 --- a/perfkitbenchmarker/providers/gcp/gcp_spanner.py +++ b/perfkitbenchmarker/providers/gcp/gcp_spanner.py @@ -30,6 +30,7 @@ from google.cloud import monitoring_v3 from google.cloud.monitoring_v3 import query import numpy as np +from perfkitbenchmarker import background_tasks from perfkitbenchmarker import errors from perfkitbenchmarker import relational_db from perfkitbenchmarker import relational_db_spec @@ -468,15 +469,23 @@ def GetDefaultPort(self) -> int: def _PostCreate(self): super()._PostCreate() - self.client_vm_query_tools.InstallPackages() - - @property - def client_vm_query_tools(self): - if not hasattr(self, '_client_vm_query_tools'): - connection_properties = sql_engine_utils.DbConnectionProperties( - self.spec.engine, self.spec.engine_version, self.endpoint, self.port, - self.spec.database_username, self.spec.database_password, - self.instance_id, self.database, self.project) - self._client_vm_query_tools = sql_engine_utils.GetQueryToolsByEngine( - self.client_vm, connection_properties) - return self._client_vm_query_tools + # TODO(user) move to superclass. 
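+    # Editor's note: RunThreaded fans InstallPackages out over every entry in
+    # self.client_vms_query_tools in parallel, instead of installing on a
+    # single client VM as the removed code above did.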
+ background_tasks.RunThreaded( + lambda client_query_tools: client_query_tools.InstallPackages(), + self.client_vms_query_tools, + ) + + def _GetDbConnectionProperties( + self, + ) -> sql_engine_utils.DbConnectionProperties: + return sql_engine_utils.DbConnectionProperties( + self.spec.engine, + self.spec.engine_version, + self.endpoint, + self.port, + self.spec.database_username, + self.spec.database_password, + self.instance_id, + self.database, + self.project, + ) diff --git a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py index e82c70645d..def6574e7b 100644 --- a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py +++ b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py @@ -125,6 +125,14 @@ def __init__(self, spec): os.environ['CLOUDSDK_API_ENDPOINT_OVERRIDES_CONTAINER'] = ( gcp_flags.GKE_API_OVERRIDE.value) + self.enable_nccl_fast_socket = False + if gcp_flags.GKE_NCCL_FAST_SOCKET.value: + if self.nodepools: + self.enable_nccl_fast_socket = True + else: + raise errors.Config.InvalidValue( + 'NCCL fast socket is only supported on secondary node pools.') + def GetResourceMetadata(self): """Returns a dict containing metadata about the cluster. @@ -143,6 +151,7 @@ def GetResourceMetadata(self): # TODO(pclay): support NVME when it leaves alpha # Also consider moving FLAGS.gce_ssd_interface into the vm_spec. result['gce_local_ssd_interface'] = gce_virtual_machine.SCSI + result['gke_nccl_fast_socket'] = self.enable_nccl_fast_socket return result def _GcloudCommand(self, *args, **kwargs): @@ -230,10 +239,10 @@ def _AddNodeParamsToCmd( cmd.flags['labels'] = util.MakeFormattedDefaultTags() if vm_config.gpu_count: - cmd.flags['accelerator'] = ( - gce_virtual_machine.GenerateAcceleratorSpecString( - vm_config.gpu_type, - vm_config.gpu_count)) + if 'a2-' not in vm_config.machine_type: + cmd.flags['accelerator'] = ( + gce_virtual_machine.GenerateAcceleratorSpecString( + vm_config.gpu_type, vm_config.gpu_count)) if vm_config.min_cpu_platform: cmd.flags['min-cpu-platform'] = vm_config.min_cpu_platform @@ -268,6 +277,11 @@ def _AddNodeParamsToCmd( cmd.args.append('--enable-gvnic') else: cmd.args.append('--no-enable-gvnic') + if ( + self.enable_nccl_fast_socket + and name != container_service.DEFAULT_NODEPOOL + ): + cmd.args.append('--enable-fast-socket') if FLAGS.gke_node_system_config is not None: cmd.flags['system-config-from-file'] = FLAGS.gke_node_system_config @@ -290,7 +304,10 @@ def _PostCreate(self): env['KUBECONFIG'] = FLAGS.kubeconfig cmd.IssueRetryable(env=env) - if self.vm_config.gpu_count: + should_install_nvidia_drivers = ( + self.vm_config.gpu_count or + any(pool.vm_config.gpu_count for pool in self.nodepools.values())) + if should_install_nvidia_drivers: kubernetes_helper.CreateFromFile(NVIDIA_DRIVER_SETUP_DAEMON_SET_SCRIPT) kubernetes_helper.CreateFromFile( data.ResourcePath(NVIDIA_UNRESTRICTED_PERMISSIONS_DAEMON_SET)) diff --git a/perfkitbenchmarker/providers/oci/__init__.py b/perfkitbenchmarker/providers/oci/__init__.py new file mode 100644 index 0000000000..8f33e6ef87 --- /dev/null +++ b/perfkitbenchmarker/providers/oci/__init__.py @@ -0,0 +1,3 @@ +"""Provider for Oracle Cloud.""" + +OCI_PATH = 'oci' diff --git a/perfkitbenchmarker/providers/oci/default_config_constants.yaml b/perfkitbenchmarker/providers/oci/default_config_constants.yaml new file mode 100644 index 0000000000..6e0237f7c2 --- /dev/null +++ b/perfkitbenchmarker/providers/oci/default_config_constants.yaml @@ -0,0 +1,209 @@ 
+# All anchors defined in this file should be compatible +# with *all* clouds. That means any vm_specs or disk_specs +# defined here should have keys for every cloud. +default_single_core: &default_single_core + GCP: + machine_type: n1-standard-1 + zone: us-central1-a + image: null + Azure: + machine_type: Standard_A1 + zone: eastus2 + image: null + AWS: + machine_type: t2.small + zone: us-east-1 + image: null + AliCloud: + machine_type: ecs.g5.large + zone: cn-beijing-g + image: null + DigitalOcean: + machine_type: 2gb + zone: sfo1 + image: null + OpenStack: + machine_type: m1.small + zone: nova + image: null + CloudStack: + machine_type: 1vCPU.1GB + zone: QC-1 + image: null + Rackspace: + machine_type: general1-1 + zone: IAD + image: null + Kubernetes: + image: null + Mesos: + image: null + ProfitBricks: + machine_type: Small + zone: ZONE_1 + image: null + Docker: + image: null + machine_type: + cpus: 1 + memory: 2.0GiB + IBMCloud: + machine_type: cx2-2x4 + zone: us-south-1 + image: null + OCI: + machine_type: VM.Standard.A1.Flex + zone: us-ashburn-1 + image: null + +# TODO: update the two core machines for more providers +default_dual_core: &default_dual_core + GCP: + machine_type: n1-standard-2 + zone: us-central1-a + image: null + Azure: + machine_type: Standard_D2_v3 + zone: eastus2 + image: null + AWS: + machine_type: m5.large + zone: us-east-1 + image: null + Docker: + image: null + machine_type: + cpus: 2 + memory: 4.0GiB + AliCloud: + machine_type: ecs.g5.xlarge + zone: cn-beijing-g + image: null + IBMCloud: + machine_type: cx2-4x8 + zone: us-south-1 + image: null + Kubernetes: + image: null + OCI: + machine_type: VM.Standard.A1.Flex + zone: us-ashburn-1 + image: null + +# TODO(user): update the disk types below as more providers are +# updated for the disk types refactor. +default_500_gb: &default_500_gb + GCP: + disk_type: pd-standard + disk_size: 500 + mount_point: /scratch + Azure: + disk_type: Standard_LRS + disk_size: 500 + mount_point: /scratch + AWS: + disk_type: standard + disk_size: 500 + mount_point: /scratch + AliCloud: + disk_type: standard + disk_size: 500 + mount_point: /scratch + DigitalOcean: + disk_type: standard + disk_size: 500 + mount_point: /scratch + OpenStack: + disk_type: standard + disk_size: 500 + mount_point: /scratch + CloudStack: + disk_size: 500 + mount_point: /scratch + Rackspace: + disk_type: standard + disk_size: 500 + mount_point: /scratch + Kubernetes: + disk_type: emptyDir + disk_size: 500 + mount_point: /scratch + Mesos: + disk_type: local + disk_size: 500 + mount_point: /scratch + ProfitBricks: + disk_type: standard + disk_size: 500 + mount_point: /scratch + Docker: + disk_type: local + disk_size: 500 + mount_point: /scratch + IBMCloud: + disk_type: standard + disk_size: 500 + mount_point: /scratch + OCI: + disk_type: paravirtualized + disk_size: 500 + mount_point: /scratch + +# TODO(user): update the disk types below as more providers are +# updated for the disk types refactor. 
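+# Editor's note (illustrative): benchmark configs consume these anchors via
+# YAML aliases, e.g. a vm_group can set `disk_spec: *default_50_gb` to pick up
+# the per-cloud defaults defined below.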
+default_50_gb: &default_50_gb
+  GCP:
+    disk_type: pd-standard
+    disk_size: 50
+    mount_point: /scratch
+  Azure:
+    disk_type: Standard_LRS
+    disk_size: 50
+    mount_point: /scratch
+  AWS:
+    disk_type: standard
+    disk_size: 50
+    mount_point: /scratch
+  AliCloud:
+    disk_type: standard
+    disk_size: 50
+    mount_point: /scratch
+  DigitalOcean:
+    disk_type: standard
+    disk_size: 50
+    mount_point: /scratch
+  OpenStack:
+    disk_type: standard
+    disk_size: 50
+    mount_point: /scratch
+  CloudStack:
+    disk_size: 50
+    mount_point: /scratch
+  Rackspace:
+    disk_type: standard
+    disk_size: 50
+    mount_point: /scratch
+  Kubernetes:
+    disk_type: emptyDir
+    disk_size: 50
+    mount_point: /scratch
+  Mesos:
+    disk_type: local
+    disk_size: 50
+    mount_point: /scratch
+  ProfitBricks:
+    disk_type: standard
+    disk_size: 50
+    mount_point: /scratch
+  Docker:
+    disk_type: local
+    disk_size: 50
+    mount_point: /scratch
+  IBMCloud:
+    disk_type: standard
+    disk_size: 50
+    mount_point: /scratch
+  OCI:
+    disk_type: paravirtualized
+    disk_size: 50
+    mount_point: /scratch
diff --git a/perfkitbenchmarker/providers/oci/flags.py b/perfkitbenchmarker/providers/oci/flags.py
new file mode 100644
index 0000000000..b0858787d3
--- /dev/null
+++ b/perfkitbenchmarker/providers/oci/flags.py
@@ -0,0 +1,28 @@
+"""Module containing flags applicable across benchmark runs on OCI."""
+
+from absl import flags
+
+VALID_TIERS = ['VM.Standard', 'VM.Optimized']
+
+VALID_SHAPES = ['.A1.Flex', '3.Flex', '.E4.Flex']
+
+flags.DEFINE_string('oci_availability_domain', None, 'The availability domain.')
+
+flags.DEFINE_string('oci_fault_domain', None, 'The fault domain.')
+
+flags.DEFINE_string('oci_shape', 'VM.Standard.A1.Flex',
+                    'The OCI shape to use for the machine type. Defaults to '
+                    'VM.Standard.A1.Flex.')
+
+flags.DEFINE_integer('oci_compute_units', 1,
+                     'Number of compute units (OCPUs) to allocate for the '
+                     'machine type.')
+
+flags.DEFINE_integer('oci_compute_memory', None,
+                     'Amount of memory in GBs to allocate for the machine '
+                     'type.')
+
+flags.DEFINE_integer('oci_boot_disk_size', 50, 'Size of the boot disk in GBs.')
+
+flags.DEFINE_boolean('oci_use_vcn', True,
+                     'Whether to use the built-in virtual cloud network (VCN).')
+
+flags.DEFINE_integer('oci_num_local_ssds', 0, 'Number of local SSD disks.')
+
+flags.DEFINE_string(
+    'oci_network_name', None, 'The name of an already created '
+    'network to use instead of creating a new one.')
diff --git a/perfkitbenchmarker/providers/oci/oci_disk.py b/perfkitbenchmarker/providers/oci/oci_disk.py
new file mode 100644
index 0000000000..80f1dbdedf
--- /dev/null
+++ b/perfkitbenchmarker/providers/oci/oci_disk.py
@@ -0,0 +1,173 @@
+
+"""Module containing classes related to Oracle disks."""
+
+import json
+import logging
+import threading
+from typing import Optional
+from absl import flags
+from perfkitbenchmarker import disk
+from perfkitbenchmarker import vm_util
+from perfkitbenchmarker.providers.oci import util
+
+FLAGS = flags.FLAGS
+
+# https://docs.oracle.com/en-us/iaas/Content/Block/Concepts/blockvolumeperformance.htm
+
+# Acceptable values for vpus per GB are:
+# 0: Represents Lower Cost option.
+# 10: Represents Balanced option.
+# 20: Represents Higher Performance option.
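+# Editor's note (illustrative): with the defaults above, _Create below issues
+# a command of roughly the form
+#   oci bv volume create --availability-domain <AD> --size-in-gbs 100
+#       --display-name <vm_name>-0 --freeform-tags <tags> --vpus-per-gb 20
+# (the exact prefix comes from util.OCI_PREFIX and util.GetEncodedCmd).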
+DEFAULT_VPUS_PER_GB = 20
+
+DISK_CREATE_STATUSES = frozenset(
+    ['AVAILABLE', 'FAULTY', 'PROVISIONING', 'RESTORING', 'TERMINATED', 'TERMINATING']
+)
+
+DISK_ATTACH_STATUS = frozenset(
+    ['ATTACHED', 'ATTACHING', 'DETACHED', 'DETACHING']
+)
+
+
+class OciDisk(disk.BaseDisk):
+  _lock = threading.Lock()
+  vm_devices = {}
+
+  def __init__(self, disk_spec, vm_name, availability_domain, disk_number):
+    super(OciDisk, self).__init__(disk_spec)
+    self.id = None
+    self.availability_domain = availability_domain
+    self.disk_size = disk_spec.disk_size or 100
+    self.vpus_per_gb: int = DEFAULT_VPUS_PER_GB
+    self.status = None
+    self.name = f'{vm_name}-{disk_number}'
+    self.attachment_id = None
+    self.device_name = None
+    self.iqn: Optional[str] = None
+    self.port: Optional[str] = None
+    self.ipv4: Optional[str] = None
+    self.tags = util.MakeFormattedDefaultTags()
+
+  def _Create(self):
+    """Creates the disk."""
+    create_cmd = util.OCI_PREFIX + [
+        'bv',
+        'volume',
+        'create',
+        f'--availability-domain {self.availability_domain}',
+        f'--size-in-gbs {self.disk_size}',
+        f'--display-name {self.name}',
+        f'--freeform-tags {self.tags}',
+        f'--vpus-per-gb {str(self.vpus_per_gb)}']
+    create_cmd = util.GetEncodedCmd(create_cmd)
+    stdout, _, _ = vm_util.IssueCommand(create_cmd, raise_on_failure=False)
+    response = json.loads(stdout)
+    self.id = response['data']['id']
+    self._WaitForDiskStatus(['AVAILABLE'])
+
+  def _Delete(self):
+    """Deletes the disk."""
+    # oci bv volume delete
+    delete_cmd = util.OCI_PREFIX + [
+        'bv',
+        'volume',
+        'delete',
+        f'--volume-id {self.id}',
+        '--force']
+    delete_cmd = util.GetEncodedCmd(delete_cmd)
+    out, _ = vm_util.IssueRetryableCommand(delete_cmd)
+    self._WaitForDiskStatus(['TERMINATED'])
+
+  @vm_util.Retry(poll_interval=60, log_errors=False)
+  def _WaitForDiskStatus(self, status_list):
+    """Waits until the disk's status is in status_list."""
+    logging.info('Waiting until the volume status is: %s' % status_list)
+    status_cmd = util.OCI_PREFIX + [
+        'bv',
+        'volume',
+        'get',
+        f'--volume-id {self.id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _ = vm_util.IssueRetryableCommand(status_cmd)
+    state = json.loads(out)
+    check_state = state['data']['lifecycle-state']
+    self.status = check_state
+    assert check_state in status_list
+
+  def Attach(self, disk_spec, vm):
+    """Attaches the disk to the given VM."""
+    attach_cmd = util.OCI_PREFIX + [
+        'compute',
+        'volume-attachment',
+        'attach',
+        f'--volume-id {self.id}',
+        f'--instance-id {vm.ocid}',
+        f'--type {self.disk_type}',
+        f'--device {self.device_name}']
+    logging.info('Attaching Oci disk %s.' % self.id)
+    attach_cmd = util.GetEncodedCmd(attach_cmd)
+    stdout, _ = vm_util.IssueRetryableCommand(attach_cmd)
+    response = json.loads(stdout)
+    self.attachment_id = response['data']['id']
+    self._WaitForDiskAttachStatus(disk_spec, ['ATTACHED'])
+
+  def Detach(self, disk_spec, vm):
+    """Detaches the disk from the given VM."""
+    if disk_spec.disk_type == 'iscsi':
+      self.ExecuteDetachIscsiCommands(vm)
+    detach_cmd = util.OCI_PREFIX + [
+        'compute',
+        'volume-attachment',
+        'detach',
+        f'--volume-attachment-id {self.attachment_id}',
+        '--force']
+    logging.info('Detaching Oci disk %s.' % self.id)
+    detach_cmd = util.GetEncodedCmd(detach_cmd)
+    out, _ = vm_util.IssueRetryableCommand(detach_cmd)
+    self._WaitForDiskAttachStatus(disk_spec, ['DETACHED'])
+
+  @vm_util.Retry(poll_interval=60, log_errors=False)
+  def _WaitForDiskAttachStatus(self, disk_spec, status_list):
+    """Waits until the disk's attach status is in status_list."""
+    logging.info('Waiting until the volume attachment status is: %s'
+                 % status_list)
+    status_cmd = util.OCI_PREFIX + [
+        'compute',
+        'volume-attachment',
+        'get',
+        f'--volume-attachment-id {self.attachment_id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _ = vm_util.IssueRetryableCommand(status_cmd)
+    state = json.loads(out)
+    check_state = state['data']['lifecycle-state']
+    self.status = check_state
+    if self.status == 'ATTACHED' and disk_spec.disk_type == 'iscsi':
+      self.iqn = state['data']['iqn']
+      self.ipv4 = state['data']['ipv4']
+      self.port = state['data']['port']
+    assert check_state in status_list
+
+  def ExecuteAttachIscsiCommands(self, vm):
+    vm.RemoteCommand(f"sudo iscsiadm -m node -o new -T {self.iqn} -p {self.ipv4}:{self.port}")
+    vm.RemoteCommand(f"sudo iscsiadm -m node -o update -T {self.iqn} -n node.startup -v automatic")
+    vm.RemoteCommand(f"sudo iscsiadm -m node -T {self.iqn} -p {self.ipv4}:{self.port} -l")
+
+  def ExecuteDetachIscsiCommands(self, vm):
+    vm.RemoteCommand(f"sudo iscsiadm -m node -T {self.iqn} -p {self.ipv4}:{self.port} -u")
+    vm.RemoteCommand(f"sudo iscsiadm -m node -o delete -T {self.iqn} -p {self.ipv4}:{self.port}")
+
+  def GetDevicePath(self):
+    """Returns the path to the device inside the VM."""
+    return self.device_name
+
+  def GetFreeDeviceName(self, vm):
+    """Finds the first available device name on the VM and records it."""
+    free_device_cmd = util.OCI_PREFIX + [
+        'compute',
+        'device',
+        'list-instance',
+        f'--instance-id {vm.ocid}']
+    free_device_cmd = util.GetEncodedCmd(free_device_cmd)
+    stdout, _ = vm_util.IssueRetryableCommand(free_device_cmd)
+    response = json.loads(stdout)
+    for free_disk in range(0, 31):
+      if response['data'][free_disk]['is-available'] is True:
+        self.device_name = response['data'][free_disk]['name']
+        break
diff --git a/perfkitbenchmarker/providers/oci/oci_network.py b/perfkitbenchmarker/providers/oci/oci_network.py
new file mode 100644
index 0000000000..b125cfe83f
--- /dev/null
+++ b/perfkitbenchmarker/providers/oci/oci_network.py
@@ -0,0 +1,409 @@
+"""Module containing classes related to Oracle Network."""
+
+import json
+import logging
+import uuid
+
+from absl import flags
+from perfkitbenchmarker import network
+from perfkitbenchmarker import provider_info
+from perfkitbenchmarker import resource
+from perfkitbenchmarker import vm_util
+from perfkitbenchmarker.providers.oci import util
+
+FLAGS = flags.FLAGS
+
+MAX_NAME_LENGTH = 128
+WAIT_INTERVAL_SECONDS = 600
+
+VCN_CREATE_STATUSES = frozenset(
+    ['AVAILABLE', 'PROVISIONING', 'TERMINATED', 'TERMINATING', 'UPDATING']
+)
+
+SUBNET_CREATE_STATUSES = frozenset(
+    ['AVAILABLE', 'PROVISIONING', 'TERMINATED', 'TERMINATING', 'UPDATING']
+)
+
+IG_CREATE_STATUSES = frozenset(
+    ['AVAILABLE', 'PROVISIONING', 'TERMINATED', 'TERMINATING']
+)
+
+ROUTE_TABLE_UPDATE_STATUSES = frozenset(
+    ['AVAILABLE', 'PROVISIONING', 'TERMINATED', 'TERMINATING']
+)
+
+SECURITY_LIST_UPDATE_STATUSES = frozenset(
+    ['AVAILABLE', 'PROVISIONING', 'TERMINATED', 'TERMINATING']
+)
+
+
+class OciVcn(resource.BaseResource):
+  """An object representing an Oci VCN."""
+
+  def __init__(self, name, region):
+    super(OciVcn, self).__init__()
+    self.status = None
+    self.region = region
+
+
+class OciVcn(resource.BaseResource):
+  """An object representing an OCI VCN."""
+
+  def __init__(self, name, region):
+    super(OciVcn, self).__init__()
+    self.status = None
+    self.region = region
+    self.id = None
+    self.name = name
+    self.cidr_blocks = ['172.16.0.0/16']
+    self.cidr_block = None
+    self.vcn_id = None
+    self.subnet_id = None
+    self.ig_id = None
+    self.rt_id = None
+    self.security_list_id = None
+    self.tags = util.MakeFormattedDefaultTags()
+
+  @vm_util.Retry(poll_interval=60, log_errors=False)
+  def WaitForVcnStatus(self, status_list):
+    """Waits until the VCN's status is in status_list."""
+    logging.info('Waiting until the VCN status is one of: %s', status_list)
+    status_cmd = util.OCI_PREFIX + [
+        'network',
+        'vcn',
+        'get',
+        f'--vcn-id {self.vcn_id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _ = vm_util.IssueRetryableCommand(status_cmd)
+    state = json.loads(out)
+    check_state = state['data']['lifecycle-state']
+    self.status = check_state
+    assert check_state in status_list
+
+  def GetVcnIDFromName(self):
+    """Looks up the VCN OCID by display name."""
+    get_cmd = util.OCI_PREFIX + [
+        'network',
+        'vcn',
+        'list',
+        f'--display-name {self.name}']
+    get_cmd = util.GetEncodedCmd(get_cmd)
+    logging.info(get_cmd)
+    stdout, _, _ = vm_util.IssueCommand(get_cmd, raise_on_failure=False)
+    response = json.loads(stdout)
+    self.vcn_id = response['data'][0]['id']
+    logging.info(self.vcn_id)
+
+  def _Create(self):
+    """Creates the VCN with a custom CIDR block."""
+    logging.info('Creating VCN with custom CIDR block')
+    create_cmd = util.OCI_PREFIX + [
+        'network',
+        'vcn',
+        'create',
+        f'--display-name pkb-{FLAGS.run_uri}',
+        f'--dns-label vcn{FLAGS.run_uri}',
+        f'--freeform-tags {self.tags}',
+        '--from-json \'{"cidr-blocks":["172.16.0.0/16"]}\'']
+    create_cmd = util.GetEncodedCmd(create_cmd)
+    logging.info(create_cmd)
+    stdout, _, _ = vm_util.IssueCommand(create_cmd, raise_on_failure=False)
+    response = json.loads(stdout)
+    self.vcn_id = response['data']['id']
+    self.cidr_block = response['data']['cidr-block']
+
+  def _Delete(self):
+    """Deletes the VCN."""
+    delete_cmd = util.OCI_PREFIX + [
+        'network',
+        'vcn',
+        'delete',
+        f'--vcn-id {self.vcn_id}',
+        '--force']
+    delete_cmd = util.GetEncodedCmd(delete_cmd)
+    stdout, _, _ = vm_util.IssueCommand(delete_cmd, raise_on_failure=False)
+
+  def GetSubnetIdFromVCNId(self):
+    """Looks up the subnet OCID from the VCN OCID."""
+    get_cmd = util.OCI_PREFIX + [
+        'network',
+        'subnet',
+        'list',
+        f'--vcn-id {self.vcn_id}']
+    get_cmd = util.GetEncodedCmd(get_cmd)
+    logging.info(get_cmd)
+    stdout, _, _ = vm_util.IssueCommand(get_cmd, raise_on_failure=False)
+    response = json.loads(stdout)
+    self.subnet_id = response['data'][0]['id']
+
+  @vm_util.Retry(poll_interval=60, log_errors=False)
+  def WaitForSubnetStatus(self, status_list):
+    """Waits until the subnet's status is in status_list."""
+    logging.info('Waiting until the subnet status is one of: %s', status_list)
+    status_cmd = util.OCI_PREFIX + [
+        'network',
+        'subnet',
+        'get',
+        f'--subnet-id {self.subnet_id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _ = vm_util.IssueRetryableCommand(status_cmd)
+    state = json.loads(out)
+    check_state = state['data']['lifecycle-state']
+    self.status = check_state
+    assert check_state in status_list
+
+  def CreateSubnet(self):
+    """Creates the subnet in the VCN."""
+    logging.info('Creating subnet')
+    create_cmd = util.OCI_PREFIX + [
+        'network',
+        'subnet',
+        'create',
+        f'--display-name pkb-{FLAGS.run_uri}',
+        f'--dns-label sub{FLAGS.run_uri}',
+        f'--cidr-block {self.cidr_block}',
+        f'--vcn-id {self.vcn_id}']
+    create_cmd = util.GetEncodedCmd(create_cmd)
+    stdout, _, _ = vm_util.IssueCommand(create_cmd, raise_on_failure=False)
+    response = json.loads(stdout)
+    self.subnet_id = response['data']['id']
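# --- Illustrative sketch (editor's note, not part of the patch): the create
# helpers above all read the new resource's OCID out of the oci CLI's JSON
# envelope. A toy parse over a canned response in the same {"data": ...}
# shape (the OCID is a made-up placeholder):
import json

fake_stdout = '''
{"data": {"id": "ocid1.vcn.oc1..example",
          "cidr-block": "172.16.0.0/16",
          "lifecycle-state": "PROVISIONING"}}
'''
response = json.loads(fake_stdout)
print(response['data']['id'], response['data']['cidr-block'])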
+
+  def DeleteSubnet(self):
+    """Deletes the subnet."""
+    logging.info('Deleting subnet')
+    delete_cmd = util.OCI_PREFIX + [
+        'network',
+        'subnet',
+        'delete',
+        f'--subnet-id {self.subnet_id}',
+        '--force']
+    delete_cmd = util.GetEncodedCmd(delete_cmd)
+    stdout, _, _ = vm_util.IssueCommand(delete_cmd, raise_on_failure=False)
+
+  @vm_util.Retry(poll_interval=60, log_errors=False)
+  def WaitForInternetGatewayStatus(self, status_list):
+    """Waits until the internet gateway's status is in status_list."""
+    logging.info('Waiting until the internet gateway status is one of: %s',
+                 status_list)
+    status_cmd = util.OCI_PREFIX + [
+        'network',
+        'internet-gateway',
+        'get',
+        f'--ig-id {self.ig_id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _ = vm_util.IssueRetryableCommand(status_cmd)
+    state = json.loads(out)
+    check_state = state['data']['lifecycle-state']
+    self.status = check_state
+    assert check_state in status_list
+
+  def CreateInternetGateway(self):
+    """Creates the internet gateway."""
+    logging.info('Creating internet gateway')
+    create_cmd = util.OCI_PREFIX + [
+        'network',
+        'internet-gateway',
+        'create',
+        f'--display-name pkb-{FLAGS.run_uri}',
+        f'--vcn-id {self.vcn_id}',
+        '--is-enabled True']
+    create_cmd = util.GetEncodedCmd(create_cmd)
+    stdout, _, _ = vm_util.IssueCommand(create_cmd, raise_on_failure=False)
+    response = json.loads(stdout)
+    self.ig_id = response['data']['id']
+
+  def DeleteInternetGateway(self):
+    """Deletes the internet gateway."""
+    logging.info('Deleting internet gateway')
+    delete_cmd = util.OCI_PREFIX + [
+        'network',
+        'internet-gateway',
+        'delete',
+        f'--ig-id {self.ig_id}',
+        '--force']
+    delete_cmd = util.GetEncodedCmd(delete_cmd)
+    stdout, _, _ = vm_util.IssueCommand(delete_cmd, raise_on_failure=False)
+
+  @vm_util.Retry(poll_interval=60, log_errors=False)
+  def WaitForRouteTableStatus(self, status_list):
+    """Waits until the route table's status is in status_list."""
+    logging.info('Waiting until the route table status is one of: %s',
+                 status_list)
+    status_cmd = util.OCI_PREFIX + [
+        'network',
+        'route-table',
+        'get',
+        f'--rt-id {self.rt_id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _ = vm_util.IssueRetryableCommand(status_cmd)
+    state = json.loads(out)
+    check_state = state['data']['lifecycle-state']
+    self.status = check_state
+    assert check_state in status_list
+
+  @vm_util.Retry(poll_interval=60, log_errors=False)
+  def WaitForSecurityListStatus(self, status_list):
+    """Waits until the security list's status is in status_list."""
+    logging.info('Waiting until the security list status is one of: %s',
+                 status_list)
+    status_cmd = util.OCI_PREFIX + [
+        'network',
+        'security-list',
+        'get',
+        f'--security-list-id {self.security_list_id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _ = vm_util.IssueRetryableCommand(status_cmd)
+    state = json.loads(out)
+    check_state = state['data']['lifecycle-state']
+    self.status = check_state
+    assert check_state in status_list
+
+  def UpdateRouteTable(self):
+    """Adds a default route through the internet gateway to the route table."""
+    logging.info('Updating route table with internet gateway route')
+    update_cmd = util.OCI_PREFIX + [
+        'network',
+        'route-table',
+        'update',
+        f'--rt-id {self.rt_id}',
+        '--force',
+        '--route-rules \'[{"cidrBlock":"0.0.0.0/0","networkEntityId":"%s"}]\'' % self.ig_id]
+    update_cmd = util.GetEncodedCmd(update_cmd)
+    stdout, _, _ = vm_util.IssueCommand(update_cmd, raise_on_failure=False)
+
+  def ClearRouteTable(self):
+    """Removes all route rules so the route table can be torn down."""
+    logging.info('Clearing route table rules')
+    update_cmd = util.OCI_PREFIX + [
+        'network',
+        'route-table',
+        'update',
+        f'--rt-id {self.rt_id}',
+        '--force',
+        '--route-rules \'[]\'']
+    update_cmd = util.GetEncodedCmd(update_cmd)
+    stdout, _, _ = vm_util.IssueCommand(update_cmd, raise_on_failure=False)
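# --- Illustrative sketch (editor's note, not part of the patch): the
# UpdateRouteTable method above hand-writes the --route-rules JSON payload.
# An equivalent, easier-to-audit construction with json.dumps (the gateway
# OCID is a made-up placeholder):
import json

ig_id = 'ocid1.internetgateway.oc1..example'
route_rules = json.dumps([{'cidrBlock': '0.0.0.0/0', 'networkEntityId': ig_id}])
print(f"--route-rules '{route_rules}'")
# --route-rules '[{"cidrBlock": "0.0.0.0/0", "networkEntityId": "ocid1.internetgateway.oc1..example"}]'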
+
+  def UpdateSecurityList(self):
+    """Opens all traffic on the internal CIDR and port 22 to the internet.
+
+    UNUSED / DEPRECATED: kept for reference; use AddSecurityListIngressRule.
+    """
+    logging.info('Updating default security list')
+    update_cmd = util.OCI_PREFIX + [
+        'network',
+        'security-list',
+        'update',
+        f'--security-list-id {self.security_list_id}',
+        '--force',
+        '--ingress-security-rules \'[{"source": "%s", "protocol": "all", "isStateless": false},'
+        '{"source": "0.0.0.0/0", "protocol": "6", "isStateless": false, "tcpOptions": {'
+        '"destinationPortRange": {"max": 22, "min": 22}}}]\'' % self.cidr_block]
+    update_cmd = util.GetEncodedCmd(update_cmd)
+    stdout, _, _ = vm_util.IssueCommand(update_cmd, raise_on_failure=False)
+
+  def AddSecurityListIngressRule(self, start_port=22, end_port=None):
+    """Updates the security list to allow traffic on a specific port range."""
+    if not end_port:
+      end_port = start_port
+    logging.info('Adding ingress rule for ports %s : %s', start_port, end_port)
+    cmd = util.OCI_PREFIX + [
+        'network',
+        'security-list',
+        'update',
+        f'--security-list-id {self.security_list_id}',
+        '--force',
+        '--ingress-security-rules \'[{"source": "%s", "protocol": "all", "isStateless": false},'
+        '{"source": "0.0.0.0/0", "protocol": "6", "isStateless": false, "tcpOptions": {'
+        '"destinationPortRange": {"max": %i, "min": %i}}}]\'' % (self.cidr_block, end_port, start_port)]
+    cmd = util.GetEncodedCmd(cmd)
+    stdout, _, _ = vm_util.IssueCommand(cmd, raise_on_failure=False)
+
+  def GetDefaultRouteTableId(self):
+    """Gets the VCN's default route table OCID."""
+    status_cmd = util.OCI_PREFIX + [
+        'network',
+        'vcn',
+        'get',
+        f'--vcn-id {self.vcn_id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _, _ = vm_util.IssueCommand(status_cmd)
+    state = json.loads(out)
+    self.rt_id = state['data']['default-route-table-id']
+
+  def GetDefaultSecurityListId(self):
+    """Gets the VCN's default security list OCID."""
+    status_cmd = util.OCI_PREFIX + [
+        'network',
+        'vcn',
+        'get',
+        f'--vcn-id {self.vcn_id}']
+    status_cmd = util.GetEncodedCmd(status_cmd)
+    out, _, _ = vm_util.IssueCommand(status_cmd)
+    state = json.loads(out)
+    self.security_list_id = state['data']['default-security-list-id']
+
+
+class OciNetwork(network.BaseNetwork):
+  """Object representing an OCI network."""
+
+  CLOUD = provider_info.OCI
+
+  def __init__(self, spec):
+    super(OciNetwork, self).__init__(spec)
+    self.name = FLAGS.oci_network_name or (
+        'perfkit-%s-%s' % (FLAGS.run_uri, str(uuid.uuid4())[-12:]))
+    self.region = spec.zone
+    self.use_vcn = FLAGS.oci_use_vcn
+    self.network_id = None
+    self.vcn_id = None
+    # The OciVcn wrapper is needed in both modes: when use_vcn is set it
+    # creates the VCN; otherwise it is used to look up the existing one.
+    self.vcn = OciVcn(self.name, self.region)
+    self.security_group = None
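# --- Illustrative sketch (editor's note, not part of the patch): the ingress
# rules built by AddSecurityListIngressRule above combine an allow-all rule
# for the VCN's own CIDR with one TCP range (protocol "6") open to the
# internet. The same payload via json.dumps (values are placeholders):
import json

def build_ingress_rules(vcn_cidr, start_port, end_port):
  return json.dumps([
      {'source': vcn_cidr, 'protocol': 'all', 'isStateless': False},
      {'source': '0.0.0.0/0', 'protocol': '6', 'isStateless': False,
       'tcpOptions': {'destinationPortRange': {'min': start_port,
                                               'max': end_port}}},
  ])

print(build_ingress_rules('172.16.0.0/16', 22, 22))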
+
+  @vm_util.Retry()
+  def Create(self):
+    """Creates the network."""
+    if self.use_vcn:
+      self.vcn.Create()
+      self.vcn.WaitForVcnStatus(['AVAILABLE'])
+      self.vcn.GetDefaultRouteTableId()
+      self.vcn.GetDefaultSecurityListId()
+      self.vcn.CreateSubnet()
+      self.vcn.WaitForSubnetStatus(['AVAILABLE'])
+      self.network_id = self.vcn.subnet_id
+      self.vcn.CreateInternetGateway()
+      self.vcn.WaitForInternetGatewayStatus(['AVAILABLE'])
+      self.vcn.UpdateRouteTable()
+      self.vcn.WaitForRouteTableStatus(['AVAILABLE'])
+      # Add an opening in the VCN for SSH.
+      self.vcn.AddSecurityListIngressRule(start_port=22)
+      self.vcn.WaitForSecurityListStatus(['AVAILABLE'])
+    else:
+      self.vcn.GetVcnIDFromName()
+      self.vcn.GetSubnetIdFromVCNId()
+      self.network_id = self.vcn.subnet_id
+
+  def Delete(self):
+    """Deletes the network."""
+    if self.use_vcn:
+      self.vcn.ClearRouteTable()
+      self.vcn.DeleteInternetGateway()
+      self.vcn.DeleteSubnet()
+      self.vcn.Delete()
+
+
+class OCIFirewall(network.BaseFirewall):
+  """An object representing the OCI firewall."""
+
+  CLOUD = provider_info.OCI
+
+  def AllowPort(self, vm, start_port, end_port=None):
+    """Opens a port range on a specific VM; normally called by the VM object.
+
+    Args:
+      vm: The virtual machine to open the port on.
+      start_port: The first port in the range.
+      end_port: The last port in the range; defaults to start_port.
+    """
+    if not vm.network.vcn:
+      # TODO: What happens when we do not have a vcn? Is that possible?
+      logging.error('Opening ports with OCI cloud is only supported when'
+                    ' using a VCN for now!')
+    else:
+      vm.network.vcn.AddSecurityListIngressRule(start_port, end_port=end_port)
diff --git a/perfkitbenchmarker/providers/oci/oci_virtual_machine.py b/perfkitbenchmarker/providers/oci/oci_virtual_machine.py
new file mode 100644
index 0000000000..c380cbddc1
--- /dev/null
+++ b/perfkitbenchmarker/providers/oci/oci_virtual_machine.py
@@ -0,0 +1,310 @@
+"""Class to represent an Oracle Virtual Machine object.
+
+Machine Types:
+https://docs.oracle.com/en-us/iaas/Content/Compute/References/computeshapes.htm
+
+All VM specifics are self-contained and the class provides methods to
+operate on the VM: boot, shutdown, etc.
+"""
+
+
+import itertools
+import json
+import logging
+import threading
+
+from absl import flags
+from perfkitbenchmarker import disk
+from perfkitbenchmarker import errors
+from perfkitbenchmarker import linux_virtual_machine
+from perfkitbenchmarker import provider_info
+from perfkitbenchmarker import virtual_machine
+from perfkitbenchmarker import vm_util
+from perfkitbenchmarker.configs import option_decoders
+from perfkitbenchmarker.providers.oci import util, oci_disk, oci_network
+
+FLAGS = flags.FLAGS
+
+INSTANCE_EXISTS_STATUSES = frozenset(
+    ['CREATING_IMAGE', 'MOVING', 'PROVISIONING', 'RUNNING', 'STARTING',
+     'STOPPED', 'STOPPING', 'TERMINATED', 'TERMINATING'])
+
+
+class OciVmSpec(virtual_machine.BaseVmSpec):
+  """Object containing the information needed to create an OCI VM."""
+
+  CLOUD = provider_info.OCI
+
+  def __init__(self, *args, **kwargs):
+    self.num_local_ssds = None
+    super(OciVmSpec, self).__init__(*args, **kwargs)
+
+  @classmethod
+  def _ApplyFlags(cls, config_values, flag_values):
+    super(OciVmSpec, cls)._ApplyFlags(config_values, flag_values)
+    if flag_values['oci_compute_units'].present:
+      config_values['oci_compute_units'] = flag_values.oci_compute_units
+    if flag_values['oci_compute_memory'].present:
+      config_values['oci_compute_memory'] = flag_values.oci_compute_memory
+    if flag_values['oci_availability_domain'].present:
+      config_values['oci_availability_domain'] = flag_values.oci_availability_domain
+    if flag_values['oci_fault_domain'].present:
+      config_values['oci_fault_domain'] = flag_values.oci_fault_domain
+    if flag_values['oci_boot_disk_size'].present:
+      config_values['oci_boot_disk_size'] = flag_values.oci_boot_disk_size
+    if flag_values['oci_use_vcn'].present:
+      config_values['oci_use_vcn'] = flag_values.oci_use_vcn
+    if flag_values['oci_num_local_ssds'].present:
+      config_values['num_local_ssds'] = flag_values.oci_num_local_ssds
+    if flag_values['machine_type'].present:
+      config_values['machine_type'] = flag_values.machine_type
+    if flag_values['oci_network_name'].present:
+      config_values['oci_network_name'] = flag_values.oci_network_name
+
+  @classmethod
+  def _GetOptionDecoderConstructions(cls):
+    
"""Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword + arguments to construct in order to decode the named option. + """ + result = super(OciVmSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'oci_compute_units': (option_decoders.IntDecoder, {'default': None}), + 'oci_compute_memory': (option_decoders.IntDecoder, {'default': None}), + 'oci_availability_domain': (option_decoders.StringDecoder, {'default': None}), + 'oci_fault_domain': (option_decoders.StringDecoder, {'default': None}), + 'oci_boot_disk_size': (option_decoders.IntDecoder, {'default': 50}), + 'oci_use_vcn': (option_decoders.BooleanDecoder, {'default': True}), + 'num_local_ssds': (option_decoders.IntDecoder, {'default': 0}), + 'machine_type': (option_decoders.StringDecoder, {'default': 'VM.Standard.A1.Flex'}), + 'region': (option_decoders.StringDecoder, {'default': None}), + 'oci_network_name': (option_decoders.StringDecoder, {'default': None}), + }) + return result + + +class OciVirtualMachine(virtual_machine.BaseVirtualMachine): + CLOUD = provider_info.OCI + + _counter_lock = threading.Lock() + _counter = itertools.count() + + def __init__(self, vm_spec): + super(OciVirtualMachine, self).__init__(vm_spec) + with self._counter_lock: + self.instance_number = next(self._counter) + + MAX_LOCAL_DISKS = 32 + self.name = 'perfkit-%s-%s' % (FLAGS.run_uri, self.instance_number) + self.ocid = '' + self.image = vm_spec.image or None + self.operating_system = None + self.operating_system_version = None + self.key_pair_name = "" + self.region = vm_spec.zone + self.subnet = None + self.availability_domain = vm_spec.oci_availability_domain + self.fault_domain = vm_spec.oci_fault_domain + self.machine_type = vm_spec.machine_type + self.compute_units = vm_spec.oci_compute_units + self.compute_memory = vm_spec.oci_compute_memory + self.bv_size = vm_spec.oci_boot_disk_size + self.ip_address = None + self.internal_ip = None + self.status = None + self.user_name = 'perfkit' + self.network = oci_network.OciNetwork.GetNetwork(self) + self.local_disk_counter = 0 + self.num_local_ssds = vm_spec.num_local_ssds + self.max_local_disks = MAX_LOCAL_DISKS + self.tags = util.MakeFormattedDefaultTags() + + @vm_util.Retry(poll_interval=60, log_errors=False) + def _WaitForInstanceStatus(self, status_list): + """Waits until the instance's status is in status_list.""" + logging.info('Waits until the instance\'s status is one of statuses: %s', + status_list) + status_cmd = util.OCI_PREFIX + [ + 'compute', + 'instance', + 'list', + f'--display-name {self.name}', + '--sort-order DESC'] + status_cmd = util.GetEncodedCmd(status_cmd) + out, _ = vm_util.IssueRetryableCommand(status_cmd) + state = json.loads(out) + check_state = state['data'][0]['lifecycle-state'] + self.status = check_state + assert check_state in status_list + + @vm_util.Retry(poll_interval=5, log_errors=False) + def _WaitForIPStatus(self, status_list): + """Waits until the instance's status is in status_list.""" + logging.info('Waits until the instance\'s status is one of statuses: %s', + status_list) + ipstatus_cmd = util.OCI_PREFIX + [ + 'compute', + 'instance', + 'list-vnics', + f'--instance-id {self.ocid}'] + ipstatus_cmd = util.GetEncodedCmd(ipstatus_cmd) + out, _ = vm_util.IssueRetryableCommand(ipstatus_cmd) + state = json.loads(out) + check_state = state['data'][0]['lifecycle-state'] + assert check_state 
in status_list + + def _Create(self): + if self.compute_units is None: + self.compute_units = 1 + + if self.compute_memory is None: + self.compute_memory = self.compute_units * 4 + ad_list = [] + if self.availability_domain is None: + ad_list = util.GetAvailabilityDomainFromRegion(self.region) + self.availability_domain = ad_list[0] + if self.fault_domain is None: + fd_list = util.GetFaultDomainFromAvailabilityDomain(self.availability_domain) + self.fault_domain = fd_list[0] + + if self.image is not None: + oci_image, oci_os_name, oci_os_version = util.GetOciImageIdFromName(self.image, self.machine_type) + else: + oci_os_name = util.GetOsFromImageFamily(self.DEFAULT_IMAGE_FAMILY) + oci_os_version = util.GetOsVersionFromOs(self.DEFAULT_IMAGE_PROJECT, oci_os_name) + oci_image = util.GetOciImageIdFromImage(oci_os_name, oci_os_version, self.machine_type) + self.image = oci_image + + shape_config = "'{\"memoryInGBs\":%s,\"ocpus\":%s}'" % (self.compute_memory, self.compute_units) + + key_file_path = vm_util.GetPublicKeyPath() + + public_key = util.GetPublicKey() + + if "Oracle" in oci_os_name: + user_data = util.ADD_CLOUDINIT_ORACLE_TEMPLATE.format(user_name=self.user_name, + public_key=public_key) + else: + user_data = util.ADD_CLOUDINIT_TEMPLATE.format(user_name=self.user_name, + public_key=public_key) + user_data_filepath = '/tmp/user_data-' + self.name + '.sh' + with open(user_data_filepath, 'w') as user_data_file: + user_data_file.write(user_data) + + create_cmd = util.OCI_PREFIX + [ + 'compute', + 'instance', + 'launch', + f'--subnet-id {self.network.network_id}', + f'--display-name {self.name}', + f'--hostname-label {self.name}', + f'--region {self.region}', + f'--availability-domain {self.availability_domain}', + f'--fault-domain {self.fault_domain}', + f'--image-id {self.image}', + f'--shape {self.machine_type}', + '--shape-config ', + f' {shape_config}', + f'--user-data-file {user_data_filepath}', + f' --boot-volume-size-in-gbs {self.bv_size}', + f'--freeform-tags {self.tags}', + f'--ssh-authorized-keys-file {key_file_path}', + '--assign-public-ip true'] + create_cmd = util.GetEncodedCmd(create_cmd) + stdout, _, ret = vm_util.IssueCommand(create_cmd) + ociid = json.loads(stdout) + self.ocid = ociid['data']['id'] + self._WaitForInstanceStatus(['RUNNING']) + self._GetPublicIP() + + def _GetPublicIP(self): + self._WaitForIPStatus(['AVAILABLE']) + ip_cmd = util.OCI_PREFIX + [ + 'compute', + 'instance', + 'list-vnics', + f'--instance-id {self.ocid}'] + ip_cmd = util.GetEncodedCmd(ip_cmd) + out, _, _ = vm_util.IssueCommand(ip_cmd) + ips = json.loads(out) + self.internal_ip = ips['data'][0]['private-ip'] + self.ip_address = ips['data'][0]['public-ip'] + + def _Delete(self): + if self.status == 'RUNNING': + delete_cmd = util.OCI_PREFIX + [ + 'compute', + 'instance', + 'terminate', + f'--instance-id {self.ocid}', + '--preserve-boot-volume false', + '--force'] + delete_cmd = util.GetEncodedCmd(delete_cmd) + out, _ = vm_util.IssueRetryableCommand(delete_cmd) + self._WaitForInstanceStatus(['TERMINATED']) + + def _Exists(self): + """Returns true if the VM exists.""" + if self.status == 'TERMINATED': + return False + return self.status in INSTANCE_EXISTS_STATUSES + + def CreateScratchDisk(self, disk_spec_id, disk_spec): + """Create a VM's scratch disk. + + Args: + disk_spec: virtual_machine.BaseDiskSpec object of the disk. 
+ """ + disk_number = disk_spec_id + self.local_disk_counter += 1 + if self.local_disk_counter > self.max_local_disks: + raise errors.Error('Not enough local disks.') + logging.info("Now starting to create disks") + data_disk = oci_disk.OciDisk(disk_spec, self.name, self.availability_domain, disk_number) + self.scratch_disks.append(data_disk) + data_disk.Create() + data_disk.GetFreeDeviceName(self) + data_disk.Attach(disk_spec, self) + if disk_spec.disk_type == 'iscsi': + data_disk.ExecuteAttachIscsiCommands(self) + self.FormatDisk(data_disk.GetDevicePath(), disk.LOCAL) + self.MountDisk(data_disk.GetDevicePath(), disk_spec.mount_point, + disk.LOCAL, data_disk.mount_options, + data_disk.fstab_options) + + def AllowPort(self, start_port, end_port=None, source_range=None): + + # TODO: Potentially replace for case where firewall skip flag is in place + super(OciVirtualMachine, self).AllowPort(start_port, end_port, source_range) + + +class Ubuntu2204BasedOCIVirtualMachine(OciVirtualMachine, + linux_virtual_machine.Ubuntu2204Mixin): + DEFAULT_IMAGE_FAMILY = 'ubuntu-os-cloud' + DEFAULT_IMAGE_PROJECT = 'ubuntu-2204-lts' + + +class Ubuntu2004BasedOCIVirtualMachine(OciVirtualMachine, + linux_virtual_machine.Ubuntu2004Mixin): + DEFAULT_IMAGE_FAMILY = 'ubuntu-os-cloud' + DEFAULT_IMAGE_PROJECT = 'ubuntu-2004-lts' + + +class Ubuntu1804BasedOCIVirtualMachine(OciVirtualMachine, + linux_virtual_machine.Ubuntu1804Mixin): + DEFAULT_IMAGE_FAMILY = 'ubuntu-os-cloud' + DEFAULT_IMAGE_PROJECT = 'ubuntu-1804-lts' + + +class Oracle9BasedVirtualMachine(OciVirtualMachine, + linux_virtual_machine.Oracle9Mixin): + DEFAULT_IMAGE_FAMILY = 'Oracle Linux' + DEFAULT_IMAGE_PROJECT = '9' + + +class Oracle8BasedVirtualMachine(OciVirtualMachine, + linux_virtual_machine.Oracle8Mixin): + DEFAULT_IMAGE_FAMILY = 'Oracle Linux' + DEFAULT_IMAGE_PROJECT = '8' diff --git a/perfkitbenchmarker/providers/oci/provider_info.py b/perfkitbenchmarker/providers/oci/provider_info.py new file mode 100644 index 0000000000..1c06087554 --- /dev/null +++ b/perfkitbenchmarker/providers/oci/provider_info.py @@ -0,0 +1,9 @@ +"""Provider info for OCI.""" + +from perfkitbenchmarker import provider_info +from perfkitbenchmarker import providers + + +class OCIProviderInfo(provider_info.BaseProviderInfo): + UNSUPPORTED_BENCHMARKS = ['mysql_service'] + CLOUD = provider_info.OCI diff --git a/perfkitbenchmarker/providers/oci/util.py b/perfkitbenchmarker/providers/oci/util.py new file mode 100644 index 0000000000..20f3c4146d --- /dev/null +++ b/perfkitbenchmarker/providers/oci/util.py @@ -0,0 +1,186 @@ +"""Utilities for working with OracleCloud Web Services resources.""" + +import shlex + +from absl import flags +from perfkitbenchmarker import vm_util +import six +import json +from perfkitbenchmarker import context + +OCI_PREFIX = ['oci'] + +ADD_CLOUDINIT_TEMPLATE = """#!/bin/bash +echo "{user_name} ALL = NOPASSWD: ALL" >> /etc/sudoers +useradd {user_name} --home /home/{user_name} --shell /bin/bash -m +mkdir /home/{user_name}/.ssh +echo "{public_key}" >> /home/{user_name}/.ssh/authorized_keys +chown -R {user_name}:{user_name} /home/{user_name}/.ssh +chmod 700 /home/{user_name}/.ssh +chmod 600 /home/{user_name}/.ssh/authorized_keys +sudo iptables -F +""" + +ADD_CLOUDINIT_ORACLE_TEMPLATE = """#!/bin/bash +echo "{user_name} ALL = NOPASSWD: ALL" >> /etc/sudoers +useradd {user_name} --home /home/{user_name} --shell /bin/bash -m +mkdir /home/{user_name}/.ssh +echo "{public_key}" >> /home/{user_name}/.ssh/authorized_keys +chown -R {user_name}:{user_name} 
/home/{user_name}/.ssh +chmod 700 /home/{user_name}/.ssh +chmod 600 /home/{user_name}/.ssh/authorized_keys +sudo systemctl stop firewalld +sudo systemctl disable firewalld +""" + + +def GetEncodedCmd(cmd): + cmd_line = ' '.join(cmd) + cmd_args = shlex.split(cmd_line) + return cmd_args + + +def GetOciImageIdFromImage(operating_system, operating_system_version, shape): + # oci compute image list --all --operating-system "Canonical Ubuntu" --operating-system-version 18.04 --shape + # VM.Standard.A1.Flex -c ocid1.tenancy.oc1..aaaaaaaadfogwfmgjoi35onknsnu6u5zfp43gh657appkvbghhzyhfhh5oya + create_cmd = OCI_PREFIX + [ + 'compute', + 'image', + 'list', + '--all', + '--operating-system \"%s\"' % operating_system, + '--operating-system-version \"%s\"' % operating_system_version, + '--shape %s' % shape] + create_cmd = GetEncodedCmd(create_cmd) + stdout, _ = vm_util.IssueRetryableCommand(create_cmd) + image_names = json.loads(stdout)['data'] + if len(image_names) > 0: + return image_names[0]['id'] + + +def GetOciImageIdFromName(name, shape): + create_cmd = OCI_PREFIX + [ + 'compute', + 'image', + 'list', + '--all', + '--display-name \"%s\"' % name, + '--shape %s' % shape] + create_cmd = GetEncodedCmd(create_cmd) + stdout, _ = vm_util.IssueRetryableCommand(create_cmd) + image_names = json.loads(stdout)['data'] + if len(image_names) > 0: + return image_names[0]['id'], image_names[0]['operating-system'], image_names[0]['operating-system-version'] + + +def GetAvailabilityDomainFromRegion(region): + create_cmd = OCI_PREFIX + [ + 'iam', + 'availability-domain', + 'list', + '--region %s' % region] + create_cmd = GetEncodedCmd(create_cmd) + stdout, _ = vm_util.IssueRetryableCommand(create_cmd) + availability_domains = json.loads(stdout)['data'] + availability_domains_list = [] + if len(availability_domains) == 1: + availability_domains_list.append(availability_domains[0]['name']) + elif len(availability_domains) == 2: + availability_domains_list.append(availability_domains[0]['name']) + availability_domains_list.append(availability_domains[1]['name']) + else: + availability_domains_list.append(availability_domains[0]['name']) + availability_domains_list.append(availability_domains[1]['name']) + availability_domains_list.append(availability_domains[2]['name']) + return availability_domains_list + + +def GetFaultDomainFromAvailabilityDomain(availability_domain): + create_cmd = OCI_PREFIX + [ + 'iam', + 'fault-domain', + 'list', + '--availability-domain %s' % availability_domain] + create_cmd = GetEncodedCmd(create_cmd) + stdout, _ = vm_util.IssueRetryableCommand(create_cmd) + fault_domains = json.loads(stdout)['data'] + fault_domains_list = [] + if len(fault_domains) == 1: + fault_domains_list.append(fault_domains[0]['name']) + elif len(fault_domains) == 2: + fault_domains_list.append(fault_domains[0]['name']) + fault_domains_list.append(fault_domains[1]['name']) + else: + fault_domains_list.append(fault_domains[0]['name']) + fault_domains_list.append(fault_domains[1]['name']) + fault_domains_list.append(fault_domains[2]['name']) + return fault_domains_list + + +def GetOsFromImageFamily(operating_system): + if "ubuntu" in operating_system: + return 'Canonical Ubuntu' + elif "Oracle" in operating_system: + return 'Oracle Linux' + + +def GetOsVersionFromOs(operating_system_version, operating_system): + if operating_system == 'Canonical Ubuntu': + if "1804" in operating_system_version: + return '18.04' + elif "2004" in operating_system_version: + return '20.04' + elif "2204" in operating_system_version: + return 
'22.04' + elif operating_system == 'Oracle Linux': + if '9' in operating_system_version: + return '9' + elif '8' in operating_system_version: + return '8' + + +def GetPublicKey(): + cat_cmd = ['cat', + vm_util.GetPublicKeyPath()] + keyfile, _ = vm_util.IssueRetryableCommand(cat_cmd) + return keyfile.strip() + + +def FormatTagsJSON(tags_dict): + """Format a dict of tags into arguments. + + Args: + tags_dict: Tags to be formatted. + + Returns: + A string contains formatted tags + """ + tags = ','.join(f'"{k}": "{v}"' for k, v in sorted(six.iteritems(tags_dict)) if k != 'owner') + return json.dumps(tags) + + +def GetDefaultTags(timeout_minutes=None): + """Get the default tags in a dictionary. + + Args: + timeout_minutes: Timeout used for setting the timeout_utc tag. + + Returns: + A dict of tags, contributed from the benchmark spec. + """ + benchmark_spec = context.GetThreadBenchmarkSpec() + if not benchmark_spec: + return {} + return benchmark_spec.GetResourceTags(timeout_minutes) + + +def MakeFormattedDefaultTags(timeout_minutes=None): + """Get the default tags formatted. + + Args: + timeout_minutes: Timeout used for setting the timeout_utc tag. + + Returns: + A string contains tags, contributed from the benchmark spec. + """ + return "{" + FormatTagsJSON(GetDefaultTags(timeout_minutes)) + "}" diff --git a/perfkitbenchmarker/publisher.py b/perfkitbenchmarker/publisher.py index e5066735cc..7983379ef2 100755 --- a/perfkitbenchmarker/publisher.py +++ b/perfkitbenchmarker/publisher.py @@ -31,7 +31,6 @@ import pprint import sys import time -from typing import List import uuid from absl import flags @@ -306,8 +305,11 @@ def AddMetadata(self, metadata, benchmark_spec): class SamplePublisher(six.with_metaclass(abc.ABCMeta, object)): """An object that can publish performance samples.""" + # Time series data is long. Turn this flag off to hide time series data. + PUBLISH_CONSOLE_LOG_DATA = True + @abc.abstractmethod - def PublishSamples(self, samples: List[pkb_sample.SampleDict]): + def PublishSamples(self, samples: list[pkb_sample.SampleDict]): """Publishes 'samples'. PublishSamples will be called exactly once. Calling @@ -382,6 +384,8 @@ class PrettyPrintStreamPublisher(SamplePublisher): stream: File-like object. Output stream to print samples. """ + PUBLISH_CONSOLE_LOG_DATA = False + def __init__(self, stream=None): super().__init__() self.stream = stream or sys.stdout @@ -458,10 +462,16 @@ def PublishSamples(self, samples): self._FormatMetadata(benchmark_meta))) for sample in test_samples: - meta = {k: v for k, v in six.iteritems(sample['metadata']) - if k not in all_constant_meta} - result.write(' {0:<30s} {1:>15f} {2:<30s}'.format( - sample['metric'], sample['value'], sample['unit'])) + meta = { + k: v + for k, v in six.iteritems(sample['metadata']) + if k not in all_constant_meta + } + result.write( + ' {0:<30s} {1:>15f} {2:<30s}'.format( + sample['metric'], sample['value'], sample['unit'] + ) + ) if meta: result.write(' ({0})'.format(self._FormatMetadata(meta))) result.write('\n') @@ -485,6 +495,8 @@ class LogPublisher(SamplePublisher): logger: Logger to publish to. Defaults to the root logger. """ + PUBLISH_CONSOLE_LOG_DATA = False + def __init__(self, level=logging.INFO, logger=None): super().__init__() self.level = level @@ -915,6 +927,7 @@ class SampleCollector(object): Attributes: samples: A list of Sample objects as dicts. + samples_for_console: A list of Sample objects to publish to console. metadata_providers: A list of MetadataProvider objects. Metadata providers to use. 
Defaults to DEFAULT_METADATA_PROVIDERS. publishers: A list of SamplePublisher objects to publish to. @@ -928,14 +941,15 @@ class SampleCollector(object): def __init__(self, metadata_providers=None, publishers=None, publishers_from_flags=True, add_default_publishers=True): - self.samples: List[pkb_sample.SampleDict] = [] + self.samples: list[pkb_sample.SampleDict] = [] + self.samples_for_console: list[pkb_sample.SampleDict] = [] if metadata_providers is not None: self.metadata_providers = metadata_providers else: self.metadata_providers = DEFAULT_METADATA_PROVIDERS - self.publishers: List[SamplePublisher] = publishers[:] if publishers else [] + self.publishers: list[SamplePublisher] = publishers[:] if publishers else [] for publisher_class in EXTERNAL_PUBLISHERS: self.publishers.append(publisher_class()) if publishers_from_flags: @@ -1031,13 +1045,20 @@ def AddSamples(self, samples, benchmark, benchmark_spec): sample['sample_uri'] = str(uuid.uuid4()) self.samples.append(sample) + if not s.DisableConsoleLog(): + self.samples_for_console.append(sample) + def PublishSamples(self): """Publish samples via all registered publishers.""" if not self.samples: logging.warning('No samples to publish.') return for publisher in self.publishers: - publisher.PublishSamples(self.samples) + publisher.PublishSamples( + self.samples + if publisher.PUBLISH_CONSOLE_LOG_DATA + else self.samples_for_console + ) self.samples = [] diff --git a/perfkitbenchmarker/relational_db.py b/perfkitbenchmarker/relational_db.py index 9d8ad330e7..e12dbc51e7 100644 --- a/perfkitbenchmarker/relational_db.py +++ b/perfkitbenchmarker/relational_db.py @@ -230,20 +230,40 @@ def client_vm(self): raise RelationalDbPropertyNotSetError('client_vm is not set') return self._client_vm + # TODO(user): add support for multiple client VMs @client_vm.setter def client_vm(self, client_vm): self._client_vm = client_vm + def _GetDbConnectionProperties( + self, + ) -> sql_engine_utils.DbConnectionProperties: + return sql_engine_utils.DbConnectionProperties( + self.spec.engine, + self.spec.engine_version, + self.endpoint, + self.port, + self.spec.database_username, + self.spec.database_password, + ) + + # TODO(user): Deprecate in favor of client_vms_query_tools @property def client_vm_query_tools(self): if not hasattr(self, '_client_vm_query_tools'): - connection_properties = sql_engine_utils.DbConnectionProperties( - self.spec.engine, self.spec.engine_version, self.endpoint, self.port, - self.spec.database_username, self.spec.database_password) - self._client_vm_query_tools = sql_engine_utils.GetQueryToolsByEngine( - self.client_vm, connection_properties) + self._client_vm_query_tools = self.client_vms_query_tools[0] return self._client_vm_query_tools + @property + def client_vms_query_tools(self) -> list[sql_engine_utils.ISQLQueryTools]: + if not hasattr(self, '_client_vms_query_tools'): + connection_properties = self._GetDbConnectionProperties() + self._client_vms_query_tools = [ + sql_engine_utils.GetQueryToolsByEngine(vm, connection_properties) + for vm in self.client_vms + ] + return self._client_vms_query_tools + @property def client_vm_query_tools_for_replica(self): """Query tools to make custom queries on replica.""" @@ -258,8 +278,11 @@ def client_vm_query_tools_for_replica(self): return self._client_vm_query_tools_for_replica def SetVms(self, vm_groups): - self.client_vm = vm_groups['clients' if 'clients' in - vm_groups else 'default'][0] + self.client_vms = vm_groups[ + 'clients' if 'clients' in vm_groups else 'default' + ] + # 
TODO(user): Remove this after moving to multiple client VMs. + self.client_vm = self.client_vms[0] @property def endpoint(self): diff --git a/perfkitbenchmarker/sample.py b/perfkitbenchmarker/sample.py index 6b282966fd..606c10b295 100644 --- a/perfkitbenchmarker/sample.py +++ b/perfkitbenchmarker/sample.py @@ -24,6 +24,9 @@ PERCENTILES_LIST = 0.1, 1, 5, 10, 50, 90, 95, 99, 99.9 +# Add this flag to the metadata to hide logging to console. +DISABLE_CONSOLE_LOG = 'disable_console_log' + _SAMPLE_FIELDS = 'metric', 'value', 'unit', 'metadata', 'timestamp' # Metric names for time series @@ -155,6 +158,20 @@ def __eq__(self, other) -> bool: return False return True + def DisableConsoleLog(self) -> bool: + """Disable log to console when this return True.""" + + # Disable Console log is set as a metadata rather than a field + # is due to the current structure of samples class. + # Adding extra field to a sample might break serialization of some publisher + # pipeline as they expect certain format. + # Modyfing asdict function is also not enough because when we pickle + # the samples, + return ( + DISABLE_CONSOLE_LOG in self.metadata + and self.metadata[DISABLE_CONSOLE_LOG] + ) + def asdict(self)-> Dict[str, Any]: # pylint:disable=invalid-name """Converts the Sample to a dictionary.""" return self._asdict() diff --git a/perfkitbenchmarker/static_virtual_machine.py b/perfkitbenchmarker/static_virtual_machine.py index 456665c705..af3bd692f6 100644 --- a/perfkitbenchmarker/static_virtual_machine.py +++ b/perfkitbenchmarker/static_virtual_machine.py @@ -36,6 +36,7 @@ from perfkitbenchmarker import os_types from perfkitbenchmarker import resource from perfkitbenchmarker import virtual_machine +from perfkitbenchmarker import windows_virtual_machine FLAGS = flags.FLAGS @@ -393,3 +394,10 @@ class Debian10BasedStaticVirtualMachine(StaticVirtualMachine, class Debian11BasedStaticVirtualMachine(StaticVirtualMachine, linux_virtual_machine.Debian11Mixin): pass + + +class Windows2019SQLServer2019StandardStaticVirtualMachine( + StaticVirtualMachine, + windows_virtual_machine.Windows2019SQLServer2019Standard, +): + pass diff --git a/perfkitbenchmarker/test_util.py b/perfkitbenchmarker/test_util.py index a98e9647e7..67ec5b1efe 100644 --- a/perfkitbenchmarker/test_util.py +++ b/perfkitbenchmarker/test_util.py @@ -85,6 +85,19 @@ def assertSampleListsEqualUpToTimestamp(self, a, b, msg=None): ex.args = (ex.message,) raise ex + def assertSampleInList(self, a, b, msg=None): # pylint:disable=invalid-name + """Assert that sample a is in list b (up to timestamp).""" + found = False + for s in b: + try: + self.assertSamplesEqualUpToTimestamp(a, s, msg=msg) + except self.failureException: + continue + found = True + if not found: + msg = msg or f'{a} not found in {b}.' + raise AssertionError(msg) + def assertDiskMounts(benchmark_config, mount_point): """Test whether a disk mounts in a given configuration. 
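# --- Illustrative sketch (editor's note, not part of the patch): a
# self-contained analogue of the new assertSampleInList helper above, which
# passes when any sample in the list matches field by field, ignoring only
# the timestamp (sample dicts are simplified here):
def samples_equal_up_to_timestamp(a, b):
  keys = ('metric', 'value', 'unit', 'metadata')
  return all(a[k] == b[k] for k in keys)

def sample_in_list(target, samples):
  return any(samples_equal_up_to_timestamp(target, s) for s in samples)

got = [{'metric': 'latency', 'value': 1.5, 'unit': 'ms',
        'metadata': {}, 'timestamp': 123.4}]
want = {'metric': 'latency', 'value': 1.5, 'unit': 'ms',
        'metadata': {}, 'timestamp': 999.9}
assert sample_in_list(want, got)  # Timestamps differ, still a match.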
diff --git a/perfkitbenchmarker/time_triggers/maintenance_simulation_trigger.py b/perfkitbenchmarker/time_triggers/maintenance_simulation_trigger.py index 6c921b0018..96b755aed2 100644 --- a/perfkitbenchmarker/time_triggers/maintenance_simulation_trigger.py +++ b/perfkitbenchmarker/time_triggers/maintenance_simulation_trigger.py @@ -15,6 +15,7 @@ import collections import copy +import logging import statistics from typing import Any, List, Dict @@ -114,7 +115,10 @@ def AppendSamples( for vm in self.vms: vm.WaitLMNotificationRelease() lm_events_dict = vm.CollectLMNotificationsTime() - lm_ends = max(lm_ends, float(lm_events_dict['Host_maintenance_end'])) + # Host maintenance is in s + lm_ends = max( + lm_ends, float(lm_events_dict['Host_maintenance_end']) * 1000 + ) samples.append( sample.Sample( 'LM Total Time', @@ -199,6 +203,9 @@ def _AggregateThroughputSample(self, s: sample.Sample) -> List[sample.Sample]: median = statistics.median(base_line_values) mean = statistics.mean(base_line_values) + logging.info('LM Baseline median: %s', median) + logging.info('LM Baseline mean: %s', mean) + # Keep the metadata from the original sample except time series metadata for field in sample.TIME_SERIES_METADATA: if field in metadata: @@ -213,6 +220,10 @@ def _AggregateThroughputSample(self, s: sample.Sample) -> List[sample.Sample]: if values_after_lm_ends: mean_after_lm_ends = statistics.mean(values_after_lm_ends) samples += self._ComputeDegradation(mean, mean_after_lm_ends, metadata) + logging.info('Mean after LM ends: %s', mean_after_lm_ends) + logging.info( + 'Number of samples after LM ends: %s', len(values_after_lm_ends) + ) return samples def _ComputeLossPercentile( diff --git a/perfkitbenchmarker/traces/otel.py b/perfkitbenchmarker/traces/otel.py index c6a2ea747d..e477748fab 100644 --- a/perfkitbenchmarker/traces/otel.py +++ b/perfkitbenchmarker/traces/otel.py @@ -34,6 +34,14 @@ flags.DEFINE_integer( 'otel_interval_secs', 60, 'Interval of the metrics to collect.' 
) + +_HIDE_LOGGING = flags.DEFINE_boolean( + 'otel_hide_logging', + True, + 'Hide logging to console for otel metrics.', +) + + flags.DEFINE_string( 'otel_config_file', './otel/config.yaml', @@ -209,6 +217,8 @@ def _Analyze(role, file): parsed_metrics[name]['vm_role'] = role for key, value in parsed_metrics.items(): + if _HIDE_LOGGING.value: + value[sample.DISABLE_CONSOLE_LOG] = True samples.append( sample.Sample( metric=key, value=-1, unit=value['unit'], metadata=value diff --git a/perfkitbenchmarker/virtual_machine.py b/perfkitbenchmarker/virtual_machine.py index 9b5d1c2138..3d70155284 100644 --- a/perfkitbenchmarker/virtual_machine.py +++ b/perfkitbenchmarker/virtual_machine.py @@ -162,7 +162,7 @@ class BootCompletionIpSubset(enum.Enum): GPU_T4 = 't4' GPU_L4 = 'l4' GPU_A10 = 'a10' -VALID_GPU_TYPES = [ +TESLA_GPU_TYPES = [ GPU_K80, GPU_P100, GPU_V100, @@ -170,9 +170,9 @@ class BootCompletionIpSubset(enum.Enum): GPU_P4, GPU_P4_VWS, GPU_T4, - GPU_L4, GPU_A10, ] +VALID_GPU_TYPES = TESLA_GPU_TYPES + [GPU_L4] CPUARCH_X86_64 = 'x86_64' CPUARCH_AARCH64 = 'aarch64' diff --git a/perfkitbenchmarker/vm_util.py b/perfkitbenchmarker/vm_util.py index dcbdf0361c..08c469bb0b 100644 --- a/perfkitbenchmarker/vm_util.py +++ b/perfkitbenchmarker/vm_util.py @@ -110,6 +110,18 @@ 'wait for unresponsive servers.') +class RetryError(Exception): + """Base class for retry errors.""" + + +class TimeoutExceededRetryError(RetryError): + """Exception that is raised when a retryable function times out.""" + + +class RetriesExceededRetryError(RetryError): + """Exception that is raised when a retryable function hits its retry limit.""" + + class IpAddressSubset(object): """Enum of options for --ip_addresses.""" REACHABLE = 'REACHABLE' @@ -252,6 +264,12 @@ def Retry(poll_interval=POLL_INTERVAL, max_retries=MAX_RETRIES, Returns: A function that wraps functions in retry logic. It can be used as a decorator. + + Raises: + TimeoutExceededRetryError - if the provided (or default) timeout is exceeded + while retrying the wrapped function. + RetriesExceededRetryError - if the provided (or default) limit on the number + of retry attempts is exceeded while retrying the wrapped function. """ if retryable_exceptions is None: # TODO(user) Make retries less aggressive. 
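# --- Illustrative sketch (editor's note, not part of the patch): the hunk
# below splits the old bare re-raise into two typed failures, chaining the
# last underlying exception as __cause__. A toy demonstration of the
# retries-exceeded path:
class RetryError(Exception):
  pass

class RetriesExceededRetryError(RetryError):
  pass

def retry_call(fn, max_retries):
  tries = 0
  while True:
    tries += 1
    try:
      return fn()
    except ValueError as e:
      if tries > max_retries:
        raise RetriesExceededRetryError() from e

def always_fails():
  raise ValueError('flaky')

try:
  retry_call(always_fails, max_retries=2)
except RetriesExceededRetryError as e:
  print(type(e.__cause__).__name__)  # ValueError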
@@ -276,9 +294,10 @@ def WrappedFunction(*args, **kwargs): except retryable_exceptions as e: fuzz_multiplier = 1 - fuzz + random.random() * fuzz sleep_time = poll_interval * fuzz_multiplier - if ((time.time() + sleep_time) >= deadline or - (max_retries >= 0 and tries > max_retries)): - raise + if (time.time() + sleep_time) >= deadline: + raise TimeoutExceededRetryError() from e + elif max_retries >= 0 and tries > max_retries: + raise RetriesExceededRetryError() from e else: if log_errors: logging.info('Retrying exception running %s: %s', f.__name__, e) diff --git a/perfkitbenchmarker/windows_virtual_machine.py b/perfkitbenchmarker/windows_virtual_machine.py index 1dd2e1acfd..350081dbe9 100644 --- a/perfkitbenchmarker/windows_virtual_machine.py +++ b/perfkitbenchmarker/windows_virtual_machine.py @@ -401,7 +401,7 @@ def OnStartup(self): self.home_dir = stdout.strip() stdout, _ = self.RemoteCommand('echo $env:SystemDrive') self.system_drive = stdout.strip() - self.RemoteCommand('mkdir %s' % self.temp_dir) + self.RemoteCommand('mkdir %s -Force' % self.temp_dir) self.DisableGuestFirewall() def _Reboot(self): diff --git a/requirements.txt b/requirements.txt index 2b3e340faa..72a16523b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,6 +31,8 @@ google-cloud-monitoring==0.31.1 beautifulsoup4 requests python-datetime-tz +matplotlib +seaborn # unlike setup.py requirements.txt cannot require a Python version AFAICT # this hack requires a non-existent package when Python is less than 3.9 diff --git a/tests/configs/benchmark_config_spec_test.py b/tests/configs/benchmark_config_spec_test.py index 58c94c1d3c..74418877a7 100644 --- a/tests/configs/benchmark_config_spec_test.py +++ b/tests/configs/benchmark_config_spec_test.py @@ -171,7 +171,7 @@ class VmGroupSpecTestCase(pkb_common_test_case.PkbCommonTestCase): def setUp(self): super(VmGroupSpecTestCase, self).setUp() self._spec_class = vm_group_decoders.VmGroupSpec - self._kwargs = {'cloud': provider_info.GCP, 'os_type': os_types.UBUNTU1804, + self._kwargs = {'cloud': provider_info.GCP, 'os_type': os_types.DEFAULT, 'vm_spec': _GCP_AWS_VM_CONFIG} def testMissingValues(self): @@ -187,7 +187,7 @@ def testDefaults(self): self.assertEqual(result.cloud, 'GCP') self.assertEqual(result.disk_count, 1) self.assertIsNone(result.disk_spec) - self.assertEqual(result.os_type, 'ubuntu1804') + self.assertEqual(result.os_type, 'ubuntu2004') self.assertEqual(result.static_vms, []) self.assertEqual(result.vm_count, 1) self.assertIsInstance(result.vm_spec, gce_virtual_machine.GceVmSpec) @@ -280,7 +280,7 @@ def testMissingCloudDiskConfig(self): self._spec_class( _COMPONENT, cloud=provider_info.GCP, - os_type=os_types.UBUNTU1804, + os_type=os_types.DEFAULT, disk_spec={}, vm_spec=_GCP_AWS_VM_CONFIG) self.assertEqual( @@ -293,7 +293,7 @@ def testMissingCloudVmConfig(self): self._spec_class( _COMPONENT, cloud=provider_info.GCP, - os_type=os_types.UBUNTU1804, + os_type=os_types.DEFAULT, vm_spec={}) self.assertEqual( str(cm.exception), @@ -333,7 +333,7 @@ def testNonPresentFlagsAndPresentConfigValues(self): _COMPONENT, flag_values=self.createNonPresentFlags(), vm_count=2, **self._kwargs) self.assertEqual(result.cloud, 'GCP') - self.assertEqual(result.os_type, 'ubuntu1804') + self.assertEqual(result.os_type, 'ubuntu2004') self.assertEqual(result.vm_count, 2) def testVmCountNone(self): @@ -369,13 +369,13 @@ def testNone(self): def testValidInput(self): result = self._decoder.Decode({ - 'default': {'cloud': provider_info.GCP, 'os_type': os_types.UBUNTU1804, + 'default': 
{'cloud': provider_info.GCP, 'os_type': os_types.DEFAULT, 'vm_spec': _GCP_AWS_VM_CONFIG}}, _COMPONENT, {}) self.assertIsInstance(result, dict) self.assertEqual(len(result), 1) self.assertIsInstance(result['default'], vm_group_decoders.VmGroupSpec) self.assertEqual(result['default'].cloud, 'GCP') - self.assertEqual(result['default'].os_type, 'ubuntu1804') + self.assertEqual(result['default'].os_type, 'ubuntu2004') self.assertIsInstance(result['default'].vm_spec, gce_virtual_machine.GceVmSpec) @@ -385,7 +385,7 @@ def testInvalidInput(self): { 'default': { 'cloud': provider_info.GCP, - 'os_type': os_types.UBUNTU1804, + 'os_type': os_types.DEFAULT, 'static_vms': [{}, {'fake_option': 1.2}], 'vm_spec': _GCP_AWS_VM_CONFIG, } @@ -451,7 +451,7 @@ def setUp(self): self._spec_class = benchmark_config_spec.BenchmarkConfigSpec self._description = 'Test description.' self._vm_groups = {'default': {'cloud': provider_info.GCP, - 'os_type': os_types.UBUNTU1804, + 'os_type': os_types.DEFAULT, 'vm_spec': _GCP_AWS_VM_CONFIG}} self._kwargs = {'description': self._description, 'vm_groups': self._vm_groups} @@ -466,7 +466,7 @@ def testValidInput(self): self.assertIsInstance(result.vm_groups['default'], vm_group_decoders.VmGroupSpec) self.assertEqual(result.vm_groups['default'].cloud, 'GCP') - self.assertEqual(result.vm_groups['default'].os_type, 'ubuntu1804') + self.assertEqual(result.vm_groups['default'].os_type, 'ubuntu2004') self.assertIsInstance(result.vm_groups['default'].vm_spec, gce_virtual_machine.GceVmSpec) @@ -483,7 +483,7 @@ def testInvalidVmGroups(self): def testMismatchedOsTypes(self): self._kwargs['vm_groups'] = { os_type + '_group': {'os_type': os_type, 'vm_spec': _GCP_AWS_VM_CONFIG} - for os_type in (os_types.UBUNTU1804, os_types.RHEL8, + for os_type in (os_types.DEFAULT, os_types.RHEL8, os_types.WINDOWS2019_CORE)} expected_os_types = os_types.JUJU, os_types.WINDOWS2019_CORE with self.assertRaises(errors.Config.InvalidValue) as cm: @@ -497,7 +497,7 @@ def testMismatchedOsTypes(self): "'juju', 'windows2019_core'. 
The following VM group options are " "invalid:{sep}" "test_component.vm_groups['rhel8_group'].os_type: 'rhel8'{sep}" - "test_component.vm_groups['ubuntu1804_group'].os_type: 'ubuntu1804'" + "test_component.vm_groups['ubuntu2004_group'].os_type: 'ubuntu2004'" .format(sep=os.linesep))) def testFlagOverridesPropagate(self): @@ -515,7 +515,7 @@ def testFlagOverridesPropagate(self): self.assertIsInstance(result.vm_groups['default'], vm_group_decoders.VmGroupSpec) self.assertEqual(result.vm_groups['default'].cloud, 'AWS') - self.assertEqual(result.vm_groups['default'].os_type, 'ubuntu1804') + self.assertEqual(result.vm_groups['default'].os_type, 'ubuntu2004') self.assertIsInstance(result.vm_groups['default'].vm_spec, virtual_machine.BaseVmSpec) diff --git a/tests/data/elasticache_describe_cluster.txt b/tests/data/elasticache_describe_cluster.txt new file mode 100644 index 0000000000..80e3614226 --- /dev/null +++ b/tests/data/elasticache_describe_cluster.txt @@ -0,0 +1,111 @@ +{ + "ReplicationGroups": [ + { + "ReplicationGroupId": "pkb-cbf06969", + "Description": "pkb-cbf06969", + "GlobalReplicationGroupInfo": {}, + "Status": "available", + "PendingModifiedValues": {}, + "MemberClusters": [ + "pkb-cbf06969-0001-001", + "pkb-cbf06969-0002-001", + "pkb-cbf06969-0003-001", + "pkb-cbf06969-0004-001", + "pkb-cbf06969-0005-001", + "pkb-cbf06969-0006-001" + ], + "NodeGroups": [ + { + "NodeGroupId": "0001", + "Status": "available", + "Slots": "0-2730", + "NodeGroupMembers": [ + { + "CacheClusterId": "pkb-cbf06969-0001-001", + "CacheNodeId": "0001", + "PreferredAvailabilityZone": "us-east-1c" + } + ] + }, + { + "NodeGroupId": "0002", + "Status": "available", + "Slots": "2731-5461", + "NodeGroupMembers": [ + { + "CacheClusterId": "pkb-cbf06969-0002-001", + "CacheNodeId": "0001", + "PreferredAvailabilityZone": "us-east-1a" + } + ] + }, + { + "NodeGroupId": "0003", + "Status": "available", + "Slots": "5462-8192", + "NodeGroupMembers": [ + { + "CacheClusterId": "pkb-cbf06969-0003-001", + "CacheNodeId": "0001", + "PreferredAvailabilityZone": "us-east-1b" + } + ] + }, + { + "NodeGroupId": "0004", + "Status": "available", + "Slots": "8193-10923", + "NodeGroupMembers": [ + { + "CacheClusterId": "pkb-cbf06969-0004-001", + "CacheNodeId": "0001", + "PreferredAvailabilityZone": "us-east-1a" + } + ] + }, + { + "NodeGroupId": "0005", + "Status": "available", + "Slots": "10924-13653", + "NodeGroupMembers": [ + { + "CacheClusterId": "pkb-cbf06969-0005-001", + "CacheNodeId": "0001", + "PreferredAvailabilityZone": "us-east-1b" + } + ] + }, + { + "NodeGroupId": "0006", + "Status": "available", + "Slots": "13654-16383", + "NodeGroupMembers": [ + { + "CacheClusterId": "pkb-cbf06969-0006-001", + "CacheNodeId": "0001", + "PreferredAvailabilityZone": "us-east-1c" + } + ] + } + ], + "AutomaticFailover": "enabled", + "MultiAZ": "disabled", + "ConfigurationEndpoint": { + "Address": "pkb-cbf06969.t88vpu.clustercfg.use1.cache.amazonaws.com", + "Port": 6379 + }, + "SnapshotRetentionLimit": 0, + "SnapshotWindow": "03:00-04:00", + "ClusterEnabled": true, + "CacheNodeType": "cache.m5.large", + "AuthTokenEnabled": false, + "TransitEncryptionEnabled": false, + "AtRestEncryptionEnabled": false, + "ARN": "arn:aws:elasticache:us-east-1:835761027970:replicationgroup:pkb-cbf06969", + "LogDeliveryConfigurations": [], + "ReplicationGroupCreateTime": "2023-05-24T02:58:42.581Z", + "DataTiering": "disabled", + "AutoMinorVersionUpgrade": true + } + ] +} diff --git a/tests/data/linux_boot/systemd2.output b/tests/data/linux_boot/systemd2.output index 
645e07295f..d034b21d32 100644 --- a/tests/data/linux_boot/systemd2.output +++ b/tests/data/linux_boot/systemd2.output @@ -1,7 +1,7 @@ The time when unit became active or started is printed after the "@" character. The time the unit took to start is printed after the "+" character. -systemd-sysctl.service +103ms +systemd-sysctl.service +103us systemd-modules-load.service @671ms +150ms systemd-journald.socket @530ms -.mount @300ms diff --git a/tests/data/netperf_results_multistreams.json b/tests/data/netperf_results_multistreams.json new file mode 100644 index 0000000000..7c79b29f91 --- /dev/null +++ b/tests/data/netperf_results_multistreams.json @@ -0,0 +1,22 @@ +[ + [ + "MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 20001 AF_INET to 104.154.50.86 () port 20001 AF_INET : +/-2.500% @ 99% conf.", + "Throughput,Throughput Units,Throughput Confidence Width (%),Confidence Iterations Run,Stddev Latency Microseconds,50th Percentile Latency Microseconds,90th Percentile Latency Microseconds,99th Percentile Latency Microseconds,Minimum Latency Microseconds,Maximum Latency Microseconds,Local Transport Retransmissions,Remote Transport Retransmissions,Transport MSS bytes", + "1000.00,10^6bits/s,10.100,20,1084.37,2,6,3374,1,3500,0,0,1408" + ], + [ + "MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 20001 AF_INET to 104.154.50.86 () port 20001 AF_INET : +/-2.500% @ 99% conf.", + "Throughput,Throughput Units,Throughput Confidence Width (%),Confidence Iterations Run,Stddev Latency Microseconds,50th Percentile Latency Microseconds,90th Percentile Latency Microseconds,99th Percentile Latency Microseconds,Minimum Latency Microseconds,Maximum Latency Microseconds,Local Transport Retransmissions,Remote Transport Retransmissions,Transport MSS bytes", + "2000.00,10^6bits/s,10.100,20,2084.37,2,6,3374,1,3500,0,0,1408" + ], + [ + "MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 20001 AF_INET to 104.154.50.86 () port 20001 AF_INET : +/-2.500% @ 99% conf.", + "Throughput,Throughput Units,Throughput Confidence Width (%),Confidence Iterations Run,Stddev Latency Microseconds,50th Percentile Latency Microseconds,90th Percentile Latency Microseconds,99th Percentile Latency Microseconds,Minimum Latency Microseconds,Maximum Latency Microseconds,Local Transport Retransmissions,Remote Transport Retransmissions,Transport MSS bytes", + "3000.00,10^6bits/s,10.100,20,1084.37,2,6,3374,1,3500,0,0,1408" + ], + [ + "MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 20001 AF_INET to 104.154.50.86 () port 20001 AF_INET : +/-2.500% @ 99% conf.", + "Throughput,Throughput Units,Throughput Confidence Width (%),Confidence Iterations Run,Stddev Latency Microseconds,50th Percentile Latency Microseconds,90th Percentile Latency Microseconds,99th Percentile Latency Microseconds,Minimum Latency Microseconds,Maximum Latency Microseconds,Local Transport Retransmissions,Remote Transport Retransmissions,Transport MSS bytes", + "4000.00,10^6bits/s,10.100,20,1084.37,2,6,3374,1,3500,0,0,1408" + ] +] diff --git a/tests/data/redis_cluster_shards.txt b/tests/data/redis_cluster_shards.txt new file mode 100644 index 0000000000..20f9b65085 --- /dev/null +++ b/tests/data/redis_cluster_shards.txt @@ -0,0 +1,120 @@ +slots +2731 +5461 +nodes +id +72a3d871c65a09476808bbf8bb6b7cc9facc6f0d +port +6379 +ip +10.0.1.117 +endpoint +10.0.1.117 +hostname + +role +master +replication-offset +0 +health +online +slots +10924 +13653 +nodes +id +99a2cc672e715527b53816cc57ab8e9f7661fcb4 +port +6379 +ip +10.0.3.217 +endpoint +10.0.3.217 +hostname + +role +master 
+replication-offset +0 +health +online +slots +8193 +10923 +nodes +id +e01888d0f30de88f2f0e271aa38237f9e1f7fc1b +port +6379 +ip +10.0.2.177 +endpoint +10.0.2.177 +hostname + +role +master +replication-offset +0 +health +online +slots +5462 +8192 +nodes +id +4ae859700bc028a88e4479fb964c7185575fd26e +port +6379 +ip +10.0.3.6 +endpoint +10.0.3.6 +hostname + +role +master +replication-offset +0 +health +online +slots +13654 +16383 +nodes +id +dae79ffbdbe5511fd163142c1aba5dad4ec72d0e +port +6379 +ip +10.0.2.104 +endpoint +10.0.2.104 +hostname + +role +master +replication-offset +0 +health +online +slots +0 +2730 +nodes +id +eaee237523068bb804e170b05726494c76b0c706 +port +6379 +ip +10.0.1.174 +endpoint +10.0.1.174 +hostname + +role +master +replication-offset +0 +health +online diff --git a/tests/disk_metadata_test.py b/tests/disk_metadata_test.py index 3cf897b934..617e5d37d0 100644 --- a/tests/disk_metadata_test.py +++ b/tests/disk_metadata_test.py @@ -87,7 +87,7 @@ def DoAwsDiskTest(self, disk_type, machine_type, vm_spec = aws_virtual_machine.AwsVmSpec( 'test_vm_spec.AWS', zone='us-east-1a', machine_type=machine_type) - vm = aws_virtual_machine.Ubuntu1804BasedAwsVirtualMachine(vm_spec) + vm = aws_virtual_machine.Ubuntu2004BasedAwsVirtualMachine(vm_spec) vm.GetNVMEDeviceInfo = mock.Mock() vm.GetNVMEDeviceInfo.return_value = [ { @@ -134,7 +134,7 @@ def DoAzureDiskTest(self, storage_type, disk_type, machine_type, vm_spec = azure_virtual_machine.AzureVmSpec( 'test_vm_spec.AZURE', zone='eastus2', machine_type=machine_type) - vm = azure_virtual_machine.Ubuntu1604BasedAzureVirtualMachine(vm_spec) + vm = azure_virtual_machine.Ubuntu2004BasedAzureVirtualMachine(vm_spec) azure_disk.AzureDisk.Create = mock.Mock() azure_disk.AzureDisk.Attach = mock.Mock() diff --git a/tests/gce_virtual_machine_test.py b/tests/gce_virtual_machine_test.py index fff8bdc073..29a1c067cf 100644 --- a/tests/gce_virtual_machine_test.py +++ b/tests/gce_virtual_machine_test.py @@ -386,10 +386,10 @@ def _CreateFakeReturnValues(self, fake_image=''): fake_rets.append((json.dumps(_CreateFakeDiskMetadata(fake_image)), '', 0)) return fake_rets - def testCreateUbuntu1804(self): + def testCreateUbuntu2004(self): vm_class = virtual_machine.GetVmClass(provider_info.GCP, - os_types.UBUNTU1804) - fake_image = 'fake-ubuntu1804' + os_types.UBUNTU2004) + fake_image = 'fake-ubuntu2004' with PatchCriticalObjects( self._CreateFakeReturnValues(fake_image)) as issue_command: vm = vm_class(self.spec) @@ -398,20 +398,20 @@ def testCreateUbuntu1804(self): command_string = ' '.join(issue_command.call_args[0][0]) self.assertEqual(issue_command.call_count, 1) - self.assertEqual(vm.GetDefaultImageFamily(False), 'ubuntu-1804-lts') - self.assertEqual(vm.GetDefaultImageFamily(True), 'ubuntu-1804-lts-arm64') + self.assertEqual(vm.GetDefaultImageFamily(False), 'ubuntu-2004-lts') + self.assertEqual(vm.GetDefaultImageFamily(True), 'ubuntu-2004-lts-arm64') self.assertEqual(vm.GetDefaultImageProject(), 'ubuntu-os-cloud') self.assertTrue(vm.SupportGVNIC()) self.assertIn('gcloud compute instances create', command_string) self.assertIn( - '--image-family ubuntu-1804-lts --image-project ubuntu-os-cloud', + '--image-family ubuntu-2004-lts --image-project ubuntu-os-cloud', command_string) self.assertNotIn('--boot-disk-size', command_string) self.assertNotIn('--boot-disk-type', command_string) vm._PostCreate() self.assertEqual(issue_command.call_count, 3) self.assertDictContainsSubset({'image': fake_image, - 'image_family': 'ubuntu-1804-lts', + 'image_family': 
'ubuntu-2004-lts', 'image_project': 'ubuntu-os-cloud', 'boot_disk_size': '10', 'boot_disk_type': 'pd-standard'}, @@ -420,8 +420,8 @@ def testCreateUbuntu1804(self): def testCreateUbuntuInCustomProject(self): """Test simulating passing --image and --image_project.""" vm_class = virtual_machine.GetVmClass(provider_info.GCP, - os_types.UBUNTU1804) - fake_image = 'fake-ubuntu1804' + os_types.UBUNTU2004) + fake_image = 'fake-ubuntu2004' fake_image_project = 'fake-project' spec = gce_virtual_machine.GceVmSpec(_COMPONENT, machine_type='fake-machine-type', @@ -437,7 +437,7 @@ def testCreateUbuntuInCustomProject(self): self.assertEqual(issue_command.call_count, 1) self.assertIn('gcloud compute instances create', command_string) self.assertIn( - '--image fake-ubuntu1804 --image-project fake-project', + '--image fake-ubuntu2004 --image-project fake-project', command_string) self.assertNotIn('--image-family', command_string) vm._PostCreate() @@ -451,8 +451,8 @@ def testCreateUbuntuInCustomProject(self): def testCreateUbuntuInCustomDisk(self): """Test simulating passing --image and --image_project.""" vm_class = virtual_machine.GetVmClass(provider_info.GCP, - os_types.UBUNTU1804) - fake_image = 'fake-ubuntu1804' + os_types.UBUNTU2004) + fake_image = 'fake-ubuntu2004' fake_image_project = 'fake-project' spec = gce_virtual_machine.GceVmSpec(_COMPONENT, machine_type='fake-machine-type', @@ -746,9 +746,10 @@ def testCreateRateLimitedMachineCreatedFailure(self, mock_cmd): 'memory': '1.0GiB', }) vm = pkb_common_test_case.TestGceVirtualMachine(spec) - with self.assertRaises( - errors.Benchmarks.QuotaFailure.RateLimitExceededError): + with self.assertRaises(vm_util.RetriesExceededRetryError) as e: vm._Create() + self.assertIs(type(e.exception.__cause__), + errors.Benchmarks.QuotaFailure.RateLimitExceededError) self.assertEqual(issue_command.call_count, util.RATE_LIMITED_MAX_RETRIES + 1) @@ -1009,7 +1010,7 @@ class GvnicTest(GceVirtualMachineTestCase): def setUp(self): super(GvnicTest, self).setUp() vm_spec = gce_virtual_machine.GceVmSpec('test_component', project='test') - self.vm = gce_virtual_machine.Ubuntu1804BasedGceVirtualMachine(vm_spec) + self.vm = gce_virtual_machine.Ubuntu2004BasedGceVirtualMachine(vm_spec) self.vm.HasPackage = mock.Mock(return_value=False) self.mock_cmd = mock.Mock() self.vm.RemoteCommand = self.mock_cmd diff --git a/tests/iaas_relational_db_test.py b/tests/iaas_relational_db_test.py index 013e29d55b..ca94b79602 100644 --- a/tests/iaas_relational_db_test.py +++ b/tests/iaas_relational_db_test.py @@ -119,7 +119,7 @@ def testMakePostgresClientCommand(self): db = FakeRelationalDb(self.postgres_spec) db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': [CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() self.assertEqual( db.client_vm_query_tools.MakeSqlCommand( @@ -132,7 +132,7 @@ def testIssuePostgresClientCommand(self): db = FakeRelationalDb(self.postgres_spec) db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': [CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() with mock.patch.object(db.client_vm, 'RemoteCommand') as remote_command: db.client_vm_query_tools.IssueSqlCommand('Select 1', database_name='abc') @@ -152,7 +152,7 @@ def testIssuePostgresClientCommandWithSessionVariables(self): db = FakeRelationalDb(self.postgres_spec) db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': 
[CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() with mock.patch.object(db.client_vm, 'RemoteCommand') as remote_command: db.client_vm_query_tools.IssueSqlCommand( @@ -174,7 +174,7 @@ def testIssuePostgresClientCommandWithSessionVariables(self): def testMakePostgresServerCommand(self): FLAGS['use_managed_db'].parse(False) db = FakeRelationalDb(self.postgres_spec) - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': [CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() @@ -187,7 +187,7 @@ def testMakePostgresServerCommand(self): def testMakeMysqlCientCommand(self): FLAGS['use_managed_db'].parse(False) db = FakeRelationalDb(self.mysql_spec) - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': [CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() @@ -199,7 +199,7 @@ def testMakeMysqlCientCommand(self): def testMakeMysqlCommandWithLocalHost(self): FLAGS['use_managed_db'].parse(False) db = FakeRelationalDb(self.mysql_spec) - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': [CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() @@ -211,7 +211,7 @@ def testMakeMysqlCommandWithLocalHost(self): def testMakeSqlserverCommand(self): FLAGS['use_managed_db'].parse(False) db = FakeRelationalDb(self.sqlserver_spec) - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': [CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() @@ -223,7 +223,7 @@ def testMakeSqlserverCommand(self): def testMakeSqlserverCommandWithLocalHost(self): FLAGS['use_managed_db'].parse(False) db = FakeRelationalDb(self.sqlserver_spec) - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': [CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() @@ -238,7 +238,7 @@ def testInstallMYSQLServer(self): db = FakeRelationalDb(self.mysql_spec) db.endpoint = '1.1.1.1' db.port = db.GetDefaultPort() - db.client_vm = CreateTestLinuxVm() + db.SetVms({'default': [CreateTestLinuxVm()]}) db.server_vm = CreateTestLinuxVm() db.server_vm.IS_REBOOTABLE = False db.client_vm.IS_REBOOTABLE = False diff --git a/tests/linux_benchmarks/cloud_redis_memtier_benchmark_test.py b/tests/linux_benchmarks/cloud_redis_memtier_benchmark_test.py index b1447d50de..f87d22bd3b 100644 --- a/tests/linux_benchmarks/cloud_redis_memtier_benchmark_test.py +++ b/tests/linux_benchmarks/cloud_redis_memtier_benchmark_test.py @@ -13,16 +13,47 @@ # limitations under the License. 
"""Tests for cloud_redis_memtier_benchmark.""" +import pathlib import unittest from absl import flags +from absl.testing import flagsaver import mock - +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.configs import benchmark_config_spec from perfkitbenchmarker.linux_benchmarks import cloud_redis_memtier_benchmark from perfkitbenchmarker.linux_packages import memtier +from perfkitbenchmarker.providers.aws import aws_elasticache_redis # pylint:disable=unused-import from tests import pkb_common_test_case FLAGS = flags.FLAGS +_CLUSTER_SHARDS_OUTPUT = 'redis_cluster_shards.txt' +_DESCRIBE_CLUSTER_OUTPUT = 'elasticache_describe_cluster.txt' + + +def _ReadFile(filename): + path = pathlib.Path(__file__).parents[1] / 'data' / filename + with open(path) as f: + return f.read() + + +def _GetTestRedisSpec(): + spec_args = {'cloud': 'AWS', 'redis_version': 'redis_6_x'} + return benchmark_config_spec._CloudRedisSpec( + 'test_component', flag_values=FLAGS, **spec_args + ) + + +def _GetTestRedisInstance(): + test_spec = _GetTestRedisSpec() + mock_bm_spec = mock.Mock() + mock_bm_spec.config.cloud_redis = test_spec + redis_class = cloud_redis_memtier_benchmark._GetManagedMemoryStoreClass() + instance = redis_class(mock_bm_spec) + instance._ip = '0.0.0.0' + instance._port = 1234 + return instance + class CloudRedisMemtierBenchmarkTest(pkb_common_test_case.PkbCommonTestCase): @@ -89,6 +120,83 @@ def testDelete(self): cloud_redis_memtier_benchmark.Cleanup(benchmark_spec) redis_instance.Delete.assert_called_once_with() + @flagsaver.flagsaver(cloud='AWS') + def testGetConnectionsMultiVm(self): + test_redis_instance = _GetTestRedisInstance() + test_redis_instance.name = 'pkb-cbf06969' + vm1 = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + vm1.ip_address = 'vm1' + vm2 = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + vm2.ip_address = 'vm2' + self.enter_context( + mock.patch.object( + vm1, + 'RemoteCommand', + return_value=(_ReadFile(_CLUSTER_SHARDS_OUTPUT), ''), + ) + ) + self.enter_context( + mock.patch.object( + vm_util, + 'IssueCommand', + return_value=(_ReadFile(_DESCRIBE_CLUSTER_OUTPUT), '', 0), + ) + ) + + connections = cloud_redis_memtier_benchmark._GetConnections( + [vm1, vm2], test_redis_instance + ) + + self.assertCountEqual( + connections, + [ + memtier.MemtierConnection(vm1, '10.0.1.117', 6379), + memtier.MemtierConnection(vm1, '10.0.2.104', 6379), + memtier.MemtierConnection(vm1, '10.0.3.217', 6379), + memtier.MemtierConnection(vm2, '10.0.2.177', 6379), + memtier.MemtierConnection(vm2, '10.0.1.174', 6379), + memtier.MemtierConnection(vm2, '10.0.3.6', 6379), + ], + ) + + @flagsaver.flagsaver(cloud='AWS') + def testGetConnectionsSingleVm(self): + test_redis_instance = _GetTestRedisInstance() + test_redis_instance.name = 'pkb-cbf06969' + vm1 = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + vm1.ip_address = 'vm1' + self.enter_context( + mock.patch.object( + vm1, + 'RemoteCommand', + return_value=(_ReadFile(_CLUSTER_SHARDS_OUTPUT), ''), + ) + ) + self.enter_context( + mock.patch.object( + vm_util, + 'IssueCommand', + return_value=(_ReadFile(_DESCRIBE_CLUSTER_OUTPUT), '', 0), + ) + ) + + connections = cloud_redis_memtier_benchmark._GetConnections( + [vm1], test_redis_instance + ) + + self.assertCountEqual( + connections, + [ + memtier.MemtierConnection(vm1, '0.0.0.0', 1234), + ], + ) + if __name__ == '__main__': unittest.main() diff --git 
a/tests/linux_benchmarks/cluster_boot_benchmark_test.py b/tests/linux_benchmarks/cluster_boot_benchmark_test.py
index 318287b53f..cdb5de5070 100644
--- a/tests/linux_benchmarks/cluster_boot_benchmark_test.py
+++ b/tests/linux_benchmarks/cluster_boot_benchmark_test.py
@@ -1,6 +1,6 @@
 """Tests for cluster_boot_delete."""

-import datetime
+from typing import Optional
 import unittest

 import freezegun
@@ -30,6 +30,28 @@ def vm_mock(index: int, timestamp: float) -> mock.Mock:
       OS_TYPE=f'linux{index}')


+def vm_mock_given_delete_times(
+    index: int,
+    delete_start_time: Optional[float] = None,
+    delete_end_time: Optional[float] = None) -> mock.Mock:
+  """Creates a mock vm with a provided delete_start_time and delete_end_time.
+
+  Args:
+    index: an integer specifying the index of the vm in the list of vms.
+    delete_start_time: mock timestamp indicating the start of the VM delete
+      process. May be passed in as None to test MeasureDelete logic.
+    delete_end_time: mock timestamp indicating the end of the VM delete
+      process. May be passed in as None to test MeasureDelete logic.
+
+  Returns:
+    A mock vm.
+  """
+  return mock.Mock(
+      delete_start_time=delete_start_time,
+      delete_end_time=delete_end_time,
+      OS_TYPE=f'linux{index}')
+
+
 class ClusterBootBenchmarkTest(pkb_common_test_case.PkbCommonTestCase,
                                test_util.SamplesTestMixin):

@@ -42,8 +64,7 @@ def testMeasureDelete(self):
     vms_to_test = [vm_mock(i, timestamp) for i in range(num_vms)]

     # call Delete on vms
-    with freezegun.freeze_time(datetime.datetime.utcfromtimestamp(timestamp)):
-      actual_samples = cluster_boot_benchmark.MeasureDelete(vms_to_test)
+    actual_samples = cluster_boot_benchmark.MeasureDelete(vms_to_test)

     # for all vms create mock samples ie the expected samples
     expected_delete_times = [5, 6, 7]
@@ -69,6 +90,65 @@ def testMeasureDelete(self):
     # assert actual and expected samples are equal
     self.assertSampleListsEqualUpToTimestamp(actual_samples, expected_samples)

+  def testMeasureDeleteNoValidVMs(self):
+    """MeasureDelete test where no VMs have valid delete measurements."""
+    timestamp = 1625863325.003580
+    vm_with_neither = vm_mock_given_delete_times(index=0)
+    vm_with_start_only = vm_mock_given_delete_times(
+        index=1, delete_start_time=timestamp)
+    # We never expect to see this in production, but it does ensure that
+    # the conditional measurement of delete times works for all combinations
+    # of missing attributes.
+    vm_with_end_only = vm_mock_given_delete_times(
+        index=2, delete_end_time=timestamp + 5)
+    vms_to_test = [vm_with_neither, vm_with_start_only, vm_with_end_only]
+
+    # invoke MeasureDelete
+    actual_samples = cluster_boot_benchmark.MeasureDelete(vms_to_test)
+
+    # None of these VMs can have their delete times measured.
+    self.assertEmpty(actual_samples)
+
+  def testMeasureDeleteAttributeChecks(self):
+    """MeasureDelete test where some VMs do not have valid delete measurements."""
+    timestamp = 1625863325.003580
+    vm_with_neither = vm_mock_given_delete_times(index=0)
+    vm_with_start_only = vm_mock_given_delete_times(index=1,
+                                                    delete_start_time=timestamp)
+    vm_with_both = vm_mock_given_delete_times(index=2,
+                                              delete_start_time=timestamp,
+                                              delete_end_time=timestamp + 5)
+    vms_to_test = [vm_with_neither, vm_with_start_only, vm_with_both]
+    # invoke MeasureDelete
+    actual_samples = cluster_boot_benchmark.MeasureDelete(vms_to_test)
+
+    # Only vm_with_both should have its delete time measured.
+    # Note that the machine_instance metadata field is set after filtering out
+    # invalid VMs, so we still expect this to be 0.
+ expected_delete_time = 5 + expected_instance_index = 2 + expected_num_vms = 1 + expected_samples = [ + sample.Sample( + 'Delete Time', expected_delete_time, 'seconds', { + 'machine_instance': 0, + 'num_vms': expected_num_vms, + 'os_type': f'linux{expected_instance_index}', + } + ) + ] + + expected_cluster_delete_time = 5 + expected_cluster_delete_metadata = { + 'num_vms': 1, + 'os_type': f'linux{expected_instance_index}', + } + expected_samples.append( + sample.Sample('Cluster Delete Time', expected_cluster_delete_time, + 'seconds', expected_cluster_delete_metadata)) + # assert actual and expected samples are equal + self.assertSampleListsEqualUpToTimestamp(actual_samples, expected_samples) + @freezegun.freeze_time('2023-03-07') def testGetTimeToBoot(self): context.SetThreadBenchmarkSpec( diff --git a/tests/linux_benchmarks/netperf_benchmark_test.py b/tests/linux_benchmarks/netperf_benchmark_test.py index 25df6c4bd7..3b68283b74 100644 --- a/tests/linux_benchmarks/netperf_benchmark_test.py +++ b/tests/linux_benchmarks/netperf_benchmark_test.py @@ -20,9 +20,9 @@ from absl.testing import flagsaver from absl.testing import parameterized import mock - from perfkitbenchmarker import benchmark_spec from perfkitbenchmarker import errors +from perfkitbenchmarker import flag_util from perfkitbenchmarker import vm_util from perfkitbenchmarker.linux_benchmarks import netperf_benchmark @@ -118,7 +118,9 @@ def testExternalAndInternal(self): ('TCP_CRR_Latency_max', 2500.0, 'us'), ('TCP_CRR_Latency_stddev', 551.07, 'us'), ('TCP_STREAM_Throughput', 1187.94, mbps), + ('TCP_STREAM_Throughput_1stream', 1187.94, mbps), ('TCP_STREAM_Throughput', 1973.37, 'Mbits/sec'), + ('TCP_STREAM_Throughput_1stream', 1973.37, 'Mbits/sec'), ('UDP_RR_Transaction_Rate', 1359.71, tps), ('UDP_RR_Latency_p50', 700.0, 'us'), ('UDP_RR_Latency_p90', 757.0, 'us'), @@ -136,14 +138,17 @@ def testExternalAndInternal(self): ('UDP_STREAM_Throughput', 1102.42, mbps), ('UDP_STREAM_Throughput', 1802.72, 'Mbits/sec'), ], - [i[:3] for i in result]) + [i[:3] for i in result], + ) external_meta = {'ip_type': 'external'} internal_meta = {'ip_type': 'internal'} - expected_meta = (([external_meta] * 7 + [internal_meta] * 7) * 2 + - [external_meta, internal_meta] + - [external_meta] * 7 + - [internal_meta] * 7) + expected_meta = ( + ([external_meta] * 7 + [internal_meta] * 7) * 2 + + [external_meta, external_meta, internal_meta, internal_meta] + + [external_meta] * 7 + + [internal_meta] * 7 + ) for i, meta in enumerate(expected_meta): self.assertIsInstance(result[i][3], dict) @@ -166,6 +171,55 @@ def testParseNetperfOutputError(self, output): False) self.assertIn('Failed to parse stdout', str(e.exception)) + @flagsaver.flagsaver(netperf_benchmarks=[netperf_benchmark.TCP_STREAM]) + def testMultiStreams(self): + self._ConfigureIpTypes() + num_streams = 4 + FLAGS.netperf_num_streams = flag_util.IntegerList([num_streams]) + self.should_run_external.return_value = True + self.should_run_internal.return_value = False + # Read netperf mock results for multiple streams + path = os.path.join( + os.path.dirname(__file__), + '..', + 'data', + 'netperf_results_multistreams.json', + ) + with open(path) as fp: + stdouts = ['\n'.join(i) for i in json.load(fp)] + self.expected_stdout = [] + for i in range(0, len(stdouts), num_streams): + self.expected_stdout.append( + json.dumps((stdouts[i : i + num_streams], [''], [0])) + ) + + vm_spec = mock.MagicMock(spec=benchmark_spec.BenchmarkSpec) + vm_spec.vms = [mock.MagicMock(), mock.MagicMock()] + 
vm_spec.vms[0].RobustRemoteCommand.side_effect = [ + (i, '') for i in self.expected_stdout + ] + vm_spec.vms[1].GetInternalIPs.return_value = ['test_ip'] + run_result = netperf_benchmark.Run(vm_spec) + result = [] + for sample in run_result: + if sample[0] not in ['start_time', 'end_time']: + result.append(sample) + + self.assertListEqual( + [ + ('TCP_STREAM_Throughput_p50', 3000.0, 'Mbits/sec'), + ('TCP_STREAM_Throughput_p90', 4000.0, 'Mbits/sec'), + ('TCP_STREAM_Throughput_p99', 4000.0, 'Mbits/sec'), + ('TCP_STREAM_Throughput_average', 2500.0, 'Mbits/sec'), + ('TCP_STREAM_Throughput_stddev', 1290.9944487358057, 'Mbits/sec'), + ('TCP_STREAM_Throughput_min', 1000.0, 'Mbits/sec'), + ('TCP_STREAM_Throughput_max', 4000.0, 'Mbits/sec'), + ('TCP_STREAM_Throughput_total', 10000.0, 'Mbits/sec'), + ('TCP_STREAM_Throughput_4streams', 10000.0, 'Mbits/sec'), + ], + [i[:3] for i in result], + ) + if __name__ == '__main__': unittest.main() diff --git a/tests/linux_packages/cmake_test.py b/tests/linux_packages/cmake_test.py index b6e483dc11..be695e8747 100644 --- a/tests/linux_packages/cmake_test.py +++ b/tests/linux_packages/cmake_test.py @@ -21,13 +21,13 @@ class CmakeTests(pkb_common_test_case.PkbCommonTestCase): @flagsaver.flagsaver(cmake_kitware=True) def testAptInstallViaKitware(self): - vm = MockVm('ubuntu1804') + vm = MockVm('ubuntu2004') cmake.AptInstall(vm) expected_cmds = [ 'curl --silent https://apt.kitware.com/keys/kitware-archive-latest.asc ' '| gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/kitware.gpg ' '>/dev/null', 'sudo apt-add-repository ' - '"deb https://apt.kitware.com/ubuntu/ bionic main"' + '"deb https://apt.kitware.com/ubuntu/ focal main"' ] vm.RemoteCommand.assert_has_calls([mock.call(cmd) for cmd in expected_cmds]) diff --git a/tests/linux_packages/linux_boot_test.py b/tests/linux_packages/linux_boot_test.py index 1eb390b234..cdeb44f4d5 100644 --- a/tests/linux_packages/linux_boot_test.py +++ b/tests/linux_packages/linux_boot_test.py @@ -124,9 +124,9 @@ def testParseSystemDCriticalChain(self): with open(os.path.join(self.data_dir, 'systemd2.output')) as f: output = f.read() self.assertEqual(linux_boot.ParseSystemDCriticalChainOutput(output), - 0.774) + 0.671103) self.assertEqual(linux_boot.ParseSystemDCriticalChainServiceTime(output), - 0.103) + 0.000103) def testCollectVmToVmSamples(self): """Test vm to vm networking result parsing.""" diff --git a/tests/linux_packages/memtier_test.py b/tests/linux_packages/memtier_test.py index a1d4f32c90..10ec80c459 100644 --- a/tests/linux_packages/memtier_test.py +++ b/tests/linux_packages/memtier_test.py @@ -8,6 +8,7 @@ from absl import flags from absl.testing import flagsaver +from perfkitbenchmarker import background_tasks from perfkitbenchmarker import sample from perfkitbenchmarker import test_util from perfkitbenchmarker.linux_packages import memtier @@ -43,13 +44,19 @@ --- GET 0 50.0 GET 2 100.00 +GET """ METADATA = { 'test': 'foobar', + 'p50_latency': 1.215, 'p90_latency': 2.295, 'p95_latency': 2.319, 'p99_latency': 2.399, + 'p99.5_latency': 3.871, + 'p99.9_latency': 3.872, + 'p99.950_latency': 3.873, + 'p99.990_latency': 3.874, 'avg_latency': 1.54, } @@ -345,6 +352,7 @@ def testAggregateMemtierWithOneResult(self): 'values': [1, 1, 1, 1, 1], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'ramp_down_starts': 4000, }, timestamp=0, ), @@ -436,6 +444,102 @@ def testAggregateMemtierWithOneResult(self): }, timestamp=0, ), + sample.Sample( + metric='Average Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 
'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='Max Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='Min Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p50.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p90.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p95.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p99.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p99.90_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), ] self.assertEqual(samples, expected_result) @@ -533,90 +637,366 @@ def testAggregateMemtierResultsWithMultipleResultsDifferentStartTime(self): timestamp=0, ), sample.Sample( - metric='OPS_time_series', + metric='OPS_time_series', + value=0.0, + unit='ops', + metadata={ + 'values': [1, 2, 3, 3, 3], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'ramp_down_starts': 4000, + }, + timestamp=0, + ), + sample.Sample( + metric='Average Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 5, 5, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='Max Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 5, 5, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='Min Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 5, 5, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p50.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 5, 5, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p90.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 5, 5, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p95.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 5, 5, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p99.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 5, 5, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + }, + timestamp=0, + ), + sample.Sample( + 
metric='p99.90_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 5, 5, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='Average Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='Average Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 5, 4, 3, 2], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='Average Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 0, 5, 4, 3], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 2, + }, + timestamp=0, + ), + sample.Sample( + metric='Max Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='Max Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 5, 4, 3, 2], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='Max Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 0, 5, 4, 3], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 2, + }, + timestamp=0, + ), + sample.Sample( + metric='Min Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='Min Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 5, 4, 3, 2], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='Min Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 0, 5, 4, 3], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 2, + }, + timestamp=0, + ), + sample.Sample( + metric='p50.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p50.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 5, 4, 3, 2], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p50.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 0, 5, 4, 3], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 2, + }, + timestamp=0, + ), + sample.Sample( + metric='p90.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p90.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 5, 4, 3, 2], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p90.00_time_series', value=0.0, - unit='ops', + unit='ms', metadata={ - 'values': [1, 2, 3, 3, 3], + 'values': [0, 0, 5, 4, 3], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 2, }, timestamp=0, ), sample.Sample( 
- metric='Average Latency_time_series', + metric='p95.00_time_series', value=0.0, unit='ms', metadata={ - 'values': [1, 5, 5, 4, 5], + 'values': [1, 2, 3, 4, 5], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 0, }, timestamp=0, ), sample.Sample( - metric='Max Latency_time_series', + metric='p95.00_time_series', value=0.0, unit='ms', metadata={ - 'values': [1, 5, 5, 4, 5], + 'values': [0, 5, 4, 3, 2], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 1, }, timestamp=0, ), sample.Sample( - metric='Min Latency_time_series', + metric='p95.00_time_series', value=0.0, unit='ms', metadata={ - 'values': [1, 5, 5, 4, 5], + 'values': [0, 0, 5, 4, 3], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 2, }, timestamp=0, ), sample.Sample( - metric='p50.00_time_series', + metric='p99.00_time_series', value=0.0, unit='ms', metadata={ - 'values': [1, 5, 5, 4, 5], + 'values': [1, 2, 3, 4, 5], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 0, }, timestamp=0, ), sample.Sample( - metric='p90.00_time_series', + metric='p99.00_time_series', value=0.0, unit='ms', metadata={ - 'values': [1, 5, 5, 4, 5], + 'values': [0, 5, 4, 3, 2], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 1, }, timestamp=0, ), sample.Sample( - metric='p95.00_time_series', + metric='p99.00_time_series', value=0.0, unit='ms', metadata={ - 'values': [1, 5, 5, 4, 5], + 'values': [0, 0, 5, 4, 3], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 2, }, timestamp=0, ), sample.Sample( - metric='p99.00_time_series', + metric='p99.90_time_series', value=0.0, unit='ms', metadata={ - 'values': [1, 5, 5, 4, 5], + 'values': [1, 2, 3, 4, 5], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 0, }, timestamp=0, ), @@ -625,9 +1005,22 @@ def testAggregateMemtierResultsWithMultipleResultsDifferentStartTime(self): value=0.0, unit='ms', metadata={ - 'values': [1, 5, 5, 4, 5], + 'values': [0, 5, 4, 3, 2], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p99.90_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [0, 0, 5, 4, 3], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'client': 2, }, timestamp=0, ), @@ -710,6 +1103,7 @@ def testAggregateMemtierResultsWithMultipleResults(self): 'values': [2, 2, 2, 2, 2], 'timestamps': [0, 1000, 2000, 3000, 4000], 'interval': 1, + 'ramp_down_starts': 4000, }, timestamp=0, ), @@ -801,7 +1195,200 @@ def testAggregateMemtierResultsWithMultipleResults(self): }, timestamp=0, ), + sample.Sample( + metric='Average Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='Average Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [5, 4, 3, 2, 1], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='Max Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='Max Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [5, 4, 3, 2, 1], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + 
metric='Min Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='Min Latency_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [5, 4, 3, 2, 1], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p50.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p50.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [5, 4, 3, 2, 1], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p90.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p90.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [5, 4, 3, 2, 1], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p95.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p95.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [5, 4, 3, 2, 1], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p99.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p99.00_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [5, 4, 3, 2, 1], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), + sample.Sample( + metric='p99.90_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [1, 2, 3, 4, 5], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 0, + }, + timestamp=0, + ), + sample.Sample( + metric='p99.90_time_series', + value=0.0, + unit='ms', + metadata={ + 'values': [5, 4, 3, 2, 1], + 'timestamps': [0, 1000, 2000, 3000, 4000], + 'interval': 1, + 'client': 1, + }, + timestamp=0, + ), ] + print(samples) self.assertEqual(samples, expected_result) def testParseResults_no_time_series(self): @@ -860,45 +1447,280 @@ def testParseResults_no_time_series(self): samples.extend(results.GetSamples(METADATA)) self.assertSampleListsEqualUpToTimestamp(samples, expected_result) + @flagsaver.flagsaver(num_cpus_override=16) def testMeasureLatencyCappedThroughput(self): mock_run_results = [ # Multi-pipeline - GetMemtierResult(7270, 0.175), - GetMemtierResult(386941, 6.751), - GetMemtierResult(424626, 3.247), - GetMemtierResult(408957, 1.591), - GetMemtierResult(398920, 0.839), - GetMemtierResult(408290, 1.207), - GetMemtierResult(405672, 1.015), - GetMemtierResult(408808, 0.951), - GetMemtierResult(405209, 0.967), - GetMemtierResult(398249, 1.015), - GetMemtierResult(409221, 0.967), - GetMemtierResult(413240, 0.975), - GetMemtierResult(412573, 0.975), + GetMemtierResult(10, 10.0), + GetMemtierResult(20, 5.0), + GetMemtierResult(30, 
2.0), + GetMemtierResult(8, 1.5), + GetMemtierResult(9, 0.7), + GetMemtierResult(3, 1.4), + GetMemtierResult(2, 0.8), + GetMemtierResult(4, 1.3), + GetMemtierResult(15, 0.9), + GetMemtierResult(7, 1.2), + GetMemtierResult(10, 0.9), + GetMemtierResult(1, 1.1), + GetMemtierResult(9, 0.9), + GetMemtierResult(30, 1.2), # Multi-client - GetMemtierResult(7433, 0.159), - GetMemtierResult(218505, 2.975), - GetMemtierResult(79875, 4.447), - GetMemtierResult(323469, 0.519), - GetMemtierResult(321503, 0.743), - GetMemtierResult(324469, 0.855), - GetMemtierResult(308853, 1.007), - GetMemtierResult(322717, 0.903), - GetMemtierResult(321258, 0.919), - GetMemtierResult(323695, 0.927), - GetMemtierResult(310044, 0.983), + GetMemtierResult(10, 10.0), + GetMemtierResult(20, 5.0), + GetMemtierResult(30, 2.0), + GetMemtierResult(8, 1.5), + GetMemtierResult(9, 0.7), + GetMemtierResult(3, 1.4), ] self.enter_context( mock.patch.object(memtier, '_Run', side_effect=mock_run_results)) - results = memtier.MeasureLatencyCappedThroughput(None, 'unused', 0) + mock_vm = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + results = memtier.MeasureLatencyCappedThroughput(mock_vm, 1, 'unused', 0) actual_throughputs = [] for s in results: if s.metric == 'Ops Throughput': actual_throughputs.append(s.value) - self.assertEqual(actual_throughputs, [413240, 324469]) + self.assertEqual(actual_throughputs, [15.0, 9.0]) + + def testRunParallelSingleVm(self): + vm1 = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + connections = [ + memtier.MemtierConnection(vm1, '10.0.1.117', 6379), + ] + mock_run_threaded = self.enter_context( + mock.patch.object(background_tasks, 'RunThreaded') + ) + + memtier._RunParallelConnections(connections, '0.0.0.0', 1234, 1, 2, 3) + + mock_run_threaded.assert_called_once_with( + memtier._Run, + [ + ( + (), + { + 'vm': vm1, + 'server_ip': '0.0.0.0', + 'server_port': 1234, + 'threads': 1, + 'clients': 2, + 'pipeline': 3, + 'password': None, + }, + ), + ], + ) + + def testRunParallelMultipleVms(self): + vm1 = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + vm1.ip_address = 'vm1' + vm2 = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + vm2.ip_address = 'vm2' + connections = [ + memtier.MemtierConnection(vm1, '10.0.1.117', 6379), + memtier.MemtierConnection(vm1, '10.0.2.104', 6379), + memtier.MemtierConnection(vm1, '10.0.3.217', 6379), + memtier.MemtierConnection(vm2, '10.0.2.177', 6379), + memtier.MemtierConnection(vm2, '10.0.1.174', 6379), + memtier.MemtierConnection(vm2, '10.0.3.6', 6379), + ] + mock_run_threaded = self.enter_context( + mock.patch.object(background_tasks, 'RunThreaded') + ) + + memtier._RunParallelConnections(connections, '0.0.0.0', 1234, 1, 2, 3) + + mock_run_threaded.assert_called_once_with( + memtier._Run, + [ + ( + (), + { + 'vm': vm1, + 'server_ip': '0.0.0.0', + 'server_port': 1234, + 'threads': 1, + 'clients': 2, + 'pipeline': 3, + 'password': None, + 'shard_addresses': ( + '10.0.1.117:6379,10.0.2.104:6379,10.0.3.217:6379' + ), + }, + ), + ( + (), + { + 'vm': vm2, + 'server_ip': '0.0.0.0', + 'server_port': 1234, + 'threads': 1, + 'clients': 2, + 'pipeline': 3, + 'password': None, + 'shard_addresses': ( + '10.0.2.177:6379,10.0.1.174:6379,10.0.3.6:6379' + ), + }, + ), + ], + ) + + @flagsaver.flagsaver(memtier_distribution_iterations=1, num_cpus_override=16) + def 
testMeasureLatencyCappedThroughputDistribution(self): + vm1 = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + vm1.ip_address = 'vm1' + vm2 = pkb_common_test_case.TestLinuxVirtualMachine( + pkb_common_test_case.CreateTestVmSpec() + ) + vm2.ip_address = 'vm2' + connections = [ + memtier.MemtierConnection(vm1, '10.0.1.117', 6379), + memtier.MemtierConnection(vm1, '10.0.2.104', 6379), + memtier.MemtierConnection(vm1, '10.0.3.217', 6379), + memtier.MemtierConnection(vm2, '10.0.2.177', 6379), + memtier.MemtierConnection(vm2, '10.0.1.174', 6379), + memtier.MemtierConnection(vm2, '10.0.3.6', 6379), + ] + + mock_binary_search = self.enter_context( + mock.patch.object( + memtier, + '_BinarySearchForLatencyCappedThroughput', + return_value=[ + memtier.MemtierResult( + parameters=memtier.MemtierBinarySearchParameters( + pipelines=1, threads=2, clients=3 + ) + ) + ], + ) + ) + mock_results = [ + memtier.MemtierResult( + ops_per_sec=0, + kb_per_sec=0, + latency_ms=0, + latency_dic={'90': 0, '95': 50, '99': 1.0}, + ), + memtier.MemtierResult( + ops_per_sec=200, + kb_per_sec=2, + latency_ms=0.2, + latency_dic={'90': 10, '95': 40, '99': 0.8}, + ), + memtier.MemtierResult( + ops_per_sec=400, + kb_per_sec=4, + latency_ms=0.4, + latency_dic={'90': 20, '95': 30, '99': 0.6}, + ), + memtier.MemtierResult( + ops_per_sec=600, + kb_per_sec=6, + latency_ms=0.6, + latency_dic={'90': 30, '95': 20, '99': 0.4}, + ), + memtier.MemtierResult( + ops_per_sec=800, + kb_per_sec=8, + latency_ms=0.8, + latency_dic={'90': 40, '95': 10, '99': 0.2}, + ), + memtier.MemtierResult( + ops_per_sec=1000, + kb_per_sec=10, + latency_ms=1.0, + latency_dic={'90': 50, '95': 0, '99': 0.0}, + ), + ] + mock_run = self.enter_context( + mock.patch.object( + memtier, + '_RunParallelConnections', + return_value=mock_results, + ) + ) + + results = memtier.MeasureLatencyCappedThroughputDistribution( + connections, '0.0.0.0', 1234, [vm1, vm2], 6 + ) + + expected_metadata = { + 'distribution_iterations': 1, + 'threads': 2, + 'clients': 3, + 'pipelines': 1, + } + + with self.subTest('SamplesAreCorrect'): + # self.assertSampleListsEqualUpToTimestamp(results, expected_samples) + self.assertSampleInList( + sample.Sample( + metric='Mean ops_per_sec', + value=500.0, + unit='ops/s', + metadata=expected_metadata, + ), + results, + ) + self.assertSampleInList( + sample.Sample( + metric='Stdev kb_per_sec', + value=3.7416573867739413, + unit='KB/s', + metadata=expected_metadata, + ), + results, + ) + with self.subTest('BinarySearchHasCorrectArgs'): + mock_binary_search.assert_called_once_with( + connections, [memtier._ClientModifier(10, 16)], '0.0.0.0', 1234, None + ) + with self.subTest('RunHasCorrectArgs'): + mock_run.assert_has_calls( + [mock.call(connections, '0.0.0.0', 1234, 2, 3, 1, None)] + ) + + def testCombineResults(self): + result1 = memtier.MemtierResult( + ops_per_sec=800, + kb_per_sec=8, + latency_ms=0.8, + latency_dic={'90': 40, '95': 10, '99': 0.2}, + metadata={'test_metadata': True}, + parameters=memtier.MemtierBinarySearchParameters(lower_bound=1), + ) + result2 = memtier.MemtierResult( + ops_per_sec=1000, + kb_per_sec=10, + latency_ms=1.0, + latency_dic={'90': 50, '95': 0, '99': 0.0}, + ) + expected_result = memtier.MemtierResult( + ops_per_sec=1800, + kb_per_sec=18, + latency_ms=0.9, + latency_dic={'90': 45, '95': 5, '99': 0.1}, + metadata={'test_metadata': True}, + parameters=memtier.MemtierBinarySearchParameters(lower_bound=1), + ) + self.assertEqual( + expected_result, 
memtier._CombineResults([result1, result2]) + ) if __name__ == '__main__': diff --git a/tests/linux_packages/speccpu_test.py b/tests/linux_packages/speccpu_test.py index 83f4fb24c0..9869197528 100644 --- a/tests/linux_packages/speccpu_test.py +++ b/tests/linux_packages/speccpu_test.py @@ -423,7 +423,7 @@ def setUp(self): self.addCleanup(mock.patch.stopall) def testParseResultsC(self): - vm = mock.Mock(vm=linux_virtual_machine.Ubuntu1804Mixin) + vm = mock.Mock(vm=linux_virtual_machine.Ubuntu2004Mixin) spec_test_config = speccpu.SpecInstallConfigurations() spec_test_config.benchmark_name = 'speccpu2006' spec_test_config.log_format = r'Est. (SPEC.*_base2006)\s*(\S*)' @@ -465,7 +465,7 @@ def testParseResultsC(self): def testParseSpeedResults(self): speccpu.FLAGS.spec_runmode = 'base' - vm = mock.Mock(vm=linux_virtual_machine.Ubuntu1804Mixin) + vm = mock.Mock(vm=linux_virtual_machine.Ubuntu2004Mixin) spec_test_config = speccpu.SpecInstallConfigurations() spec_test_config.benchmark_name = 'speccpu2006' spec_test_config.log_format = r'Est. (SPEC.*_base2006)\s*(\S*)' @@ -478,7 +478,7 @@ def testParseSpeedResults(self): def testParseAllResults(self): speccpu.FLAGS.spec_runmode = 'all' - vm = mock.Mock(vm=linux_virtual_machine.Ubuntu1804Mixin) + vm = mock.Mock(vm=linux_virtual_machine.Ubuntu2004Mixin) spec_test_config = speccpu.SpecInstallConfigurations() spec_test_config.benchmark_name = 'speccpu2017' spec_test_config.log_format = r'Est. (SPEC.*2017_.*_base)\s*(\S*)' @@ -491,7 +491,7 @@ def testParseAllResults(self): def testParsePeakResults(self): speccpu.FLAGS.spec_runmode = 'peak' - vm = mock.Mock(vm=linux_virtual_machine.Ubuntu1804Mixin) + vm = mock.Mock(vm=linux_virtual_machine.Ubuntu2004Mixin) spec_test_config = speccpu.SpecInstallConfigurations() spec_test_config.benchmark_name = 'speccpu2017' spec_test_config.log_format = r'Est. (SPEC.*2017_.*_base)\s*(\S*)' @@ -504,7 +504,7 @@ def testParsePeakResults(self): def testParsePartialPeakResults(self): speccpu.FLAGS.spec_runmode = 'peak' - vm = mock.Mock(vm=linux_virtual_machine.Ubuntu1804Mixin) + vm = mock.Mock(vm=linux_virtual_machine.Ubuntu2004Mixin) spec_test_config = speccpu.SpecInstallConfigurations() spec_test_config.benchmark_name = 'speccpu2017' spec_test_config.log_format = r'Est. 
(SPEC.*2017_.*_base)\s*(\S*)' diff --git a/tests/linux_packages/ycsb_test.py b/tests/linux_packages/ycsb_test.py index 9aea5c39ce..de07a6b3ab 100644 --- a/tests/linux_packages/ycsb_test.py +++ b/tests/linux_packages/ycsb_test.py @@ -24,6 +24,7 @@ import mock from perfkitbenchmarker import errors from perfkitbenchmarker.linux_packages import ycsb +from perfkitbenchmarker.linux_packages import ycsb_stats from tests import matchers from tests import pkb_common_test_case @@ -38,7 +39,7 @@ def open_data_file(filename): def _parse_and_return_time_series(filename): content = open_data_file(filename) - return ycsb.ParseResults(content, 'timeseries') + return ycsb_stats.ParseResults(content, 'timeseries') class SimpleResultParserTestCase(pkb_common_test_case.PkbCommonTestCase): @@ -46,36 +47,39 @@ class SimpleResultParserTestCase(pkb_common_test_case.PkbCommonTestCase): def setUp(self): super(SimpleResultParserTestCase, self).setUp() self.contents = open_data_file('ycsb-test-run.dat') - self.results = ycsb.ParseResults(self.contents, 'histogram') + self.results = ycsb_stats.ParseResults(self.contents, 'histogram') def testCommandLineSet(self): self.assertEqual( - 'Command line: -db com.yahoo.ycsb.BasicDB ' - '-P workloads/workloada -t', self.results.command_line) + 'Command line: -db com.yahoo.ycsb.BasicDB -P workloads/workloada -t', + self.results.command_line, + ) def testClientSet(self): self.assertEqual('YCSB Client 0.1', self.results.client) def testUpdateStatisticsParsed(self): self.assertEqual( - ycsb._OpResult( + ycsb_stats._OpResult( group='update', statistics={ 'Operations': 531, 'Return=0': 531, - 'AverageLatency(ms)': .0659774011299435, + 'AverageLatency(ms)': 0.0659774011299435, 'MinLatency(ms)': 0.042, - 'MaxLatency(ms)': .345, + 'MaxLatency(ms)': 0.345, '95thPercentileLatency(ms)': 0, - '99thPercentileLatency(ms)': 0 + '99thPercentileLatency(ms)': 0, }, - data_type=ycsb.HISTOGRAM, - data=[(0, 530), (19, 1)]), - self.results.groups['update']) + data_type=ycsb_stats.HISTOGRAM, + data=[(0, 530), (19, 1)], + ), + self.results.groups['update'], + ) def testReadStatisticsParsed(self): self.assertEqual( - ycsb._OpResult( + ycsb_stats._OpResult( group='read', statistics={ 'Operations': 469, @@ -84,23 +88,24 @@ def testReadStatisticsParsed(self): 'MinLatency(ms)': 0.034, 'MaxLatency(ms)': 0.102, '95thPercentileLatency(ms)': 0, - '99thPercentileLatency(ms)': 0 + '99thPercentileLatency(ms)': 0, }, - data_type=ycsb.HISTOGRAM, - data=[(0, 469)]), - self.results.groups['read']) + data_type=ycsb_stats.HISTOGRAM, + data=[(0, 469)], + ), + self.results.groups['read'], + ) def testOverallStatisticsParsed(self): self.assertEqual( - ycsb._OpResult( + ycsb_stats._OpResult( group='overall', - statistics={ - 'RunTime(ms)': 80.0, - 'Throughput(ops/sec)': 12500.0 - }, + statistics={'RunTime(ms)': 80.0, 'Throughput(ops/sec)': 12500.0}, data_type='histogram', - data=[]), - self.results.groups['overall']) + data=[], + ), + self.results.groups['overall'], + ) class DetailedResultParserTestCase(unittest.TestCase): @@ -108,23 +113,24 @@ class DetailedResultParserTestCase(unittest.TestCase): def setUp(self): super(DetailedResultParserTestCase, self).setUp() self.contents = open_data_file('ycsb-test-run-2.dat') - self.results = ycsb.ParseResults(self.contents, 'histogram') + self.results = ycsb_stats.ParseResults(self.contents, 'histogram') def testPercentilesFromHistogram_read(self): hist = self.results.groups['read'].data - percentiles = ycsb._PercentilesFromHistogram(hist) + percentiles = 
ycsb_stats._PercentilesFromHistogram(hist) self.assertEqual(1, percentiles['p50']) self.assertEqual(7, percentiles['p99']) def testPercentilesFromHistogram_update(self): hist = self.results.groups['update'].data - percentiles = ycsb._PercentilesFromHistogram(hist) + percentiles = ycsb_stats._PercentilesFromHistogram(hist) self.assertEqual(1, percentiles['p50']) self.assertEqual(7, percentiles['p99']) -class ThroughputTimeSeriesParserTestCase(pkb_common_test_case.PkbCommonTestCase - ): +class ThroughputTimeSeriesParserTestCase( + pkb_common_test_case.PkbCommonTestCase +): def setUp(self): super().setUp() @@ -139,7 +145,7 @@ def testParsedThroughputTimeSeriesIsCorrect(self): 30: 2496.8, 40: 2509.6, 50: 2487.2, - 60: 2513.2 + 60: 2513.2, } self.assertEqual(results.throughput_time_series, expected) @@ -148,10 +154,11 @@ def testCombinedThroughputTimeSeriesIsCorrect(self): results_1 = _parse_and_return_time_series('ycsb-time-series.dat') results_2 = _parse_and_return_time_series('ycsb-time-series-2.dat') - combined = ycsb._CombineResults( + combined = ycsb_stats.CombineResults( result_list=[results_1, results_2], - measurement_type=ycsb.TIMESERIES, - combined_hdr={}) + measurement_type=ycsb_stats.TIMESERIES, + combined_hdr={}, + ) expected = { 10: 4187.5, @@ -168,14 +175,22 @@ class BadResultParserTestCase(unittest.TestCase): def testBadTestRun(self): contents = open_data_file('ycsb-test-run-3.dat') - self.assertRaises(errors.Benchmarks.KnownIntermittentError, - ycsb.ParseResults, contents, 'histogram') + self.assertRaises( + errors.Benchmarks.KnownIntermittentError, + ycsb_stats.ParseResults, + contents, + 'histogram', + ) - @flagsaver.flagsaver(ycsb_max_error_rate=0.95) def testErrorRate(self): contents = open_data_file('ycsb-test-run-4.dat') - self.assertRaises(errors.Benchmarks.RunError, ycsb.ParseResults, contents, - 'hdrhistogram') + self.assertRaises( + errors.Benchmarks.RunError, + ycsb_stats.ParseResults, + contents, + 'hdrhistogram', + 0.95, + ) class WeightedQuantileTestCase(unittest.TestCase): @@ -183,26 +198,28 @@ class WeightedQuantileTestCase(unittest.TestCase): def testEvenlyWeightedSamples(self): x = list(range(1, 101)) # 1-100 weights = [1 for _ in x] - self.assertEqual(50, ycsb._WeightedQuantile(x, weights, 0.50)) - self.assertEqual(75, ycsb._WeightedQuantile(x, weights, 0.75)) - self.assertEqual(90, ycsb._WeightedQuantile(x, weights, 0.90)) - self.assertEqual(95, ycsb._WeightedQuantile(x, weights, 0.95)) - self.assertEqual(99, ycsb._WeightedQuantile(x, weights, 0.99)) - self.assertEqual(100, ycsb._WeightedQuantile(x, weights, 1)) + self.assertEqual(50, ycsb_stats._WeightedQuantile(x, weights, 0.50)) + self.assertEqual(75, ycsb_stats._WeightedQuantile(x, weights, 0.75)) + self.assertEqual(90, ycsb_stats._WeightedQuantile(x, weights, 0.90)) + self.assertEqual(95, ycsb_stats._WeightedQuantile(x, weights, 0.95)) + self.assertEqual(99, ycsb_stats._WeightedQuantile(x, weights, 0.99)) + self.assertEqual(100, ycsb_stats._WeightedQuantile(x, weights, 1)) def testLowWeight(self): x = [1, 4] weights = [99, 1] for i in range(100): - self.assertEqual(1, ycsb._WeightedQuantile(x, weights, i / 100.0)) - self.assertEqual(4, ycsb._WeightedQuantile(x, weights, 0.995)) + self.assertEqual(1, ycsb_stats._WeightedQuantile(x, weights, i / 100.0)) + self.assertEqual(4, ycsb_stats._WeightedQuantile(x, weights, 0.995)) def testMidWeight(self): x = [0, 1.2, 4] weights = [1, 98, 1] for i in range(2, 99): - self.assertAlmostEqual(1.2, ycsb._WeightedQuantile(x, weights, i / 100.0)) - self.assertEqual(4, 
ycsb._WeightedQuantile(x, weights, 0.995)) + self.assertAlmostEqual( + 1.2, ycsb_stats._WeightedQuantile(x, weights, i / 100.0) + ) + self.assertEqual(4, ycsb_stats._WeightedQuantile(x, weights, 0.995)) class ParseWorkloadTestCase(unittest.TestCase): @@ -212,11 +229,13 @@ def testParsesEmptyString(self): def testIgnoresComment(self): self.assertDictEqual({}, ycsb.ParseWorkload('#\n')) - self.assertDictEqual({}, - ycsb.ParseWorkload('#recordcount = 10\n' - '# columnfamily=cf')) - self.assertDictEqual({'recordcount': '10'}, - ycsb.ParseWorkload('#Sample!\nrecordcount = 10')) + self.assertDictEqual( + {}, ycsb.ParseWorkload('#recordcount = 10\n# columnfamily=cf') + ) + self.assertDictEqual( + {'recordcount': '10'}, + ycsb.ParseWorkload('#Sample!\nrecordcount = 10'), + ) def testParsesSampleWorkload(self): contents = open_data_file('ycsb_workloada') @@ -231,7 +250,7 @@ def testParsesSampleWorkload(self): 'updateproportion': '0.5', 'scanproportion': '0', 'insertproportion': '0', - 'requestdistribution': 'zipfian' + 'requestdistribution': 'zipfian', } self.assertDictEqual(expected, actual) @@ -240,63 +259,56 @@ def testParsesSampleWorkload(self): class CombineResultsTestCase(unittest.TestCase): def testGroupMissing(self): - r1 = ycsb.YcsbResult( + r1 = ycsb_stats.YcsbResult( groups={ - 'read': ycsb._OpResult( + 'read': ycsb_stats._OpResult( group='read', - statistics={ - 'Operations': 100, - 'Return=0': 100 - }, - data_type=ycsb.HISTOGRAM, + statistics={'Operations': 100, 'Return=0': 100}, + data_type=ycsb_stats.HISTOGRAM, ) - }) - r2 = ycsb.YcsbResult( + } + ) + r2 = ycsb_stats.YcsbResult( groups={ - 'read': ycsb._OpResult( + 'read': ycsb_stats._OpResult( group='read', - statistics={ - 'Operations': 96, - 'Return=0': 94, - 'Return=-1': 2 - }, - data_type=ycsb.HISTOGRAM, + statistics={'Operations': 96, 'Return=0': 94, 'Return=-1': 2}, + data_type=ycsb_stats.HISTOGRAM, ), - 'update': ycsb._OpResult( + 'update': ycsb_stats._OpResult( group='update', - statistics={ - 'Operations': 100, - 'AverageLatency(ms)': 25 - }, - data_type=ycsb.HISTOGRAM, - ) - }) - combined = ycsb._CombineResults([r1, r2], 'histogram', {}) + statistics={'Operations': 100, 'AverageLatency(ms)': 25}, + data_type=ycsb_stats.HISTOGRAM, + ), + } + ) + combined = ycsb_stats.CombineResults([r1, r2], 'histogram', {}) self.assertCountEqual(['read', 'update'], combined.groups) - self.assertCountEqual(['Operations', 'Return=0', 'Return=-1'], - combined.groups['read'].statistics) + self.assertCountEqual( + ['Operations', 'Return=0', 'Return=-1'], + combined.groups['read'].statistics, + ) read_stats = combined.groups['read'].statistics - self.assertEqual({ - 'Operations': 196, - 'Return=0': 194, - 'Return=-1': 2 - }, read_stats) + self.assertEqual( + {'Operations': 196, 'Return=0': 194, 'Return=-1': 2}, read_stats + ) def testDropUnaggregatedFromSingleResult(self): - r = ycsb.YcsbResult( + r = ycsb_stats.YcsbResult( client='', command_line='', groups={ - 'read': - ycsb._OpResult( - group='read', - statistics={'AverageLatency(ms)': 21}, - data_type=ycsb.HISTOGRAM) - }) + 'read': ycsb_stats._OpResult( + group='read', + statistics={'AverageLatency(ms)': 21}, + data_type=ycsb_stats.HISTOGRAM, + ) + }, + ) r_copy = copy.deepcopy(r) self.assertEqual(r, r_copy) - combined = ycsb._CombineResults([r], 'histogram', {}) + combined = ycsb_stats.CombineResults([r], 'histogram', {}) self.assertEqual(r, r_copy) r.groups['read'].statistics = {} self.assertEqual(r, combined) @@ -318,9 +330,13 @@ def testParseHdrLogFile(self): #[Max = 203903.000, Total 
count = 499019] #[Buckets = 8, SubBuckets = 2048] """ - actual = ycsb.ParseHdrLogFile(rawlog) - expected = [(0.0, 0.314, 2), (10.0, 0.853, 49953), - (20.0, 0.949, 50396), (30.0, 1.033, 49759)] + actual = ycsb_stats.ParseHdrLogFile(rawlog) + expected = [ + (0.0, 0.314, 2), + (10.0, 0.853, 49953), + (20.0, 0.949, 50396), + (30.0, 1.033, 49759), + ] self.assertEqual(actual, expected) @@ -328,24 +344,21 @@ class PrerequisitesTestCase(pkb_common_test_case.PkbCommonTestCase): @parameterized.named_parameters( { - 'testcase_name': - 'SnapshotVersion', - 'url': - 'https://storage.googleapis.com/externally_shared_files/ycsb-0.18.0-SNAPSHOT.tar.gz', - 'expected_version': - 18, - }, { + 'testcase_name': 'SnapshotVersion', + 'url': 'https://storage.googleapis.com/externally_shared_files/ycsb-0.18.0-SNAPSHOT.tar.gz', + 'expected_version': 18, + }, + { 'testcase_name': 'StandardVersion', 'url': 'https://storage.googleapis.com/ycsbclient/ycsb-0.17.0.tar.gz', 'expected_version': 17, - }, { - 'testcase_name': - 'GitHubVersion', - 'url': - 'https://github.com/brianfrankcooper/YCSB/releases/download/0.17.0/ycsb-0.17.0.tar.gz', - 'expected_version': - 17, - }) + }, + { + 'testcase_name': 'GitHubVersion', + 'url': 'https://github.com/brianfrankcooper/YCSB/releases/download/0.17.0/ycsb-0.17.0.tar.gz', + 'expected_version': 17, + }, + ) def testGetVersionIndexFromUrl(self, url, expected_version): actual_version = ycsb._GetVersionFromUrl(url) self.assertEqual(actual_version, expected_version) @@ -367,7 +380,7 @@ def setUp(self): FLAGS.ycsb_workload_files = ['workloadc'] self.test_executor = ycsb.YCSBExecutor('test_database') # Result parsing is already handled elsewhere - self.enter_context(mock.patch.object(ycsb, 'ParseResults')) + self.enter_context(mock.patch.object(ycsb_stats, 'ParseResults')) # Test VM with mocked command self.test_vm = mock.Mock() self.test_cmd = self.test_vm.RobustRemoteCommand @@ -429,7 +442,7 @@ def testIncrementalLoadCalledWithCorrectTarget(self): mock.call(matchers.HAS('-target 8542')), mock.call(matchers.HAS('-target 10000')), ], - self.test_cmd.mock_calls + self.test_cmd.mock_calls, ) @flagsaver.flagsaver @@ -438,8 +451,9 @@ def testIncrementalLoadUsesCorrectThreadCounts(self): FLAGS.ycsb_incremental_load = 2500 FLAGS.ycsb_client_vms = 1 FLAGS['ycsb_threads_per_client'].parse(['1000']) - mock_set_thread_count = self.enter_context(mock.patch.object( - self.test_executor, '_SetClientThreadCount')) + mock_set_thread_count = self.enter_context( + mock.patch.object(self.test_executor, '_SetClientThreadCount') + ) # Act self.test_executor.Run([self.test_vm]) @@ -453,7 +467,7 @@ def testIncrementalLoadUsesCorrectThreadCounts(self): mock.call(1000), mock.call(1000), ], - mock_set_thread_count.mock_calls + mock_set_thread_count.mock_calls, ) @flagsaver.flagsaver @@ -467,8 +481,7 @@ def testIncrementalLoadCalledWithLowerTarget(self): # Assert self.assertSequenceEqual( - [mock.call(matchers.HAS('-target 200'))], - self.test_cmd.mock_calls + [mock.call(matchers.HAS('-target 200'))], self.test_cmd.mock_calls ) diff --git a/tests/pkb_test.py b/tests/pkb_test.py index 132d3e1b7a..8870b1c7e8 100644 --- a/tests/pkb_test.py +++ b/tests/pkb_test.py @@ -303,7 +303,7 @@ def testCollectMeminfoHandler(self): vm = mock.Mock() vm.RemoteCommand.return_value = 'b: 100\na: 10\nbadline', '' vm.name = 'pkb-1234-0' - vm.OS_TYPE = 'ubuntu1804' + vm.OS_TYPE = 'ubuntu2004' vm.machine_type = 'n1-standard-2' benchmark_spec = mock.Mock(vms=[vm]) samples = [] @@ -316,7 +316,7 @@ def testCollectMeminfoHandler(self): 
'meminfo_keys': 'a,b', 'meminfo_malformed': 'badline', 'meminfo_machine_type': 'n1-standard-2', - 'meminfo_os_type': 'ubuntu1804', + 'meminfo_os_type': 'ubuntu2004', 'meminfo_vmname': 'pkb-1234-0', } expected_sample = sample.Sample('meminfo', 0, '', expected_metadata) diff --git a/tests/providers/aws/aws_capacity_reservation_test.py b/tests/providers/aws/aws_capacity_reservation_test.py index 802727576c..0abd653efe 100644 --- a/tests/providers/aws/aws_capacity_reservation_test.py +++ b/tests/providers/aws/aws_capacity_reservation_test.py @@ -44,7 +44,7 @@ def __init__(self): self.zone = 'us-west-1' self.region = 'us-west-1' self.machine_type = 'fake_machine_type' - self.OS_TYPE = 'ubuntu1804' # pylint: disable=invalid-name + self.OS_TYPE = 'ubuntu2004' # pylint: disable=invalid-name self.network = mock.MagicMock() self.capacity_reservation_id = None diff --git a/tests/providers/aws/aws_dynamodb_test.py b/tests/providers/aws/aws_dynamodb_test.py index ab05294034..d5f2020008 100644 --- a/tests/providers/aws/aws_dynamodb_test.py +++ b/tests/providers/aws/aws_dynamodb_test.py @@ -299,8 +299,9 @@ def testTagResourceFailsWithNonExistentResource(self): self.enter_context( mock.patch.object(test_instance, '_Exists', return_value=False)) - with self.assertRaises(errors.Resource.CreationError): + with self.assertRaises(vm_util.RetriesExceededRetryError) as e: test_instance._GetTagResourceCommand(['test', 'tag']) + self.assertIs(type(e.exception.__cause__), errors.Resource.CreationError) def testUpdateWithDefaultTags(self): test_instance = GetTestDynamoDBInstance() diff --git a/tests/providers/gcp/gcp_dpb_dataproc_test.py b/tests/providers/gcp/gcp_dpb_dataproc_test.py index a4ad814012..20af4da802 100644 --- a/tests/providers/gcp/gcp_dpb_dataproc_test.py +++ b/tests/providers/gcp/gcp_dpb_dataproc_test.py @@ -16,7 +16,6 @@ import unittest from absl import flags import mock - from perfkitbenchmarker import dpb_service from perfkitbenchmarker import errors from perfkitbenchmarker import vm_util @@ -56,14 +55,17 @@ applications=['foo-component', 'bar-component'], worker_group=mock.Mock( vm_spec=mock.Mock(machine_type='fake-machine-type', num_local_ssds=2), - disk_spec=mock.Mock(disk_type='pd-ssd', disk_size=42))) + disk_spec=mock.Mock(disk_type='pd-ssd', disk_size=42), + ), +) DPGKE_CLUSTER_SPEC = mock.Mock( static_dpb_service_instance=None, gke_cluster_name='gke-cluster', gke_cluster_location='gke-cluster-loc', version='preview-0.3', - gke_cluster_nodepools='name:pool-name,role:driver,min:3') + gke_cluster_nodepools='name:pool-name,role:driver,min:3', +) SERVERLESS_SPEC = mock.Mock( static_dpb_service_instance=None, @@ -72,7 +74,13 @@ dataproc_serverless_initial_executors=4, dataproc_serverless_min_executors=2, dataproc_serverless_max_executors=10, - worker_group=mock.Mock(disk_spec=mock.Mock(disk_size=42)) + dataproc_serverless_memory=10000, + dataproc_serverless_memory_overhead=4000, + worker_group=mock.Mock( + disk_spec=mock.Mock( + disk_size=42, + ), + ), ) @@ -96,7 +104,8 @@ def setUp(self): FLAGS.zones = [GCP_ZONE_US_CENTRAL1_A] @mock.patch.object( - vm_util, 'IssueCommand', return_value=('fake_stdout', 'fake_stderr', 0)) + vm_util, 'IssueCommand', return_value=('fake_stdout', 'fake_stderr', 0) + ) def testCreate(self, mock_issue): cluster = LocalGcpDpbDataproc() cluster._Create() @@ -113,8 +122,9 @@ def testCreate(self, mock_issue): self.assertIn('--worker-machine-type fake-machine-type', command_string) self.assertIn('--num-worker-local-ssds 2', command_string) self.assertIn('--num-workers 2', 
command_string) - self.assertIn('--optional-components foo-component,bar-component', - command_string) + self.assertIn( + '--optional-components foo-component,bar-component', command_string + ) self.assertIn('--project fake-project ', command_string) self.assertIn('--region us-central1', command_string) self.assertIn('--zone us-central1-a', command_string) @@ -123,8 +133,13 @@ def testCreate(self, mock_issue): vm_util, 'IssueCommand', return_value=( - 'fake_stdout', "The zone 'projects/fake-project/zones/us-central1-a' " - 'does not have enough resources available to fulfill the request.', 1) + 'fake_stdout', + ( + "The zone 'projects/fake-project/zones/us-central1-a' " + 'does not have enough resources available to fulfill the request.' + ), + 1, + ), ) def testCreateResourceExhausted(self, mock_issue): cluster = LocalGcpDpbDataproc() @@ -151,14 +166,16 @@ def setUp(self): FLAGS.dpb_service_bucket = STAGING_BUCKET @mock.patch.object( - vm_util, 'IssueCommand', return_value=('fake_stdout', 'fake_stderr', 0)) + vm_util, 'IssueCommand', return_value=('fake_stdout', 'fake_stderr', 0) + ) def testCreate(self, mock_issue): cluster = LocalGcpDpbDPGKE() cluster._Create() self.assertEqual(mock_issue.call_count, 1) command_string = ' '.join(mock_issue.call_args[0][0]) - self.assertIn('gcloud alpha dataproc clusters gke create pkb-fakeru', - command_string) + self.assertIn( + 'gcloud alpha dataproc clusters gke create pkb-fakeru', command_string + ) self.assertIn('--gke-cluster gke-cluster ', command_string) self.assertIn('--namespace pkb-fakeru ', command_string) self.assertIn('--gke-cluster-location gke-cluster-loc ', command_string) @@ -174,12 +191,15 @@ def testMissingAttrs(self): 'version', ], static_dpb_service_instance=None, - gke_cluster_nodepools='') + gke_cluster_nodepools='', + ) with self.assertRaises(errors.Setup.InvalidSetupError) as ex: LocalGcpDpbDPGKE(spec=cluster_spec) self.assertIn( - "['gke_cluster_name', 'gke_cluster_nodepools', 'gke_cluster_location'] must be provided for provisioning DPGKE.", - str(ex.exception)) + "['gke_cluster_name', 'gke_cluster_nodepools', 'gke_cluster_location']" + ' must be provided for provisioning DPGKE.', + str(ex.exception), + ) class GcpDpbDataprocServerlessTest(pkb_common_test_case.PkbCommonTestCase): @@ -190,16 +210,21 @@ def setUp(self): FLAGS.dpb_service_zone = GCP_ZONE_US_CENTRAL1_A @mock.patch.object( - vm_util, 'IssueCommand', return_value=(SERVERLESS_MOCK_BATCH, '', 0)) + vm_util, 'IssueCommand', return_value=(SERVERLESS_MOCK_BATCH, '', 0) + ) def testSubmitJob(self, mock_issue): service = gcp_dpb_dataproc.GcpDpbDataprocServerless(SERVERLESS_SPEC) result = service.SubmitJob( pyspark_file=( - 'gs://pkb-fab5770b/spark_sql_test_scripts/spark_sql_runner.py'), + 'gs://pkb-fab5770b/spark_sql_test_scripts/spark_sql_runner.py' + ), job_arguments=[ - '--sql-scripts', 'gs://pkb-fab5770b/2.sql', '--report-dir', - 'gs://pkb-fab5770b/report-1643853399069', '--table-metadata', - 'gs://pkb-fab5770b/metadata.json' + '--sql-scripts', + 'gs://pkb-fab5770b/2.sql', + '--report-dir', + 'gs://pkb-fab5770b/report-1643853399069', + '--table-metadata', + 'gs://pkb-fab5770b/metadata.json', ], job_jars=[], job_type='pyspark', @@ -208,32 +233,66 @@ def testSubmitJob(self, mock_issue): self.assertEqual(result.pending_time, 72.282181) self.assertEqual(mock_issue.call_count, 2) mock_issue.assert_has_calls([ - mock.call([ - 'gcloud', 'dataproc', 'batches', 'submit', 'pyspark', - 'gs://pkb-fab5770b/spark_sql_test_scripts/spark_sql_runner.py', - '--batch', 'pkb-fakeru-0', - 
'--format', 'json', - '--labels', '', - '--properties', - ('^@^spark.executor.cores=4@' - 'spark.driver.cores=4@' - 'spark.executor.instances=4@' - 'spark.dynamicAllocation.minExecutors=2@' - 'spark.dynamicAllocation.maxExecutors=10@' - 'spark.dataproc.driver.disk_size=42g@' - 'spark.dataproc.executor.disk_size=42g'), - '--quiet', - '--region', 'us-central1', - '--version', 'fake-4.2', - '--', - '--sql-scripts', 'gs://pkb-fab5770b/2.sql', - '--report-dir', 'gs://pkb-fab5770b/report-1643853399069', - '--table-metadata', 'gs://pkb-fab5770b/metadata.json' - ], raise_on_failure=False, timeout=None), - mock.call([ - 'gcloud', 'dataproc', 'batches', 'describe', 'pkb-fakeru-0', - '--format', 'json', '--quiet', '--region', 'us-central1' - ], raise_on_failure=False, timeout=None) + mock.call( + [ + 'gcloud', + 'dataproc', + 'batches', + 'submit', + 'pyspark', + 'gs://pkb-fab5770b/spark_sql_test_scripts/spark_sql_runner.py', + '--batch', + 'pkb-fakeru-0', + '--format', + 'json', + '--labels', + '', + '--properties', + ( + '^@^spark.executor.cores=4@' + 'spark.driver.cores=4@' + 'spark.executor.instances=4@' + 'spark.dynamicAllocation.minExecutors=2@' + 'spark.dynamicAllocation.maxExecutors=10@' + 'spark.dataproc.driver.disk.size=42g@' + 'spark.dataproc.executor.disk.size=42g@' + 'spark.driver.memory=10000m@' + 'spark.executor.memory=10000m@' + 'spark.driver.memoryOverhead=4000m@' + 'spark.executor.memoryOverhead=4000m' + ), + '--quiet', + '--region', + 'us-central1', + '--version', + 'fake-4.2', + '--', + '--sql-scripts', + 'gs://pkb-fab5770b/2.sql', + '--report-dir', + 'gs://pkb-fab5770b/report-1643853399069', + '--table-metadata', + 'gs://pkb-fab5770b/metadata.json', + ], + raise_on_failure=False, + timeout=None, + ), + mock.call( + [ + 'gcloud', + 'dataproc', + 'batches', + 'describe', + 'pkb-fakeru-0', + '--format', + 'json', + '--quiet', + '--region', + 'us-central1', + ], + raise_on_failure=False, + timeout=None, + ), ]) diff --git a/tests/providers_test.py b/tests/providers_test.py index 3b3d665b3a..5975652c5d 100644 --- a/tests/providers_test.py +++ b/tests/providers_test.py @@ -74,7 +74,7 @@ def testBenchmarkConfigSpecLoadsProvider(self): 'vm_groups': { 'group1': { 'cloud': 'AWS', - 'os_type': 'ubuntu1804', + 'os_type': 'ubuntu2004', 'vm_count': 0, 'vm_spec': {'AWS': {}} } diff --git a/tests/relational_db_test.py b/tests/relational_db_test.py new file mode 100644 index 0000000000..1560080a81 --- /dev/null +++ b/tests/relational_db_test.py @@ -0,0 +1,54 @@ +"""Tests for relational_db.""" + +import unittest +from absl import flags +import mock +from perfkitbenchmarker import relational_db +from perfkitbenchmarker import relational_db_spec +from tests import pkb_common_test_case + +FLAGS = flags.FLAGS + + +# Implements some abstract functions so we can instantiate BaseRelationalDb. 
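+# The overrides below are intentionally minimal: _Create/_Delete are no-ops
+# and GetDefaultEngineVersion returns a fixed stub value, so the tests can
+# exercise BaseRelationalDb's client-VM plumbing without provisioning any
+# real resources.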
+class TestBaseRelationalDb(relational_db.BaseRelationalDb): + + def _Create(self): + pass + + def _Delete(self): + pass + + def GetDefaultEngineVersion(self, engine): + return 'test' + + +class RelationalDbTest(pkb_common_test_case.PkbCommonTestCase): + + def setUp(self): + super().setUp() + minimal_spec = { + 'cloud': 'GCP', + 'engine': 'mysql', + 'db_spec': {'GCP': {'machine_type': 'n1-standard-1'}}, + 'db_disk_spec': {'GCP': {'disk_size': 500}}, + } + self.spec = relational_db_spec.RelationalDbSpec( + 'test_component', flag_values=FLAGS, **minimal_spec + ) + FLAGS['run_uri'].parse('test_uri') + + def test_client_vm_query_tools(self): + test_db = TestBaseRelationalDb(self.spec) + test_db._endpoint = 'test_endpoint' + mock_vms = {'default': [mock.Mock(), mock.Mock()]} + test_db.SetVms(mock_vms) + + self.assertLen(test_db.client_vms_query_tools, 2) + self.assertEqual( + test_db.client_vm_query_tools, test_db.client_vms_query_tools[0] + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/scratch_disk_test.py b/tests/scratch_disk_test.py index c187ce63f9..401602ee03 100644 --- a/tests/scratch_disk_test.py +++ b/tests/scratch_disk_test.py @@ -158,7 +158,7 @@ def _PatchCloudSpecific(self): def _CreateVm(self): vm_spec = azure_virtual_machine.AzureVmSpec( 'test_vm_spec.Azure', zone='eastus2', machine_type='test_machine_type') - return azure_virtual_machine.Ubuntu1604BasedAzureVirtualMachine(vm_spec) + return azure_virtual_machine.Ubuntu2004BasedAzureVirtualMachine(vm_spec) def _GetDiskClass(self): return azure_disk.AzureDisk @@ -172,7 +172,7 @@ def _PatchCloudSpecific(self): def _CreateVm(self): vm_spec = gce_virtual_machine.GceVmSpec('test_vm_spec.GCP', machine_type='test_machine_type') - vm = gce_virtual_machine.Ubuntu1804BasedGceVirtualMachine(vm_spec) + vm = gce_virtual_machine.Ubuntu2004BasedGceVirtualMachine(vm_spec) vm.GetNVMEDeviceInfo = mock.Mock() vm.GetNVMEDeviceInfo.return_value = [ { @@ -206,7 +206,7 @@ def _PatchCloudSpecific(self): def _CreateVm(self): vm_spec = aws_virtual_machine.AwsVmSpec( 'test_vm_spec.AWS', zone='us-east-1a', machine_type='test_machine_type') - vm = aws_virtual_machine.Ubuntu1604BasedAwsVirtualMachine(vm_spec) + vm = aws_virtual_machine.Ubuntu2004BasedAwsVirtualMachine(vm_spec) vm.LogDeviceByDiskSpecId('0_0', 'foobar_1') vm.LogDeviceByName('foobar_1', 'vol67890', None) diff --git a/tests/time_triggers/maintenance_simulation_trigger_test.py b/tests/time_triggers/maintenance_simulation_trigger_test.py index f53086ff7b..0825055e06 100644 --- a/tests/time_triggers/maintenance_simulation_trigger_test.py +++ b/tests/time_triggers/maintenance_simulation_trigger_test.py @@ -533,115 +533,151 @@ def testAppendLossFunctionSamplesWithNotification(self): samples = [s] trigger.trigger_time = datetime.datetime.fromtimestamp(4) vm = mock.MagicMock() - vm.CollectLMNotificationsTime = mock.MagicMock(return_value={ - 'LM_total_time': 100, - 'Host_maintenance_end': 8000 - }) + vm.CollectLMNotificationsTime = mock.MagicMock( + return_value={'LM_total_time': 100, 'Host_maintenance_end': 8} + ) trigger.vms = [vm] trigger.AppendSamples(None, vm_spec, samples) - self.assertEqual(samples, [ - Sample( - metric='TPM_time_series', - value=0.0, - unit='TPM', - metadata={ - 'values': - [1, 1, 1, 1, 0, 0.1, 0.2, 0.3, 0.95, 0.95, 0.95, 0.95], - 'timestamps': [ - 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, - 11000, 12000 - ], - 'interval': 1 - }, - timestamp=0), - Sample( - metric='LM Total Time', - value=100.0, - unit='seconds', - metadata={ - 
'LM_total_time': 100, - 'Host_maintenance_end': 8000 - }, - timestamp=0), - Sample( - metric='seconds_dropped_below_0_percent', - value=1.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_10_percent', - value=2.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_20_percent', - value=3.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_30_percent', - value=4.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_40_percent', - value=4.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_50_percent', - value=4.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_60_percent', - value=4.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_70_percent', - value=4.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_80_percent', - value=4.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='seconds_dropped_below_90_percent', - value=4.0, - unit='s', - metadata={}, - timestamp=0), - Sample( - metric='unresponsive_metric', - value=2.584, - unit='metric', - metadata={}, - timestamp=0), - Sample( - metric='total_loss_seconds', - value=3.4, - unit='seconds', - metadata={}, - timestamp=0), - Sample( - metric='degradation_percent', - value=5.0, - unit='%', - metadata={}, - timestamp=0) - ]) + self.assertEqual( + samples, + [ + Sample( + metric='TPM_time_series', + value=0.0, + unit='TPM', + metadata={ + 'values': [ + 1, + 1, + 1, + 1, + 0, + 0.1, + 0.2, + 0.3, + 0.95, + 0.95, + 0.95, + 0.95, + ], + 'timestamps': [ + 1000, + 2000, + 3000, + 4000, + 5000, + 6000, + 7000, + 8000, + 9000, + 10000, + 11000, + 12000, + ], + 'interval': 1, + }, + timestamp=0, + ), + Sample( + metric='LM Total Time', + value=100.0, + unit='seconds', + metadata={'LM_total_time': 100, 'Host_maintenance_end': 8}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_0_percent', + value=1.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_10_percent', + value=2.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_20_percent', + value=3.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_30_percent', + value=4.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_40_percent', + value=4.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_50_percent', + value=4.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_60_percent', + value=4.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_70_percent', + value=4.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_80_percent', + value=4.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='seconds_dropped_below_90_percent', + value=4.0, + unit='s', + metadata={}, + timestamp=0, + ), + Sample( + metric='unresponsive_metric', + value=2.584, + unit='metric', + metadata={}, + timestamp=0, + ), + Sample( + metric='total_loss_seconds', + value=3.4, + unit='seconds', + metadata={}, + timestamp=0, + ), + Sample( + metric='degradation_percent', + value=5.0, + unit='%', + metadata={}, + timestamp=0, + ), + ], + ) @mock.patch('time.time', mock.MagicMock(return_value=0)) def 
testAppendLossFunctionSamplesContainsMetadata(self): @@ -658,116 +694,152 @@ def testAppendLossFunctionSamplesContainsMetadata(self): samples = [s] trigger.trigger_time = datetime.datetime.fromtimestamp(4) vm = mock.MagicMock() - vm.CollectLMNotificationsTime = mock.MagicMock(return_value={ - 'LM_total_time': 100, - 'Host_maintenance_end': 8000 - }) + vm.CollectLMNotificationsTime = mock.MagicMock( + return_value={'LM_total_time': 100, 'Host_maintenance_end': 8} + ) trigger.vms = [vm] trigger.AppendSamples(None, vm_spec, samples) - self.assertEqual(samples, [ - sample.Sample( - metric='TPM_time_series', - value=0.0, - unit='TPM', - metadata={ - 'values': - [1, 1, 1, 1, 0, 0.1, 0.2, 0.3, 0.95, 0.95, 0.95, 0.95], - 'timestamps': [ - 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, - 11000, 12000 - ], - 'interval': 1, - 'random': 'random' - }, - timestamp=0), - sample.Sample( - metric='LM Total Time', - value=100.0, - unit='seconds', - metadata={ - 'LM_total_time': 100, - 'Host_maintenance_end': 8000 - }, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_0_percent', - value=1.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_10_percent', - value=2.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_20_percent', - value=3.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_30_percent', - value=4.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_40_percent', - value=4.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_50_percent', - value=4.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_60_percent', - value=4.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_70_percent', - value=4.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_80_percent', - value=4.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='seconds_dropped_below_90_percent', - value=4.0, - unit='s', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='unresponsive_metric', - value=2.584, - unit='metric', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='total_loss_seconds', - value=3.4, - unit='seconds', - metadata={'random': 'random'}, - timestamp=0), - sample.Sample( - metric='degradation_percent', - value=5.0, - unit='%', - metadata={'random': 'random'}, - timestamp=0) - ]) + self.assertEqual( + samples, + [ + sample.Sample( + metric='TPM_time_series', + value=0.0, + unit='TPM', + metadata={ + 'values': [ + 1, + 1, + 1, + 1, + 0, + 0.1, + 0.2, + 0.3, + 0.95, + 0.95, + 0.95, + 0.95, + ], + 'timestamps': [ + 1000, + 2000, + 3000, + 4000, + 5000, + 6000, + 7000, + 8000, + 9000, + 10000, + 11000, + 12000, + ], + 'interval': 1, + 'random': 'random', + }, + timestamp=0, + ), + sample.Sample( + metric='LM Total Time', + value=100.0, + unit='seconds', + metadata={'LM_total_time': 100, 'Host_maintenance_end': 8}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_0_percent', + value=1.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_10_percent', + 
value=2.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_20_percent', + value=3.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_30_percent', + value=4.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_40_percent', + value=4.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_50_percent', + value=4.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_60_percent', + value=4.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_70_percent', + value=4.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_80_percent', + value=4.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='seconds_dropped_below_90_percent', + value=4.0, + unit='s', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='unresponsive_metric', + value=2.584, + unit='metric', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='total_loss_seconds', + value=3.4, + unit='seconds', + metadata={'random': 'random'}, + timestamp=0, + ), + sample.Sample( + metric='degradation_percent', + value=5.0, + unit='%', + metadata={'random': 'random'}, + timestamp=0, + ), + ], + ) @mock.patch('time.time', mock.MagicMock(return_value=0)) def testAppendLossFunctionSamplesHandleTimeDrift(self): @@ -787,10 +859,9 @@ def testAppendLossFunctionSamplesHandleTimeDrift(self): samples = [s] trigger.trigger_time = datetime.datetime.fromtimestamp(4) vm = mock.MagicMock() - vm.CollectLMNotificationsTime = mock.MagicMock(return_value={ - 'LM_total_time': 100, - 'Host_maintenance_end': 11000 - }) + vm.CollectLMNotificationsTime = mock.MagicMock( + return_value={'LM_total_time': 100, 'Host_maintenance_end': 11} + ) trigger.vms = [vm] trigger.AppendSamples(None, vm_spec, samples) @@ -813,7 +884,7 @@ def testAppendLossFunctionSamplesHandleTimeDrift(self): metric='LM Total Time', value=100.0, unit='seconds', - metadata={'LM_total_time': 100, 'Host_maintenance_end': 11000}, + metadata={'LM_total_time': 100, 'Host_maintenance_end': 11}, timestamp=0, ), sample.Sample(