diff --git a/.gitignore b/.gitignore index d083ea1dd..220056c20 100644 --- a/.gitignore +++ b/.gitignore @@ -62,3 +62,6 @@ system_tests/local_test_setup # Make sure a generated file isn't accidentally committed. pylintrc pylintrc.test + +# Benchmarking results and logs +__benchmark_results__/** diff --git a/tests/perf/__init__.py b/tests/perf/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/perf/microbenchmarks/__init__.py b/tests/perf/microbenchmarks/__init__.py new file mode 100644 index 000000000..58d482ea3 --- /dev/null +++ b/tests/perf/microbenchmarks/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/perf/microbenchmarks/_utils.py b/tests/perf/microbenchmarks/_utils.py new file mode 100644 index 000000000..67945e3eb --- /dev/null +++ b/tests/perf/microbenchmarks/_utils.py @@ -0,0 +1,164 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, List +import statistics +import io +import os + + +def publish_benchmark_extra_info( + benchmark: Any, + params: Any, + benchmark_group: str = "read", + true_times: List[float] = [], +) -> None: + """ + Helper function to publish benchmark parameters to the extra_info property. 
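+
+    Throughput figures (MiB/s) are derived from `benchmark.stats`; when
+    `true_times` is supplied (per-round timings measured inside the target
+    function itself), a second set of `true_throughput_*` values is published
+    as well.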
+ """ + + benchmark.extra_info["num_files"] = params.num_files + benchmark.extra_info["file_size"] = params.file_size_bytes + benchmark.extra_info["chunk_size"] = params.chunk_size_bytes + if benchmark_group == "write": + benchmark.extra_info["pattern"] = "seq" + else: + benchmark.extra_info["pattern"] = params.pattern + benchmark.extra_info["coros"] = params.num_coros + benchmark.extra_info["rounds"] = params.rounds + benchmark.extra_info["bucket_name"] = params.bucket_name + benchmark.extra_info["bucket_type"] = params.bucket_type + benchmark.extra_info["processes"] = params.num_processes + benchmark.group = benchmark_group + + object_size = params.file_size_bytes + num_files = params.num_files + min_throughput = (object_size / (1024 * 1024) * num_files) / benchmark.stats["max"] + max_throughput = (object_size / (1024 * 1024) * num_files) / benchmark.stats["min"] + mean_throughput = (object_size / (1024 * 1024) * num_files) / benchmark.stats["mean"] + median_throughput = ( + object_size / (1024 * 1024) * num_files + ) / benchmark.stats["median"] + + benchmark.extra_info["throughput_MiB_s_min"] = min_throughput + benchmark.extra_info["throughput_MiB_s_max"] = max_throughput + benchmark.extra_info["throughput_MiB_s_mean"] = mean_throughput + benchmark.extra_info["throughput_MiB_s_median"] = median_throughput + + print("\nThroughput Statistics (MiB/s):") + print(f" Min: {min_throughput:.2f} (from max time)") + print(f" Max: {max_throughput:.2f} (from min time)") + print(f" Mean: {mean_throughput:.2f} (approx, from mean time)") + print(f" Median: {median_throughput:.2f} (approx, from median time)") + + if true_times: + throughputs = [(object_size / (1024 * 1024) * num_files) / t for t in true_times] + true_min_throughput = min(throughputs) + true_max_throughput = max(throughputs) + true_mean_throughput = statistics.mean(throughputs) + true_median_throughput = statistics.median(throughputs) + + benchmark.extra_info["true_throughput_MiB_s_min"] = true_min_throughput + benchmark.extra_info["true_throughput_MiB_s_max"] = true_max_throughput + benchmark.extra_info["true_throughput_MiB_s_mean"] = true_mean_throughput + benchmark.extra_info["true_throughput_MiB_s_median"] = true_median_throughput + + print("\nThroughput Statistics from true_times (MiB/s):") + print(f" Min: {true_min_throughput:.2f}") + print(f" Max: {true_max_throughput:.2f}") + print(f" Mean: {true_mean_throughput:.2f}") + print(f" Median: {true_median_throughput:.2f}") + + # Get benchmark name, rounds, and iterations + name = benchmark.name + rounds = benchmark.stats['rounds'] + iterations = benchmark.stats['iterations'] + + # Header for throughput table + header = "\n\n" + "-" * 125 + "\n" + header += "Throughput Benchmark (MiB/s)\n" + header += "-" * 125 + "\n" + header += f"{'Name':<50} {'Min':>10} {'Max':>10} {'Mean':>10} {'StdDev':>10} {'Median':>10} {'Rounds':>8} {'Iterations':>12}\n" + header += "-" * 125 + + # Data row for throughput table + # The table headers (Min, Max) refer to the throughput values. + row = f"{name:<50} {min_throughput:>10.4f} {max_throughput:>10.4f} {mean_throughput:>10.4f} {'N/A':>10} {median_throughput:>10.4f} {rounds:>8} {iterations:>12}" + + print(header) + print(row) + print("-" * 125) + +class RandomBytesIO(io.RawIOBase): + """ + A file-like object that generates random bytes using os.urandom. + It enforces a fixed size and an upper safety cap. 
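+
+    Example (illustrative):
+        f = RandomBytesIO(4 * 1024 * 1024)   # 4 MiB virtual file
+        chunk = f.read(1024 * 1024)          # 1 MiB of random bytes
+        f.seek(0)                            # rewinding is supported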
+ """ + # 10 GiB default safety cap + DEFAULT_CAP = 10 * 1024 * 1024 * 1024 + + def __init__(self, size, max_size=DEFAULT_CAP): + """ + Args: + size (int): The exact size of the virtual file in bytes. + max_size (int): The maximum allowed size to prevent safety issues. + """ + if size is None: + raise ValueError("Size must be defined (cannot be infinite).") + + if size > max_size: + raise ValueError(f"Requested size {size} exceeds the maximum limit of {max_size} bytes (10 GiB).") + + self._size = size + self._pos = 0 + + def read(self, n=-1): + # 1. Handle "read all" (n=-1) + if n is None or n < 0: + n = self._size - self._pos + + # 2. Handle EOF (End of File) + if self._pos >= self._size: + return b"" + + # 3. Clamp read amount to remaining size + # This ensures we stop exactly at `size` bytes. + n = min(n, self._size - self._pos) + + # 4. Generate data + data = os.urandom(n) + self._pos += len(data) + return data + + def readable(self): + return True + + def seekable(self): + return True + + def tell(self): + return self._pos + + def seek(self, offset, whence=io.SEEK_SET): + if whence == io.SEEK_SET: + new_pos = offset + elif whence == io.SEEK_CUR: + new_pos = self._pos + offset + elif whence == io.SEEK_END: + new_pos = self._size + offset + else: + raise ValueError(f"Invalid whence: {whence}") + + # Clamp position to valid range [0, size] + self._pos = max(0, min(new_pos, self._size)) + return self._pos diff --git a/tests/perf/microbenchmarks/config.py b/tests/perf/microbenchmarks/config.py new file mode 100644 index 000000000..bb4a5f1c4 --- /dev/null +++ b/tests/perf/microbenchmarks/config.py @@ -0,0 +1,181 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# nit: TODO: rename it to config_to_params.py +import itertools +import os +from typing import Dict, List + +import yaml + +try: + from tests.perf.microbenchmarks.parameters import ReadParameters, WriteParameters +except ModuleNotFoundError: + from parameters import ReadParameters, WriteParameters + + + + + +def _get_params() -> Dict[str, List[ReadParameters]]: + """ + Docstring for _get_params + 1. this function output a list of readParameters. + 2. to populate the values of readparameters, use default values from config.yaml + 3. generate all possible params , ie + no. 
of params should be equal to bucket_type*file_size_mib, chunk_size * process * coros + you may use itertools.product + """ + params: Dict[str, List[ReadParameters]] = {} + config_path = os.path.join(os.path.dirname(__file__), "config.yaml") + with open(config_path, "r") as f: + config = yaml.safe_load(f) + + common_params = config["common"] + bucket_types = common_params["bucket_types"] + file_sizes_mib = common_params["file_sizes_mib"] + chunk_sizes_mib = common_params["chunk_sizes_mib"] + rounds = common_params["rounds"] + + bucket_map = { + "zonal": config["defaults"]["DEFAULT_RAPID_ZONAL_BUCKET"], + "regional": config["defaults"]["DEFAULT_STANDARD_BUCKET"], + } + + for workload in config["workload"]: + workload_name = workload["name"] + params[workload_name] = [] + pattern = workload["pattern"] + processes = workload["processes"] + coros = workload["coros"] + + # Create a product of all parameter combinations + product = itertools.product( + bucket_types, + file_sizes_mib, + chunk_sizes_mib, + processes, + coros, + ) + + for ( + bucket_type, + file_size_mib, + chunk_size_mib, + num_processes, + num_coros, + ) in product: + file_size_bytes = file_size_mib * 1024 * 1024 + chunk_size_bytes = chunk_size_mib * 1024 * 1024 + bucket_name = bucket_map[bucket_type] + + if "single_file" in workload_name: + num_files = 1 + else: + num_files = num_processes * num_coros + + # Create a descriptive name for the parameter set + name = f"{pattern}_{bucket_type}_{num_processes}p_{num_coros}c" + + params[workload_name].append( + ReadParameters( + name=name, + workload_name=workload_name, + pattern=pattern, + bucket_name=bucket_name, + bucket_type=bucket_type, + num_coros=num_coros, + num_processes=num_processes, + num_files=num_files, + rounds=rounds, + chunk_size_bytes=chunk_size_bytes, + file_size_bytes=file_size_bytes, + ) + ) + return params + + +def get_write_params() -> Dict[str, List[WriteParameters]]: + """ + Docstring for get_write_params + 1. this function output a list of WriteParameters. + 2. to populate the values of WriteParameters, use default values from config_writes.yaml + 3. generate all possible params , ie + no. 
of params should be equal to bucket_type*file_size_mib, chunk_size * process * coros + you may use itertools.product + """ + params: Dict[str, List[WriteParameters]] = {} + config_path = os.path.join(os.path.dirname(__file__), "config_writes.yaml") + with open(config_path, "r") as f: + config = yaml.safe_load(f) + + common_params = config["common"] + bucket_types = common_params["bucket_types"] + file_sizes_mib = common_params["file_sizes_mib"] + chunk_sizes_mib = common_params["chunk_sizes_mib"] + rounds = common_params["rounds"] + + bucket_map = { + "zonal": config["defaults"]["DEFAULT_RAPID_ZONAL_BUCKET"], + "regional": config["defaults"]["DEFAULT_STANDARD_BUCKET"], + } + + for workload in config["workload"]: + workload_name = workload["name"] + params[workload_name] = [] + processes = workload["processes"] + coros = workload["coros"] + + # Create a product of all parameter combinations + product = itertools.product( + bucket_types, + file_sizes_mib, + chunk_sizes_mib, + processes, + coros, + ) + + for ( + bucket_type, + file_size_mib, + chunk_size_mib, + num_processes, + num_coros, + ) in product: + file_size_bytes = file_size_mib * 1024 * 1024 + chunk_size_bytes = chunk_size_mib * 1024 * 1024 + bucket_name = bucket_map[bucket_type] + + if "single_file" in workload_name: + num_files = 1 + else: + num_files = num_processes * num_coros + + # Create a descriptive name for the parameter set + name = f"{workload_name}_{bucket_type}_{num_processes}p_{num_coros}c" + + params[workload_name].append( + WriteParameters( + name=name, + workload_name=workload_name, + bucket_name=bucket_name, + bucket_type=bucket_type, + num_coros=num_coros, + num_processes=num_processes, + num_files=num_files, + rounds=rounds, + chunk_size_bytes=chunk_size_bytes, + file_size_bytes=file_size_bytes, + ) + ) + return params \ No newline at end of file diff --git a/tests/perf/microbenchmarks/config.yaml b/tests/perf/microbenchmarks/config.yaml new file mode 100644 index 000000000..25bfd92c8 --- /dev/null +++ b/tests/perf/microbenchmarks/config.yaml @@ -0,0 +1,49 @@ +common: + bucket_types: + - "regional" + - "zonal" + file_sizes_mib: + - 1024 # 1GiB + chunk_sizes_mib: [100] + rounds: 10 + +workload: + + ############# single process single coroutine ######### + - name: "read_seq" + pattern: "seq" + coros: [1] + processes: [1] + + - name: "read_rand" + pattern: "rand" + coros: [1] + processes: [1] + + ############# single process multi coroutine ######### + + - name: "read_seq_multi_coros" + pattern: "seq" + coros: [2, 4, 8, 16] + processes: [1] + + - name: "read_rand_multi_coros" + pattern: "rand" + coros: [2, 4, 8, 16] + processes: [1] + + ############# multi process multi coroutine ######### + - name: "read_seq_multi_process" + pattern: "seq" + coros: [1, 2, 4] + processes: [2, 16, 48, 96] + + - name: "read_rand_multi_process" + pattern: "rand" + coros: [1, 2, 4] + processes: [2, 16, 48, 96] + + +defaults: + DEFAULT_RAPID_ZONAL_BUCKET: "chandrasiri-benchmarks-zb" + DEFAULT_STANDARD_BUCKET: "chandrasiri-benchmarks-rb" diff --git a/tests/perf/microbenchmarks/config_writes.yaml b/tests/perf/microbenchmarks/config_writes.yaml new file mode 100644 index 000000000..b4d93ba52 --- /dev/null +++ b/tests/perf/microbenchmarks/config_writes.yaml @@ -0,0 +1,34 @@ +common: + bucket_types: + - "regional" + - "zonal" + file_sizes_mib: + - 1024 # 1GiB + chunk_sizes_mib: [100] + rounds: 10 + +workload: + + ############# single proc single coroutines ######### + - name: "write_seq" + pattern: "seq" + coros: [1] + processes: [1] + + 
############# single proc multiple coroutines ######### + + - name: "write_seq_multi_coros" + pattern: "seq" + coros: [2, 4, 8, 16] + processes: [1] + + ############# multiple proc multiple coroutines ######### + - name: "write_seq_multi_process" + pattern: "seq" + coros: [1, 2] + processes: [8, 16, 32, 64] + + +defaults: + DEFAULT_RAPID_ZONAL_BUCKET: "chandrasiri-benchmarks-zb" + DEFAULT_STANDARD_BUCKET: "chandrasiri-benchmarks-rb" diff --git a/tests/perf/microbenchmarks/conftest.py b/tests/perf/microbenchmarks/conftest.py new file mode 100644 index 000000000..01ba802dd --- /dev/null +++ b/tests/perf/microbenchmarks/conftest.py @@ -0,0 +1,152 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import contextlib +from typing import Any +from tests.perf.microbenchmarks.resource_monitor import ResourceMonitor +import pytest +from tests.system._helpers import delete_blob + +import asyncio +import multiprocessing +import os +import uuid +from google.cloud import storage +from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( + AsyncAppendableObjectWriter, +) +from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from tests.perf.microbenchmarks.parameters import WriteParameters + +_OBJECT_NAME_PREFIX = "micro-benchmark" + + +@pytest.fixture(scope="function") +def blobs_to_delete(): + blobs_to_delete = [] + + yield blobs_to_delete + + for blob in blobs_to_delete: + delete_blob(blob) + + +@pytest.fixture(scope="session") +def storage_client(): + from google.cloud.storage import Client + + client = Client() + with contextlib.closing(client): + yield client + +@pytest.fixture +def monitor(): + """ + Provides the ResourceMonitor class. + Usage: with monitor() as m: ... + """ + return ResourceMonitor + +def publish_resource_metrics(benchmark: Any, monitor: ResourceMonitor) -> None: + """ + Helper function to publish resource monitor results to the extra_info property. + """ + benchmark.extra_info.update( + { + "cpu_max_global": f"{monitor.max_cpu:.2f}", + "mem_max": f"{monitor.max_mem:.2f}", + "net_throughput_mb_s": f"{monitor.throughput_mb_s:.2f}", + "vcpus": monitor.vcpus, + } + ) + + +async def upload_appendable_object(bucket_name, object_name, object_size, chunk_size): + # flush interval set to little over 1GiB to minimize number of flushes. + # this method is to write "appendable" objects which will be used for + # benchmarking reads, hence not concerned performance of writes here. 
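+    # (1026 MiB is just above the 1 GiB objects generated from config.yaml, so
+    # the writer should not need to flush before close.)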
+ writer = AsyncAppendableObjectWriter( + AsyncGrpcClient().grpc_client, bucket_name, object_name, writer_options={"FLUSH_INTERVAL_BYTES": 1026 * 1024 ** 2} + ) + await writer.open() + uploaded_bytes = 0 + while uploaded_bytes < object_size: + bytes_to_upload = min(chunk_size, object_size - uploaded_bytes) + await writer.append(os.urandom(bytes_to_upload)) + uploaded_bytes += bytes_to_upload + object_metdata = await writer.close(finalize_on_close=True) + assert object_metdata.size == uploaded_bytes + return uploaded_bytes + + +def upload_simple_object(bucket_name, object_name, object_size, chunk_size): + storage_client = storage.Client() + bucket = storage_client.bucket(bucket_name) + blob = bucket.blob(object_name) + blob.chunk_size = chunk_size + data = os.urandom(object_size) + blob.upload_from_string(data) + return object_size + + +def _upload_worker(args): + bucket_name, object_name, object_size, chunk_size, bucket_type = args + if bucket_type == "zonal": + uploaded_bytes = asyncio.run( + upload_appendable_object(bucket_name, object_name, object_size, chunk_size) + ) + else: + uploaded_bytes = upload_simple_object(bucket_name, object_name, object_size, chunk_size) + return object_name, uploaded_bytes + + +def _create_files(num_files, bucket_name, bucket_type, object_size, chunk_size=1024 * 1024 * 1024): + """ + 1. using upload_appendable_object implement this and return a list of file names. + TODO: adapt this to REGIONAL BUCKETS as well. + """ + object_names = [ + f"{_OBJECT_NAME_PREFIX}-{uuid.uuid4().hex[:5]}" for _ in range(num_files) + ] + + args_list = [ + (bucket_name, object_names[i], object_size, chunk_size, bucket_type) + for i in range(num_files) + ] + + ctx = multiprocessing.get_context("spawn") + with ctx.Pool() as pool: + results = pool.map(_upload_worker, args_list) + + total_uploaded_bytes = sum(r[1] for r in results) + assert total_uploaded_bytes == object_size * num_files + + return [r[0] for r in results] + + +@pytest.fixture +def workload_params(request): + params = request.param + if isinstance(params, WriteParameters): + files_names = [ + f"{_OBJECT_NAME_PREFIX}-{uuid.uuid4().hex[:5]}" + for _ in range(params.num_files) + ] + else: + files_names = _create_files( + params.num_files, + params.bucket_name, + params.bucket_type, + params.file_size_bytes, + ) + return params, files_names \ No newline at end of file diff --git a/tests/perf/microbenchmarks/json_to_csv.py b/tests/perf/microbenchmarks/json_to_csv.py new file mode 100644 index 000000000..1ef58f907 --- /dev/null +++ b/tests/perf/microbenchmarks/json_to_csv.py @@ -0,0 +1,190 @@ +import json +import csv +import argparse +import logging +import numpy as np + +MB = 1024 * 1024 + + +def _process_benchmark_result(bench, headers, extra_info_headers, stats_headers): + """ + Process a single benchmark result and prepare it for CSV reporting. + + This function extracts relevant statistics and metadata from a benchmark + run, calculates derived metrics like percentiles and throughput, and + formats it as a dictionary. + + Args: + bench (dict): The dictionary for a single benchmark from the JSON output. + headers (list): The list of all header names for the CSV. + extra_info_headers (list): Headers from the 'extra_info' section. + stats_headers (list): Headers from the 'stats' section. 
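+
+    Returns:
+        dict: A CSV row for this benchmark entry, keyed by the given headers.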
+
+    """
+    row = {h: "" for h in headers}
+    row["name"] = bench.get("name", "")
+    row["group"] = bench.get("group", "")
+
+    extra_info = bench.get("extra_info", {})
+
+    # Populate extra_info and stats
+    for key in extra_info_headers:
+        row[key] = extra_info.get(key)
+    for key in stats_headers:
+        row[key] = bench.get("stats", {}).get(key)
+
+    # Handle threads/coros mapping
+    if "threads" in row:
+        row["threads"] = extra_info.get("num_coros", extra_info.get("coros"))
+
+    # Calculate percentiles
+    timings = bench.get("stats", {}).get("data")
+    if timings:
+        row["p90"] = np.percentile(timings, 90)
+        row["p95"] = np.percentile(timings, 95)
+        row["p99"] = np.percentile(timings, 99)
+
+    # Calculate max throughput
+    file_size = extra_info.get("file_size_bytes", extra_info.get("file_size", 0))
+    num_files = extra_info.get("num_files", 1)
+    total_bytes = file_size * num_files
+
+    min_time = bench.get("stats", {}).get("min")
+    if min_time and min_time > 0:
+        row["max_throughput_mb_s"] = (total_bytes / min_time) / MB
+    else:
+        row["max_throughput_mb_s"] = 0.0
+
+    return row
+
+
+def _generate_report(json_path, csv_path):
+    """Generate a CSV summary report from the pytest-benchmark JSON output.
+
+    Args:
+        json_path (str): The path to the JSON file containing benchmark results.
+        csv_path (str): The path where the CSV report will be saved.
+
+    Returns:
+        str: The path to the generated CSV report file.
+
+    """
+    logging.info(f"Generating CSV report from {json_path}")
+
+    with open(json_path, "r") as f:
+        data = json.load(f)
+
+    benchmarks = data.get("benchmarks", [])
+    if not benchmarks:
+        logging.warning("No benchmarks found in the JSON file.")
+        return
+
+    # Preferred column order: name, group, block_size, bucket_name, bucket_type,
+    # chunk_size, cpu_max_global, file_size, mem_max, net_throughput_mb_s,
+    # num_files, pattern, processes, rounds, threads, vcpus, min, max, mean,
+    # median, stddev, p90, p95, p99, max_throughput_mb_s.
+    # Any other columns found in the data are appended after these.
+ ordered_headers = [ + "name", + "group", + "block_size", + "bucket_name", + "bucket_type", + "chunk_size", + "cpu_max_global", + "file_size", + "mem_max", + "net_throughput_mb_s", + "num_files", + "pattern", + "processes", + "rounds", + "threads", + "vcpus", + "min", + "max", + "mean", + "median", + "stddev", + "p90", + "p95", + "p99", + "max_throughput_mb_s", + ] + + # Gather all available headers from the data + all_available_headers = set(["name", "group"]) + stats_headers = ["min", "max", "mean", "median", "stddev"] + custom_headers = ["p90", "p95", "p99", "max_throughput_mb_s"] + + all_available_headers.update(stats_headers) + all_available_headers.update(custom_headers) + + extra_info_keys = set() + for bench in benchmarks: + if "extra_info" in bench and isinstance(bench["extra_info"], dict): + extra_info_keys.update(bench["extra_info"].keys()) + all_available_headers.update(extra_info_keys) + + # Construct the final header list + final_headers = list(ordered_headers) + + # Add any headers from the data that are not in the ordered list + for header in sorted(list(all_available_headers)): + if header not in final_headers: + final_headers.append(header) + + # We still need the full list of extra_info headers for _process_benchmark_result + extra_info_headers = sorted(list(extra_info_keys)) + + with open(csv_path, "w", newline="") as f: + writer = csv.writer(f) + writer.writerow(final_headers) + + for bench in benchmarks: + row = _process_benchmark_result( + bench, final_headers, extra_info_headers, stats_headers + ) + writer.writerow([row.get(h, "") for h in final_headers]) + + logging.info(f"CSV report generated at {csv_path}") + return csv_path + + +def main(): + """ + Converts a JSON benchmark file to a CSV file. + + The CSV file will contain the 'name' of each benchmark and all fields + from the 'extra_info' section. + """ + parser = argparse.ArgumentParser(description="Convert benchmark JSON to CSV.") + parser.add_argument( + "--input_file", + nargs="?", + default="output.json", + help="Path to the input JSON file (default: output.json)", + ) + parser.add_argument( + "--output_file", + nargs="?", + default="output.csv", + help="Path to the output CSV file (default: output.csv)", + ) + args = parser.parse_args() + + logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" + ) + + try: + _generate_report(args.input_file, args.output_file) + print(f"Successfully converted {args.input_file} to {args.output_file}") + except FileNotFoundError: + logging.error(f"Error: Input file not found at {args.input_file}") + except json.JSONDecodeError: + logging.error(f"Error: Could not decode JSON from {args.input_file}") + except Exception as e: + logging.error(f"An unexpected error occurred: {e}") + + +if __name__ == "__main__": + main() diff --git a/tests/perf/microbenchmarks/parameters.py b/tests/perf/microbenchmarks/parameters.py new file mode 100644 index 000000000..02b384b44 --- /dev/null +++ b/tests/perf/microbenchmarks/parameters.py @@ -0,0 +1,43 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from dataclasses import dataclass + + +@dataclass +class ReadParameters: + name: str + workload_name: str + pattern: str + bucket_name: str + bucket_type: str + num_coros: int + num_processes: int + num_files: int + rounds: int + chunk_size_bytes: int + file_size_bytes: int + + +@dataclass +class WriteParameters: + name: str + workload_name: str + bucket_name: str + bucket_type: str + num_coros: int + num_processes: int + num_files: int + rounds: int + chunk_size_bytes: int + file_size_bytes: int diff --git a/tests/perf/microbenchmarks/resource_monitor.py b/tests/perf/microbenchmarks/resource_monitor.py new file mode 100644 index 000000000..be6ae7025 --- /dev/null +++ b/tests/perf/microbenchmarks/resource_monitor.py @@ -0,0 +1,99 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import threading +import time + +import psutil + + +class ResourceMonitor: + def __init__(self): + self.interval = 1.0 + + self.vcpus = psutil.cpu_count() or 1 + self.max_cpu = 0.0 + self.max_mem = 0.0 + + # Network and Time tracking + self.start_time = 0.0 + self.duration = 0.0 + self.start_net = None + self.net_sent_mb = 0.0 + self.net_recv_mb = 0.0 + + self._stop_event = threading.Event() + self._thread = None + + def __enter__(self): + self.start_net = psutil.net_io_counters() + self.start_time = time.perf_counter() + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop() + self.duration = time.perf_counter() - self.start_time + end_net = psutil.net_io_counters() + + self.net_sent_mb = (end_net.bytes_sent - self.start_net.bytes_sent) / ( + 1024 * 1024 + ) + self.net_recv_mb = (end_net.bytes_recv - self.start_net.bytes_recv) / ( + 1024 * 1024 + ) + + def _monitor(self): + psutil.cpu_percent(interval=None) + current_process = psutil.Process() + while not self._stop_event.is_set(): + try: + # CPU and Memory tracking for current process tree + total_cpu = current_process.cpu_percent(interval=None) + current_mem = current_process.memory_info().rss + for child in current_process.children(recursive=True): + try: + total_cpu += child.cpu_percent(interval=None) + current_mem += child.memory_info().rss + except (psutil.NoSuchProcess, psutil.AccessDenied): + continue + + # Normalize CPU by number of vcpus + global_cpu = total_cpu / self.vcpus + + mem = current_mem + + if global_cpu > self.max_cpu: + self.max_cpu = global_cpu + if mem > self.max_mem: + self.max_mem = mem + except psutil.NoSuchProcess: + pass + + time.sleep(self.interval) + + def start(self): + self._thread = threading.Thread(target=self._monitor, daemon=True) + self._thread.start() + + def stop(self): + self._stop_event.set() + if self._thread: + self._thread.join() + + @property + def throughput_mb_s(self): + """Calculates combined network throughput.""" + if self.duration <= 0: + return 0.0 + return (self.net_sent_mb + self.net_recv_mb) / self.duration \ No newline at end of file diff 
--git a/tests/perf/microbenchmarks/test_reads.py b/tests/perf/microbenchmarks/test_reads.py new file mode 100644 index 000000000..fa5ff5775 --- /dev/null +++ b/tests/perf/microbenchmarks/test_reads.py @@ -0,0 +1,425 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Docstring for tests.perf.microbenchmarks.test_reads + +File for benchmarking zonal reads (i.e. downloads) + +1. 1 object 1 coro with variable chunk_size + +calculate latency, throughput, etc for downloads. + + +""" + +import time +import asyncio +import random +from io import BytesIO +import logging + +import pytest + +from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( + AsyncMultiRangeDownloader, +) +from tests.perf.microbenchmarks._utils import publish_benchmark_extra_info +from tests.perf.microbenchmarks.conftest import ( + publish_resource_metrics, +) +import tests.perf.microbenchmarks.config as config +from concurrent.futures import ThreadPoolExecutor +import multiprocessing + +all_params = config._get_params() + + +async def create_client(): + """Initializes async client and gets the current event loop.""" + return AsyncGrpcClient().grpc_client + + +async def download_chunks_using_mrd_async(client, filename, other_params, chunks): + # start timer. + start_time = time.monotonic_ns() + + total_bytes_downloaded = 0 + mrd = AsyncMultiRangeDownloader(client, other_params.bucket_name, filename) + await mrd.open() + for offset, size in chunks: + buffer = BytesIO() + await mrd.download_ranges([(offset, size, buffer)]) + total_bytes_downloaded += buffer.tell() + await mrd.close() + + assert total_bytes_downloaded == other_params.file_size_bytes + + # end timer. + end_time = time.monotonic_ns() + elapsed_time = end_time - start_time + print(f"INFO: Time taken to download all chunks: {elapsed_time} ns") + return elapsed_time / 1_000_000_000 + + +def download_chunks_using_mrd(loop, client, filename, other_params, chunks): + return loop.run_until_complete( + download_chunks_using_mrd_async(client, filename, other_params, chunks) + ) + + +def download_chunks_using_json(_, json_client, filename, other_params, chunks): + bucket = json_client.bucket(other_params.bucket_name) + blob = bucket.blob(filename) + start_time = time.monotonic_ns() + for offset, size in chunks: + _ = blob.download_as_bytes(start=offset, end=offset + size - 1) + return (time.monotonic_ns() - start_time) / 1_000_000_000 + + +@pytest.mark.parametrize( + "workload_params", + all_params["read_rand"] + all_params["read_seq"], + indirect=True, + ids=lambda p: p.name, +) +def test_downloads_single_proc_single_coro( + benchmark, storage_client, blobs_to_delete, monitor, workload_params +): + """ + 1. create chunks based on the object size and chunk_size. [(start_byte, min(chunk_size, remaining_size))] + 2. 
Pass the list of chunks to `download_chunks_using_mrd` for zonal bucket + `download_chunks_using_json` for regional bucket. + above function are target methods. + 3. benchmark target method, using benchmark.pedantic + + + + """ + params, files_names = workload_params + + object_size = params.file_size_bytes + chunk_size = params.chunk_size_bytes + chunks = [] + for offset in range(0, object_size, chunk_size): + size = min(chunk_size, object_size - offset) + chunks.append((offset, size)) + + if params.pattern == "rand": + logging.info("randomizing chunks") + random.shuffle(chunks) + + if params.bucket_type == "zonal": + logging.info("bucket type zonal") + target_func = download_chunks_using_mrd + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + client = loop.run_until_complete(create_client()) + else: + logging.info("bucket type regional") + target_func = download_chunks_using_json + loop = None + client = storage_client + + output_times = [] + + def target_wrapper(*args, **kwargs): + result = target_func(*args, **kwargs) + output_times.append(result) + return output_times + + try: + with monitor() as m: + output_times = benchmark.pedantic( + target=target_wrapper, + iterations=1, + rounds=params.rounds, + args=( + loop, + client, + files_names[0], + params, + chunks, + ), + ) + finally: + if loop is not None: + tasks = asyncio.all_tasks(loop=loop) + for task in tasks: + task.cancel() + loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) + loop.close() + publish_benchmark_extra_info(benchmark, params, true_times=output_times) + publish_resource_metrics(benchmark, m) + + blobs_to_delete.extend( + storage_client.bucket(params.bucket_name).blob(f) for f in files_names + ) + + +def download_files_using_mrd_multi_coro(loop, client, files, other_params, chunks): + """ + Docstring for download_files_using_mrd + + 1. for each file + 1. create chunks of size other_params.chunk_size_bytes + 2. create a coroutine/task using download_chunks_using_mrd + 3. execute all coroutines/task using asyncio.gather in loop. + 3. capture latency (output time) + 2. output max time. + + :param loop: Description + :param client: Description + :param files: Description + :param other_params: Description + """ + + async def main(): + if len(files) == 1: + result = await download_chunks_using_mrd_async( + client, files[0], other_params, chunks + ) + return [result] + else: + tasks = [] + for f in files: + tasks.append( + download_chunks_using_mrd_async(client, f, other_params, chunks) + ) + return await asyncio.gather(*tasks) + + results = loop.run_until_complete(main()) + return max(results) + + +def download_files_using_json_multi_threaded( + _, json_client, files, other_params, chunks +): + """ + Docstring for download_files_using_json + + 1. for each file + 1. create chunks of size other_params.chunk_size_bytes + 2. using threaPoolexecutor send each file chunks to download_chunks_using_json + 3. capture latency (output time) + 2. output max time. + + :param _: Description + :param json_client: Description + :param files: Description + :param other_params: Description + """ + results = [] + # In the context of multi-coro, num_coros is the number of files to download concurrently. + # So we can use it as max_workers for the thread pool. 
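+    # Threads are sufficient here: the JSON range downloads are network-bound and
+    # release the GIL while waiting on I/O.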
+ with ThreadPoolExecutor(max_workers=other_params.num_coros) as executor: + futures = [] + for f in files: + future = executor.submit( + download_chunks_using_json, None, json_client, f, other_params, chunks + ) + futures.append(future) + + for future in futures: + results.append(future.result()) + + return max(results) + + +@pytest.mark.parametrize( + "workload_params", + all_params["read_seq_multi_coros"] + + all_params["read_rand_multi_coros"], + indirect=True, + ids=lambda p: p.name, +) +def test_downloads_single_proc_multi_coro( + benchmark, storage_client, blobs_to_delete, monitor, workload_params +): + params, files_names = workload_params + + object_size = params.file_size_bytes + chunk_size = params.chunk_size_bytes + chunks = [] + for offset in range(0, object_size, chunk_size): + size = min(chunk_size, object_size - offset) + chunks.append((offset, size)) + + if params.pattern == "rand": + logging.info("randomizing chunks") + random.shuffle(chunks) + + if params.bucket_type == "zonal": + logging.info("bucket type zonal") + target_func = download_files_using_mrd_multi_coro + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + client = loop.run_until_complete(create_client()) + else: + logging.info("bucket type regional") + target_func = download_files_using_json_multi_threaded + loop = None + client = storage_client + + output_times = [] + + def target_wrapper(*args, **kwargs): + result = target_func(*args, **kwargs) + output_times.append(result) + return output_times + + try: + with monitor() as m: + output_times = benchmark.pedantic( + target=target_wrapper, + iterations=1, + rounds=params.rounds, + args=( + loop, + client, + files_names, + params, + chunks, + ), + ) + finally: + if loop is not None: + tasks = asyncio.all_tasks(loop=loop) + for task in tasks: + task.cancel() + loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) + loop.close() + publish_benchmark_extra_info(benchmark, params, true_times=output_times) + publish_resource_metrics(benchmark, m) + + blobs_to_delete.extend( + storage_client.bucket(params.bucket_name).blob(f) for f in files_names + ) + + +def _download_files_worker(files_to_download, other_params, chunks, bucket_type): + # For regional buckets, a new client must be created for each process. + # For zonal, the same is done for consistency. 
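+    # (With the "spawn" start method each worker starts in a fresh interpreter,
+    # and gRPC channels / event loops cannot be shared across processes anyway.)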
+    if bucket_type == "zonal":
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        client = loop.run_until_complete(create_client())
+        try:
+            # download_files_using_mrd_multi_coro returns max latency of coros
+            result = download_files_using_mrd_multi_coro(
+                loop, client, files_to_download, other_params, chunks
+            )
+        finally:
+            tasks = asyncio.all_tasks(loop=loop)
+            for task in tasks:
+                task.cancel()
+            loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True))
+            loop.close()
+        return result
+    else:  # regional
+        from google.cloud import storage
+
+        json_client = storage.Client()
+        # download_files_using_json_multi_threaded returns max latency of threads
+        return download_files_using_json_multi_threaded(
+            None, json_client, files_to_download, other_params, chunks
+        )
+
+
+def download_files_mp_mc_wrapper(files_names, params, chunks, bucket_type):
+    num_processes = params.num_processes
+    num_coros = params.num_coros  # This is n, the number of files per process
+
+    # Distribute filenames to processes
+    filenames_per_process = [
+        files_names[i : i + num_coros] for i in range(0, len(files_names), num_coros)
+    ]
+
+    args = [
+        (
+            filenames,
+            params,
+            chunks,
+            bucket_type,
+        )
+        for filenames in filenames_per_process
+    ]
+
+    ctx = multiprocessing.get_context("spawn")
+    with ctx.Pool(processes=num_processes) as pool:
+        results = pool.starmap(_download_files_worker, args)
+
+    return max(results)
+
+
+@pytest.mark.parametrize(
+    "workload_params",
+    all_params["read_seq_multi_process"]
+    + all_params["read_rand_multi_process"],
+    indirect=True,
+    ids=lambda p: p.name,
+)
+def test_downloads_multi_proc_multi_coro(
+    benchmark, storage_client, blobs_to_delete, monitor, workload_params
+):
+    """
+    Same pattern as `test_downloads_single_proc_multi_coro` and
+    `test_downloads_single_proc_single_coro`, but the work is spread over m
+    processes with n coroutines each: `files_names` contains m*n files, m
+    processes are created with the "spawn" start method, and each process
+    downloads n files. The reported time (latency) for each round is the
+    maximum latency across all processes.
+ """ + params, files_names = workload_params + logging.info(f"num files: {len(files_names)}") + + object_size = params.file_size_bytes + chunk_size = params.chunk_size_bytes + chunks = [] + for offset in range(0, object_size, chunk_size): + size = min(chunk_size, object_size - offset) + chunks.append((offset, size)) + + if params.pattern == "rand": + logging.info("randomizing chunks") + random.shuffle(chunks) + + output_times = [] + + def target_wrapper(*args, **kwargs): + result = download_files_mp_mc_wrapper(*args, **kwargs) + output_times.append(result) + return output_times + + try: + with monitor() as m: + output_times = benchmark.pedantic( + target=target_wrapper, + iterations=1, + rounds=params.rounds, + args=( + files_names, + params, + chunks, + params.bucket_type, + ), + ) + finally: + publish_benchmark_extra_info(benchmark, params, true_times=output_times) + publish_resource_metrics(benchmark, m) + + blobs_to_delete.extend( + storage_client.bucket(params.bucket_name).blob(f) for f in files_names + ) \ No newline at end of file diff --git a/tests/perf/microbenchmarks/test_writes.py b/tests/perf/microbenchmarks/test_writes.py new file mode 100644 index 000000000..61dc354db --- /dev/null +++ b/tests/perf/microbenchmarks/test_writes.py @@ -0,0 +1,311 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Docstring for tests.perf.microbenchmarks.test_writes + +File for benchmarking zonal writes (i.e. 
uploads) +""" + +import os +import time +import asyncio +from concurrent.futures import ThreadPoolExecutor +import multiprocessing +import logging + +import pytest +from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import AsyncAppendableObjectWriter + +from tests.perf.microbenchmarks._utils import publish_benchmark_extra_info, RandomBytesIO +from tests.perf.microbenchmarks.conftest import publish_resource_metrics +import tests.perf.microbenchmarks.config as config + +# Get write parameters +all_params = config.get_write_params() + +async def create_client(): + """Initializes async client and gets the current event loop.""" + return AsyncGrpcClient().grpc_client + +async def upload_chunks_using_grpc_async(client, filename, other_params): + start_time = time.monotonic_ns() + + writer = AsyncAppendableObjectWriter( + client=client, bucket_name=other_params.bucket_name, object_name=filename + ) + await writer.open() + + uploaded_bytes = 0 + upload_size = other_params.file_size_bytes + chunk_size = other_params.chunk_size_bytes + + while uploaded_bytes < upload_size: + bytes_to_upload = min(chunk_size, upload_size - uploaded_bytes) + data = os.urandom(bytes_to_upload) + await writer.append(data) + uploaded_bytes += bytes_to_upload + await writer.close() + + assert uploaded_bytes == upload_size + + end_time = time.monotonic_ns() + elapsed_time = end_time - start_time + return elapsed_time / 1_000_000_000 + +def upload_chunks_using_grpc(loop, client, filename, other_params): + return loop.run_until_complete( + upload_chunks_using_grpc_async(client, filename, other_params) + ) + +def upload_using_json(_, json_client, filename, other_params): + start_time = time.monotonic_ns() + + bucket = json_client.bucket(other_params.bucket_name) + blob = bucket.blob(filename) + upload_size = other_params.file_size_bytes + # Don't use BytesIO because it'll report high memory usage for large files. + # `RandomBytesIO` generates random bytes on the fly. 
+ in_mem_file = RandomBytesIO(upload_size) + # data = os.urandom(upload_size) + blob.upload_from_file(in_mem_file) + + end_time = time.monotonic_ns() + elapsed_time = end_time - start_time + return elapsed_time / 1_000_000_000 + +@pytest.mark.parametrize( + "workload_params", + all_params["write_seq"], + indirect=True, + ids=lambda p: p.name, +) +def test_uploads_single_proc_single_coro( + benchmark, storage_client, blobs_to_delete, monitor, workload_params +): + params, files_names = workload_params + + if params.bucket_type == "zonal": + logging.info("bucket type zonal") + target_func = upload_chunks_using_grpc + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + client = loop.run_until_complete(create_client()) + else: + logging.info("bucket type regional") + target_func = upload_using_json + loop = None + client = storage_client + + output_times = [] + + def target_wrapper(*args, **kwargs): + result = target_func(*args, **kwargs) + output_times.append(result) + return output_times + + try: + with monitor() as m: + output_times = benchmark.pedantic( + target=target_wrapper, + iterations=1, + rounds=params.rounds, + args=( + loop, + client, + files_names[0], + params, + ), + ) + finally: + if loop is not None: + tasks = asyncio.all_tasks(loop=loop) + for task in tasks: + task.cancel() + loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) + loop.close() + publish_benchmark_extra_info(benchmark, params, benchmark_group="write", true_times=output_times) + publish_resource_metrics(benchmark, m) + + blobs_to_delete.extend( + storage_client.bucket(params.bucket_name).blob(f) for f in files_names + ) + +def upload_files_using_grpc_multi_coro(loop, client, files, other_params): + async def main(): + tasks = [] + for f in files: + tasks.append( + upload_chunks_using_grpc_async(client, f, other_params) + ) + return await asyncio.gather(*tasks) + + results = loop.run_until_complete(main()) + return max(results) + +def upload_files_using_json_multi_threaded(_, json_client, files, other_params): + results = [] + with ThreadPoolExecutor(max_workers=other_params.num_coros) as executor: + futures = [] + for f in files: + future = executor.submit( + upload_using_json, None, json_client, f, other_params + ) + futures.append(future) + + for future in futures: + results.append(future.result()) + + return max(results) + +@pytest.mark.parametrize( + "workload_params", + all_params["write_seq_multi_coros"], + indirect=True, + ids=lambda p: p.name, +) +def test_uploads_single_proc_multi_coro( + benchmark, storage_client, blobs_to_delete, monitor, workload_params +): + params, files_names = workload_params + + if params.bucket_type == "zonal": + logging.info("bucket type zonal") + target_func = upload_files_using_grpc_multi_coro + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + client = loop.run_until_complete(create_client()) + else: + logging.info("bucket type regional") + target_func = upload_files_using_json_multi_threaded + loop = None + client = storage_client + + output_times = [] + + def target_wrapper(*args, **kwargs): + result = target_func(*args, **kwargs) + output_times.append(result) + return output_times + + try: + with monitor() as m: + output_times = benchmark.pedantic( + target=target_wrapper, + iterations=1, + rounds=params.rounds, + args=( + loop, + client, + files_names, + params, + ), + ) + finally: + if loop is not None: + tasks = asyncio.all_tasks(loop=loop) + for task in tasks: + task.cancel() + 
loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) + loop.close() + publish_benchmark_extra_info(benchmark, params, benchmark_group="write", true_times=output_times) + publish_resource_metrics(benchmark, m) + + blobs_to_delete.extend( + storage_client.bucket(params.bucket_name).blob(f) for f in files_names + ) + +def _upload_files_worker(files_to_upload, other_params, bucket_type): + if bucket_type == "zonal": + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + client = loop.run_until_complete(create_client()) + try: + result = upload_files_using_grpc_multi_coro( + loop, client, files_to_upload, other_params + ) + finally: + # cleanup loop + tasks = asyncio.all_tasks(loop=loop) + for task in tasks: + task.cancel() + loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) + loop.close() + return result + else: # regional + from google.cloud import storage + json_client = storage.Client() + return upload_files_using_json_multi_threaded( + None, json_client, files_to_upload, other_params + ) + +def upload_files_mp_mc_wrapper(files_names, params): + num_processes = params.num_processes + num_coros = params.num_coros + + filenames_per_process = [ + files_names[i : i + num_coros] for i in range(0, len(files_names), num_coros) + ] + + args = [ + ( + filenames, + params, + params.bucket_type, + ) + for filenames in filenames_per_process + ] + + ctx = multiprocessing.get_context("spawn") + with ctx.Pool(processes=num_processes) as pool: + results = pool.starmap(_upload_files_worker, args) + + return max(results) + +@pytest.mark.parametrize( + "workload_params", + all_params["write_seq_multi_process"], + indirect=True, + ids=lambda p: p.name, +) +def test_uploads_multi_proc_multi_coro( + benchmark, storage_client, blobs_to_delete, monitor, workload_params +): + params, files_names = workload_params + + output_times = [] + + def target_wrapper(*args, **kwargs): + result = upload_files_mp_mc_wrapper(*args, **kwargs) + output_times.append(result) + return output_times + + try: + with monitor() as m: + output_times = benchmark.pedantic( + target=target_wrapper, + iterations=1, + rounds=params.rounds, + args=( + files_names, + params, + ), + ) + finally: + publish_benchmark_extra_info(benchmark, params, benchmark_group="write", true_times=output_times) + publish_resource_metrics(benchmark, m) + + blobs_to_delete.extend( + storage_client.bucket(params.bucket_name).blob(f) for f in files_names + ) \ No newline at end of file
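A quick way to exercise these benchmarks end to end is sketched below. This is illustrative only: it assumes pytest-benchmark is installed, the repository root is the working directory, and the bucket names in config.yaml / config_writes.yaml point to buckets you can access; the output file names are arbitrary examples.

# run_microbenchmarks.py -- illustrative driver, not part of this change.
import pytest

from tests.perf.microbenchmarks import json_to_csv

# Run the read benchmarks; --benchmark-json comes from the pytest-benchmark plugin.
pytest.main(
    [
        "tests/perf/microbenchmarks/test_reads.py",
        "--benchmark-json=read_results.json",
    ]
)

# Flatten the JSON results into the CSV summary produced by json_to_csv.py.
json_to_csv._generate_report("read_results.json", "read_results.csv")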