From 6293d85f6733254d6ebc245aef8396f5e4ae098a Mon Sep 17 00:00:00 2001 From: Marzieh Lenjani Date: Wed, 1 Apr 2026 08:55:29 -0700 Subject: [PATCH] Add diagnosis APIs to MediaWiki benchmark MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: This diff adds reusable pre-flight diagnostic checks and centralizes diagnosis merging at the framework level, so all benchmarks benefit automatically. **1. Shared check functions (`packages/common/diagnosis_utils.py`)** Added two reusable check functions to the shared diagnosis API: - `check_file_descriptor_limit()`: verifies ulimit is sufficient, with optional auto-fix that records the change under `notable_auto_fixes` - `check_selinux()`: detects SELinux Enforcing mode (causes HHVM segfaults) These join existing `check_ipv6_hostname()` and `check_port_available()`. **2. Generic pre-flight CLI (`packages/common/preflight_checks.py`)** A reusable script callable from any benchmark's bash runner: - Accepts `--benchmark ` and `--min-fds ` for per-benchmark config - Runs SELinux, IPv6 hostname, and file descriptor checks - Fixes inverted `check_ipv6_hostname` usage (returns True when IPv6 is detected, not necessarily broken — should not gate `all_ok`) **3. Framework-level diagnosis merge (`benchpress/cli/commands/run.py`)** After `job.run()`, `run.py` now automatically merges DiagnosisRecorder records (failures + auto-fixes) into metrics via `DIAGNOSIS_FILE_PATH`. Every benchmark gets diagnosis merging for free — no per-benchmark merge code needed. Removed the now-redundant `merge_failure_to_results()` call from tao_bench's `run_autoscale.py`. **4. MediaWiki integration** - `run.sh`: Calls common `preflight_checks.py` before benchmark; raises `ulimit -n` in bash so child processes (HHVM, nginx, wrk) inherit the limit - `jobs.yml`: Added `-U {auto_fix_ulimit}` arg with default `auto_fix_ulimit=1` to all 5 mediawiki job entries Differential Revision: D98805269 --- benchpress/cli/commands/run.py | 26 ++++ benchpress/config/jobs.yml | 10 ++ packages/common/diagnosis_utils.py | 197 +++++++++++++++++++++++++++- packages/common/preflight_checks.py | 100 ++++++++++++++ packages/mediawiki/run.sh | 23 +++- packages/tao_bench/run_autoscale.py | 4 - 6 files changed, 353 insertions(+), 7 deletions(-) create mode 100644 packages/common/preflight_checks.py diff --git a/benchpress/cli/commands/run.py b/benchpress/cli/commands/run.py index 41787581..d679501f 100644 --- a/benchpress/cli/commands/run.py +++ b/benchpress/cli/commands/run.py @@ -10,6 +10,7 @@ import logging import os import subprocess +import sys from datetime import datetime, timezone from os import path @@ -260,6 +261,31 @@ def run(self, args, jobs) -> None: if not args.disable_hooks: job.stop_hooks() + # Merge diagnosis records (failures, auto-fixes) into metrics. + # Works for all benchmarks that use DiagnosisRecorder. + diagnosis_path = os.environ.get("DIAGNOSIS_FILE_PATH", "") + if diagnosis_path and os.path.exists(diagnosis_path): + try: + sys.path.insert( + 0, + os.path.join( + os.path.dirname(__file__), + "..", + "..", + "..", + "packages", + "common", + ), + ) + from diagnosis_utils import DiagnosisRecorder + + recorder = DiagnosisRecorder.get_instance( + shared_file_path=diagnosis_path + ) + recorder.merge_failure_to_results(metrics) + except Exception as e: + logger.warning("Failed to merge diagnosis records: %s", e) + final_metrics["metrics"] = metrics stdout_reporter = ReporterFactory.create("stdout") click.echo("Results Report:", err=True) diff --git a/benchpress/config/jobs.yml b/benchpress/config/jobs.yml index 99481d41..e69a2ac7 100644 --- a/benchpress/config/jobs.yml +++ b/benchpress/config/jobs.yml @@ -10,6 +10,7 @@ - '-nnginx' - '-L {load_generator}' - '-s {lg_path}' + - '-U {auto_fix_ulimit}' - '--' - '--mediawiki' - '--client-duration={duration}' @@ -22,6 +23,7 @@ - 'lg_path=benchmarks/oss_performance_mediawiki/wrk/wrk' - 'duration=10m' - 'timeout=11m' + - 'auto_fix_ulimit=1' - 'extra_args=' hooks: - hook: copymove @@ -40,6 +42,7 @@ - '-L {load_generator}' - '-s {lg_path}' - '-T {temp_dir}' + - '-U {auto_fix_ulimit}' - '-p' - '--' - '--mediawiki' @@ -68,6 +71,7 @@ - 'temp_dir=default_no_temp_dir' - 'num_multi_req_warmups=-1' # -1 means the warmup status will define the number of iterations - 'load_generator_seed=1000' + - 'auto_fix_ulimit=1' - 'extra_args=' hooks: - hook: copymove @@ -86,6 +90,7 @@ - '-s {lg_path}' - '-R{scale_out}' - '-c{client_threads}' + - '-U {auto_fix_ulimit}' - '--' - '--mediawiki-mlp' - '--client-duration={duration}' @@ -100,6 +105,7 @@ - 'client_threads=0' - 'duration=10m' - 'timeout=11m' + - 'auto_fix_ulimit=1' - 'extra_args=' hooks: - hook: copymove @@ -118,6 +124,7 @@ - '-s {lg_path}' - '-R{scale_out}' - '-c{client_threads}' + - '-U {auto_fix_ulimit}' - '--' - '--mediawiki-mlp' - '--client-duration={duration}' @@ -133,6 +140,7 @@ - 'client_threads=0' - 'duration=10m' - 'timeout=11m' + - 'auto_fix_ulimit=1' - 'extra_args=' hooks: - hook: copymove @@ -151,6 +159,7 @@ - '-s {lg_path}' - '-R{scale_out}' - '-c{client_threads}' + - '-U {auto_fix_ulimit}' - '--' - '--mediawiki-mem' - '--client-duration={duration}' @@ -165,6 +174,7 @@ - 'client_threads=0' - 'duration=10m' - 'timeout=11m' + - 'auto_fix_ulimit=1' - 'extra_args=' hooks: - hook: copymove diff --git a/packages/common/diagnosis_utils.py b/packages/common/diagnosis_utils.py index 4c61ed20..52ba6515 100644 --- a/packages/common/diagnosis_utils.py +++ b/packages/common/diagnosis_utils.py @@ -14,7 +14,9 @@ import fcntl import json import os +import resource import socket +import subprocess import sys from datetime import datetime from typing import Any, Dict, List, Optional @@ -212,8 +214,8 @@ def record_auto_fix( Record a notable auto-fix that was applied and may affect the benchmark score. Args: - benchmark: Name of the benchmark (e.g., "tao_bench") - fix_type: Type of fix (e.g., "ephemeral_port_cap") + benchmark: Name of the benchmark (e.g., "tao_bench", "mediawiki") + fix_type: Type of fix (e.g., "ephemeral_port_cap", "file_descriptor_limit") description: Human-readable description of what was fixed and why original_value: The original value before the fix fixed_value: The value after the fix was applied @@ -539,3 +541,194 @@ def record_port_unavailable_error( """ recorder = DiagnosisRecorder.get_instance(root_dir=root_dir) recorder.record_port_unavailable_error(port=port, benchmark=benchmark, errno=errno) + + +def check_file_descriptor_limit( + benchmark: str, + required_fds: int, + auto_fix: bool = False, + root_dir: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, +) -> bool: + """ + Check if the file descriptor soft limit is sufficient and optionally auto-fix. + + This is useful for benchmarks that open many sockets, files, or database + connections (e.g., mediawiki with HHVM + nginx + wrk, or tao_bench with + many memtier client connections). + + Args: + benchmark: Name of the benchmark (for diagnosis recording) + required_fds: Minimum number of file descriptors needed + auto_fix: If True, attempt to raise the soft limit automatically + root_dir: Root directory for diagnosis file + metadata: Additional metadata to include in diagnosis records + + Returns: + True if the FD limit is sufficient (or was auto-fixed), False otherwise + """ + soft_limit, hard_limit = resource.getrlimit(resource.RLIMIT_NOFILE) + + if soft_limit >= required_fds: + print(f"File descriptor limit OK: soft={soft_limit}, need={required_fds}") + return True + + recorder = DiagnosisRecorder.get_instance(root_dir=root_dir) + extra_meta = metadata or {} + + if auto_fix: + try: + new_limit = required_fds + if hard_limit < new_limit: + resource.setrlimit(resource.RLIMIT_NOFILE, (new_limit, new_limit)) + else: + resource.setrlimit(resource.RLIMIT_NOFILE, (new_limit, hard_limit)) + actual_soft, _ = resource.getrlimit(resource.RLIMIT_NOFILE) + recorder.record_auto_fix( + benchmark=benchmark, + fix_type="file_descriptor_limit", + description=( + f"Raised file descriptor soft limit from {soft_limit} to " + f"{actual_soft} (need at least {required_fds})." + ), + original_value=soft_limit, + fixed_value=actual_soft, + score_impact=( + "None \u2014 raising the file descriptor limit has no " + "performance impact." + ), + metadata={ + "original_soft_limit": soft_limit, + "original_hard_limit": hard_limit, + "new_soft_limit": actual_soft, + "required_fds": required_fds, + **extra_meta, + }, + ) + return True + except (ValueError, OSError) as e: + error_msg = ( + f"File descriptor soft limit ({soft_limit}) is too low " + f"(need at least {required_fds}). Auto-fix failed: {e}" + ) + print(f"\nWARNING: {error_msg}\n", file=sys.stderr) + recorder.record_failure( + benchmark=benchmark, + error_type="file_descriptor_limit_low", + reason=error_msg, + solutions=[ + f"Run with higher ulimit: sudo bash -c 'ulimit -n " + f"{required_fds} && ./benchpress_cli.py run ...'", + "Set system-wide limit in /etc/security/limits.conf and reboot", + ], + metadata={ + "soft_limit": soft_limit, + "hard_limit": hard_limit, + "required_fds": required_fds, + "auto_fix_error": str(e), + **extra_meta, + }, + ) + return False + + # No auto-fix \u2014 record failure and warn + error_msg = ( + f"File descriptor soft limit ({soft_limit}) is too low " + f"(need at least {required_fds}). " + f"Processes may fail with 'Too many open files'." + ) + print( + f"\n{'=' * 80}\n" + f"ERROR: File descriptor limit too low\n" + f"{'=' * 80}\n\n" + f"{error_msg}\n" + f" Current: soft={soft_limit}, hard={hard_limit}\n\n" + f"SOLUTIONS:\n" + f" 1. Enable auto-fix: add auto_fix_ulimit=1 to job input\n" + f" 2. Run with higher ulimit: sudo bash -c 'ulimit -n " + f"{required_fds} && ./benchpress_cli.py run ...'\n" + f" 3. Set in /etc/security/limits.conf and reboot\n" + f"\n{'=' * 80}\n", + file=sys.stderr, + ) + recorder.record_failure( + benchmark=benchmark, + error_type="file_descriptor_limit_low", + reason=error_msg, + solutions=[ + "Enable auto-fix: add auto_fix_ulimit=1 to job input", + f"Run with higher ulimit: sudo bash -c 'ulimit -n " + f"{required_fds} && ./benchpress_cli.py run ...'", + "Set system-wide limit in /etc/security/limits.conf and reboot", + ], + metadata={ + "soft_limit": soft_limit, + "hard_limit": hard_limit, + "required_fds": required_fds, + **extra_meta, + }, + ) + # Continue anyway so the diagnosis appears in the output JSON + return False + + +def check_selinux( + benchmark: str, + root_dir: Optional[str] = None, +) -> bool: + """ + Check if SELinux is disabled or permissive. + + SELinux in Enforcing mode can cause segfaults or permission denials for + benchmarks that use JIT compilation, custom memory allocators, or bind + to non-standard ports. + + Args: + benchmark: Name of the benchmark (for diagnosis recording) + root_dir: Root directory for diagnosis file + + Returns: + True if SELinux is disabled or permissive, False if Enforcing + """ + try: + result = subprocess.run( + ["getenforce"], capture_output=True, text=True, timeout=5 + ) + status = result.stdout.strip() + except (FileNotFoundError, subprocess.TimeoutExpired): + # getenforce not found \u2014 SELinux likely not installed + print("SELinux check: getenforce not found, assuming SELinux is not installed") + return True + + if status in ("Disabled", "Permissive"): + print(f"SELinux check OK: {status}") + return True + + error_msg = ( + f"SELinux is set to '{status}'. This may cause segfaults or " + f"permission errors during the {benchmark} benchmark." + ) + print( + f"\n{'=' * 80}\n" + f"ERROR: SELinux is Enforcing\n" + f"{'=' * 80}\n\n" + f"{error_msg}\n\n" + f"SOLUTIONS:\n" + f" 1. Disable SELinux temporarily: sudo setenforce 0\n" + f" 2. Disable SELinux permanently: edit /etc/selinux/config\n" + f"\n{'=' * 80}\n", + file=sys.stderr, + ) + recorder = DiagnosisRecorder.get_instance(root_dir=root_dir) + recorder.record_failure( + benchmark=benchmark, + error_type="selinux_enforcing", + reason=error_msg, + solutions=[ + "Disable SELinux temporarily: sudo setenforce 0", + "Disable SELinux permanently: set SELINUX=disabled in " + "/etc/selinux/config and reboot", + ], + metadata={"selinux_status": status}, + ) + return False diff --git a/packages/common/preflight_checks.py b/packages/common/preflight_checks.py new file mode 100644 index 00000000..7b3b5f4c --- /dev/null +++ b/packages/common/preflight_checks.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +""" +Reusable pre-flight checks for any benchmark. + +Can be called from any benchmark's runner script (especially bash-based +runners that cannot call diagnosis_utils.py functions directly). +Diagnosis merging is handled at the framework level in run.py. + +Usage: + python3 preflight_checks.py --benchmark mediawiki --benchpress-root /path \ + [--auto-fix-ulimit] [--min-fds 100000] +""" + +import argparse +import sys + +from diagnosis_utils import ( + check_file_descriptor_limit, + check_ipv6_hostname, + check_selinux, + DiagnosisRecorder, +) + + +# Default minimum file descriptors needed for a reliable run. +DEFAULT_MIN_FDS = 100_000 + + +def run_checks(args): + """Run pre-flight checks before the benchmark starts.""" + DiagnosisRecorder.get_instance(root_dir=args.benchpress_root) + + all_ok = True + all_ok = ( + check_selinux(benchmark=args.benchmark, root_dir=args.benchpress_root) + and all_ok + ) + # check_ipv6_hostname returns True when IPv6 is detected (not necessarily + # broken). It records a diagnosis failure internally only when IPv6 is + # detected AND broken, so we don't use its return value for all_ok. + check_ipv6_hostname( + "localhost", benchmark=args.benchmark, root_dir=args.benchpress_root + ) + all_ok = ( + check_file_descriptor_limit( + benchmark=args.benchmark, + required_fds=args.min_fds, + auto_fix=args.auto_fix_ulimit, + root_dir=args.benchpress_root, + ) + and all_ok + ) + + if all_ok: + print("\nAll pre-flight checks passed.") + else: + print( + "\nSome pre-flight checks failed (see above). " + "Continuing anyway so diagnostics appear in the output.", + file=sys.stderr, + ) + + +def main(): + parser = argparse.ArgumentParser( + description="Reusable pre-flight checks for benchpress benchmarks" + ) + parser.add_argument( + "--benchmark", + default="unknown", + help="Name of the benchmark (e.g., mediawiki, tao_bench)", + ) + parser.add_argument( + "--benchpress-root", default=".", help="Path to benchpress root directory" + ) + parser.add_argument( + "--min-fds", + type=int, + default=DEFAULT_MIN_FDS, + help=f"Minimum file descriptor soft limit required (default: {DEFAULT_MIN_FDS})", + ) + parser.add_argument( + "--auto-fix-ulimit", + action="store_true", + help="Automatically raise file descriptor soft limit if too low", + ) + args = parser.parse_args() + + run_checks(args) + + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/packages/mediawiki/run.sh b/packages/mediawiki/run.sh index 6716c40d..4ab0f0bb 100755 --- a/packages/mediawiki/run.sh +++ b/packages/mediawiki/run.sh @@ -68,6 +68,7 @@ Proxy shell script to executes oss-performance benchmark -D enable TC dump with vm-dump-tc -C TC dump interval in seconds (default:600) -O output directory for JIT monitoring files (default: /tmp/jit_study_output) + -U <0|1> set to 1 to automatically raise file descriptor soft limit if too low Any other options that oss-performance perf.php script could accept can be passed in as extra arguments appending two hyphens '--' followed by the @@ -251,6 +252,10 @@ function main() { local temp_dir temp_dir="" + # Pre-flight check options + local auto_fix_ulimit + auto_fix_ulimit=false + # JIT monitoring options enable_jit_monitoring=false jit_monitor_port="localhost:9092" @@ -259,7 +264,7 @@ function main() { tc_dump_interval=600 jit_output_dir="/tmp/jit_study_output" - while getopts 'H:n:r:L:s:R:t:c:m:pT:jJ:DI:C:O:' OPTION "${@}"; do + while getopts 'H:n:r:L:s:R:t:c:m:pT:jJ:DI:C:O:U:' OPTION "${@}"; do case "$OPTION" in H) db_host="${OPTARG}" @@ -338,6 +343,11 @@ function main() { # Make sure the directory exists mkdir -p "${jit_output_dir}" ;; + U) + if [[ "${OPTARG}" -ne 0 ]]; then + auto_fix_ulimit=true + fi + ;; ?) show_help >&2 exit 1 @@ -359,6 +369,7 @@ function main() { readonly disable_perf_record readonly use_temp_dir readonly temp_dir + readonly auto_fix_ulimit readonly enable_jit_monitoring readonly jit_monitor_port readonly jit_monitor_interval @@ -366,6 +377,16 @@ function main() { readonly tc_dump_interval readonly jit_output_dir + # Run pre-flight checks (FD limits, SELinux, IPv6 hostname) + local _preflight_args=(--benchmark mediawiki --benchpress-root "${OLD_CWD}") + if [[ "${auto_fix_ulimit}" == "true" ]]; then + _preflight_args+=(--auto-fix-ulimit) + # Also raise the ulimit in this shell so child processes (HHVM, nginx, wrk) + # inherit the higher limit. The Python auto-fix only affects its own process. + ulimit -n 100000 2>/dev/null || true + fi + python3 "${SCRIPT_DIR}/../common/preflight_checks.py" "${_preflight_args[@]}" + # Check if dump_jit_size.py exists when JIT monitoring is enabled if [[ "${enable_jit_monitoring}" == "true" ]]; then if [[ ! -f "${SCRIPT_DIR}/dump_jit_size.py" ]]; then diff --git a/packages/tao_bench/run_autoscale.py b/packages/tao_bench/run_autoscale.py index d8f9173e..12d9ec43 100755 --- a/packages/tao_bench/run_autoscale.py +++ b/packages/tao_bench/run_autoscale.py @@ -733,10 +733,6 @@ def run_server(args): # Valid server result results.append(res) print(f"Server {i}: Successfully parsed and added to results") - recorder.merge_failure_to_results( - results_dict=overall, - ) - for res in results: overall["fast_qps"] += res["fast_qps"] overall["slow_qps"] += res["slow_qps"]