From 6293d85f6733254d6ebc245aef8396f5e4ae098a Mon Sep 17 00:00:00 2001
From: Marzieh Lenjani <marziehlenjani@meta.com>
Date: Wed, 1 Apr 2026 08:55:29 -0700
Subject: [PATCH] Add diagnosis APIs to MediaWiki benchmark
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
This diff adds reusable pre-flight diagnostic checks and centralizes diagnosis
  merging at the framework level, so all benchmarks benefit automatically.

  **1. Shared check functions (`packages/common/diagnosis_utils.py`)**
  Added two reusable check functions to the shared diagnosis API:
  - `check_file_descriptor_limit()`: verifies ulimit is sufficient, with optional
    auto-fix that records the change under `notable_auto_fixes`
  - `check_selinux()`: detects SELinux Enforcing mode (causes HHVM segfaults)

  These join existing `check_ipv6_hostname()` and `check_port_available()`.

  **2. Generic pre-flight CLI (`packages/common/preflight_checks.py`)**
  A reusable script callable from any benchmark's bash runner:
  - Accepts `--benchmark <name>` and `--min-fds <N>` for per-benchmark config
  - Runs SELinux, IPv6 hostname, and file descriptor checks
  - Fixes inverted `check_ipv6_hostname` usage (returns True when IPv6 is
    detected, not necessarily broken — should not gate `all_ok`)

  **3. Framework-level diagnosis merge (`benchpress/cli/commands/run.py`)**
  After `job.run()`, `run.py` now automatically merges DiagnosisRecorder records
  (failures + auto-fixes) into metrics via `DIAGNOSIS_FILE_PATH`. Every benchmark
  gets diagnosis merging for free — no per-benchmark merge code needed. Removed
  the now-redundant `merge_failure_to_results()` call from tao_bench's
  `run_autoscale.py`.

  **4. MediaWiki integration**
  - `run.sh`: Calls common `preflight_checks.py` before benchmark; raises
    `ulimit -n` in bash so child processes (HHVM, nginx, wrk) inherit the limit
  - `jobs.yml`: Added `-U {auto_fix_ulimit}` arg with default `auto_fix_ulimit=1`
    to all 5 mediawiki job entries

Differential Revision: D98805269
---
 benchpress/cli/commands/run.py      |  26 ++++
 benchpress/config/jobs.yml          |  10 ++
 packages/common/diagnosis_utils.py  | 197 +++++++++++++++++++++++++++-
 packages/common/preflight_checks.py | 100 ++++++++++++++
 packages/mediawiki/run.sh           |  23 +++-
 packages/tao_bench/run_autoscale.py |   4 -
 6 files changed, 353 insertions(+), 7 deletions(-)
 create mode 100644 packages/common/preflight_checks.py
diff --git a/benchpress/cli/commands/run.py b/benchpress/cli/commands/run.py
index 41787581..d679501f 100644
--- a/benchpress/cli/commands/run.py
+++ b/benchpress/cli/commands/run.py
@@ -10,6 +10,7 @@
 import logging
 import os
 import subprocess
+import sys
 from datetime import datetime, timezone
 from os import path
 
@@ -260,6 +261,31 @@ def run(self, args, jobs) -> None:
                 if not args.disable_hooks:
                     job.stop_hooks()
 
+            # Merge diagnosis records (failures, auto-fixes) into metrics.
+            # Works for all benchmarks that use DiagnosisRecorder.
+            diagnosis_path = os.environ.get("DIAGNOSIS_FILE_PATH", "")
+            if diagnosis_path and os.path.exists(diagnosis_path):
+                try:
+                    sys.path.insert(
+                        0,
+                        os.path.join(
+                            os.path.dirname(__file__),
+                            "..",
+                            "..",
+                            "..",
+                            "packages",
+                            "common",
+                        ),
+                    )
+                    from diagnosis_utils import DiagnosisRecorder
+
+                    recorder = DiagnosisRecorder.get_instance(
+                        shared_file_path=diagnosis_path
+                    )
+                    recorder.merge_failure_to_results(metrics)
+                except Exception as e:
+                    logger.warning("Failed to merge diagnosis records: %s", e)
+
             final_metrics["metrics"] = metrics
             stdout_reporter = ReporterFactory.create("stdout")
             click.echo("Results Report:", err=True)
diff --git a/benchpress/config/jobs.yml b/benchpress/config/jobs.yml
index 99481d41..e69a2ac7 100644
--- a/benchpress/config/jobs.yml
+++ b/benchpress/config/jobs.yml
@@ -10,6 +10,7 @@
     - '-nnginx'
     - '-L {load_generator}'
     - '-s {lg_path}'
+    - '-U {auto_fix_ulimit}'
     - '--'
     - '--mediawiki'
     - '--client-duration={duration}'
@@ -22,6 +23,7 @@
     - 'lg_path=benchmarks/oss_performance_mediawiki/wrk/wrk'
     - 'duration=10m'
     - 'timeout=11m'
+    - 'auto_fix_ulimit=1'
     - 'extra_args='
   hooks:
     - hook: copymove
@@ -40,6 +42,7 @@
     - '-L {load_generator}'
     - '-s {lg_path}'
     - '-T {temp_dir}'
+    - '-U {auto_fix_ulimit}'
     - '-p'
     - '--'
     - '--mediawiki'
@@ -68,6 +71,7 @@
     - 'temp_dir=default_no_temp_dir'
     - 'num_multi_req_warmups=-1' # -1 means the warmup status will define the number of iterations
     - 'load_generator_seed=1000'
+    - 'auto_fix_ulimit=1'
     - 'extra_args='
   hooks:
     - hook: copymove
@@ -86,6 +90,7 @@
     - '-s {lg_path}'
     - '-R{scale_out}'
     - '-c{client_threads}'
+    - '-U {auto_fix_ulimit}'
     - '--'
     - '--mediawiki-mlp'
     - '--client-duration={duration}'
@@ -100,6 +105,7 @@
     - 'client_threads=0'
     - 'duration=10m'
     - 'timeout=11m'
+    - 'auto_fix_ulimit=1'
     - 'extra_args='
   hooks:
     - hook: copymove
@@ -118,6 +124,7 @@
     - '-s {lg_path}'
     - '-R{scale_out}'
     - '-c{client_threads}'
+    - '-U {auto_fix_ulimit}'
     - '--'
     - '--mediawiki-mlp'
     - '--client-duration={duration}'
@@ -133,6 +140,7 @@
     - 'client_threads=0'
     - 'duration=10m'
     - 'timeout=11m'
+    - 'auto_fix_ulimit=1'
     - 'extra_args='
   hooks:
     - hook: copymove
@@ -151,6 +159,7 @@
     - '-s {lg_path}'
     - '-R{scale_out}'
     - '-c{client_threads}'
+    - '-U {auto_fix_ulimit}'
     - '--'
     - '--mediawiki-mem'
     - '--client-duration={duration}'
@@ -165,6 +174,7 @@
     - 'client_threads=0'
     - 'duration=10m'
     - 'timeout=11m'
+    - 'auto_fix_ulimit=1'
     - 'extra_args='
   hooks:
     - hook: copymove
diff --git a/packages/common/diagnosis_utils.py b/packages/common/diagnosis_utils.py
index 4c61ed20..52ba6515 100644
--- a/packages/common/diagnosis_utils.py
+++ b/packages/common/diagnosis_utils.py
@@ -14,7 +14,9 @@
 import fcntl
 import json
 import os
+import resource
 import socket
+import subprocess
 import sys
 from datetime import datetime
 from typing import Any, Dict, List, Optional
@@ -212,8 +214,8 @@ def record_auto_fix(
         Record a notable auto-fix that was applied and may affect the benchmark score.
 
         Args:
-            benchmark: Name of the benchmark (e.g., "tao_bench")
-            fix_type: Type of fix (e.g., "ephemeral_port_cap")
+            benchmark: Name of the benchmark (e.g., "tao_bench", "mediawiki")
+            fix_type: Type of fix (e.g., "ephemeral_port_cap", "file_descriptor_limit")
             description: Human-readable description of what was fixed and why
             original_value: The original value before the fix
             fixed_value: The value after the fix was applied
@@ -539,3 +541,194 @@ def record_port_unavailable_error(
     """
     recorder = DiagnosisRecorder.get_instance(root_dir=root_dir)
     recorder.record_port_unavailable_error(port=port, benchmark=benchmark, errno=errno)
+
+
+def check_file_descriptor_limit(
+    benchmark: str,
+    required_fds: int,
+    auto_fix: bool = False,
+    root_dir: Optional[str] = None,
+    metadata: Optional[Dict[str, Any]] = None,
+) -> bool:
+    """
+    Check if the file descriptor soft limit is sufficient and optionally auto-fix.
+
+    This is useful for benchmarks that open many sockets, files, or database
+    connections (e.g., mediawiki with HHVM + nginx + wrk, or tao_bench with
+    many memtier client connections).
+
+    Args:
+        benchmark: Name of the benchmark (for diagnosis recording)
+        required_fds: Minimum number of file descriptors needed
+        auto_fix: If True, attempt to raise the soft limit automatically
+        root_dir: Root directory for diagnosis file
+        metadata: Additional metadata to include in diagnosis records
+
+    Returns:
+        True if the FD limit is sufficient (or was auto-fixed), False otherwise
+    """
+    soft_limit, hard_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
+
+    if soft_limit >= required_fds:
+        print(f"File descriptor limit OK: soft={soft_limit}, need={required_fds}")
+        return True
+
+    recorder = DiagnosisRecorder.get_instance(root_dir=root_dir)
+    extra_meta = metadata or {}
+
+    if auto_fix:
+        try:
+            new_limit = required_fds
+            if hard_limit < new_limit:
+                resource.setrlimit(resource.RLIMIT_NOFILE, (new_limit, new_limit))
+            else:
+                resource.setrlimit(resource.RLIMIT_NOFILE, (new_limit, hard_limit))
+            actual_soft, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
+            recorder.record_auto_fix(
+                benchmark=benchmark,
+                fix_type="file_descriptor_limit",
+                description=(
+                    f"Raised file descriptor soft limit from {soft_limit} to "
+                    f"{actual_soft} (need at least {required_fds})."
+                ),
+                original_value=soft_limit,
+                fixed_value=actual_soft,
+                score_impact=(
+                    "None \u2014 raising the file descriptor limit has no "
+                    "performance impact."
+                ),
+                metadata={
+                    "original_soft_limit": soft_limit,
+                    "original_hard_limit": hard_limit,
+                    "new_soft_limit": actual_soft,
+                    "required_fds": required_fds,
+                    **extra_meta,
+                },
+            )
+            return True
+        except (ValueError, OSError) as e:
+            error_msg = (
+                f"File descriptor soft limit ({soft_limit}) is too low "
+                f"(need at least {required_fds}). Auto-fix failed: {e}"
+            )
+            print(f"\nWARNING: {error_msg}\n", file=sys.stderr)
+            recorder.record_failure(
+                benchmark=benchmark,
+                error_type="file_descriptor_limit_low",
+                reason=error_msg,
+                solutions=[
+                    f"Run with higher ulimit: sudo bash -c 'ulimit -n "
+                    f"{required_fds} && ./benchpress_cli.py run ...'",
+                    "Set system-wide limit in /etc/security/limits.conf and reboot",
+                ],
+                metadata={
+                    "soft_limit": soft_limit,
+                    "hard_limit": hard_limit,
+                    "required_fds": required_fds,
+                    "auto_fix_error": str(e),
+                    **extra_meta,
+                },
+            )
+            return False
+
+    # No auto-fix \u2014 record failure and warn
+    error_msg = (
+        f"File descriptor soft limit ({soft_limit}) is too low "
+        f"(need at least {required_fds}). "
+        f"Processes may fail with 'Too many open files'."
+    )
+    print(
+        f"\n{'=' * 80}\n"
+        f"ERROR: File descriptor limit too low\n"
+        f"{'=' * 80}\n\n"
+        f"{error_msg}\n"
+        f"  Current: soft={soft_limit}, hard={hard_limit}\n\n"
+        f"SOLUTIONS:\n"
+        f"  1. Enable auto-fix: add auto_fix_ulimit=1 to job input\n"
+        f"  2. Run with higher ulimit: sudo bash -c 'ulimit -n "
+        f"{required_fds} && ./benchpress_cli.py run ...'\n"
+        f"  3. Set in /etc/security/limits.conf and reboot\n"
+        f"\n{'=' * 80}\n",
+        file=sys.stderr,
+    )
+    recorder.record_failure(
+        benchmark=benchmark,
+        error_type="file_descriptor_limit_low",
+        reason=error_msg,
+        solutions=[
+            "Enable auto-fix: add auto_fix_ulimit=1 to job input",
+            f"Run with higher ulimit: sudo bash -c 'ulimit -n "
+            f"{required_fds} && ./benchpress_cli.py run ...'",
+            "Set system-wide limit in /etc/security/limits.conf and reboot",
+        ],
+        metadata={
+            "soft_limit": soft_limit,
+            "hard_limit": hard_limit,
+            "required_fds": required_fds,
+            **extra_meta,
+        },
+    )
+    # Continue anyway so the diagnosis appears in the output JSON
+    return False
+
+
+def check_selinux(
+    benchmark: str,
+    root_dir: Optional[str] = None,
+) -> bool:
+    """
+    Check if SELinux is disabled or permissive.
+
+    SELinux in Enforcing mode can cause segfaults or permission denials for
+    benchmarks that use JIT compilation, custom memory allocators, or bind
+    to non-standard ports.
+
+    Args:
+        benchmark: Name of the benchmark (for diagnosis recording)
+        root_dir: Root directory for diagnosis file
+
+    Returns:
+        True if SELinux is disabled or permissive, False if Enforcing
+    """
+    try:
+        result = subprocess.run(
+            ["getenforce"], capture_output=True, text=True, timeout=5
+        )
+        status = result.stdout.strip()
+    except (FileNotFoundError, subprocess.TimeoutExpired):
+        # getenforce not found \u2014 SELinux likely not installed
+        print("SELinux check: getenforce not found, assuming SELinux is not installed")
+        return True
+
+    if status in ("Disabled", "Permissive"):
+        print(f"SELinux check OK: {status}")
+        return True
+
+    error_msg = (
+        f"SELinux is set to '{status}'. This may cause segfaults or "
+        f"permission errors during the {benchmark} benchmark."
+    )
+    print(
+        f"\n{'=' * 80}\n"
+        f"ERROR: SELinux is Enforcing\n"
+        f"{'=' * 80}\n\n"
+        f"{error_msg}\n\n"
+        f"SOLUTIONS:\n"
+        f"  1. Disable SELinux temporarily: sudo setenforce 0\n"
+        f"  2. Disable SELinux permanently: edit /etc/selinux/config\n"
+        f"\n{'=' * 80}\n",
+        file=sys.stderr,
+    )
+    recorder = DiagnosisRecorder.get_instance(root_dir=root_dir)
+    recorder.record_failure(
+        benchmark=benchmark,
+        error_type="selinux_enforcing",
+        reason=error_msg,
+        solutions=[
+            "Disable SELinux temporarily: sudo setenforce 0",
+            "Disable SELinux permanently: set SELINUX=disabled in "
+            "/etc/selinux/config and reboot",
+        ],
+        metadata={"selinux_status": status},
+    )
+    return False
diff --git a/packages/common/preflight_checks.py b/packages/common/preflight_checks.py
new file mode 100644
index 00000000..7b3b5f4c
--- /dev/null
+++ b/packages/common/preflight_checks.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Reusable pre-flight checks for any benchmark.
+
+Can be called from any benchmark's runner script (especially bash-based
+runners that cannot call diagnosis_utils.py functions directly).
+Diagnosis merging is handled at the framework level in run.py.
+
+Usage:
+    python3 preflight_checks.py --benchmark mediawiki --benchpress-root /path \
+        [--auto-fix-ulimit] [--min-fds 100000]
+"""
+
+import argparse
+import sys
+
+from diagnosis_utils import (
+    check_file_descriptor_limit,
+    check_ipv6_hostname,
+    check_selinux,
+    DiagnosisRecorder,
+)
+
+
+# Default minimum file descriptors needed for a reliable run.
+DEFAULT_MIN_FDS = 100_000
+
+
+def run_checks(args):
+    """Run pre-flight checks before the benchmark starts."""
+    DiagnosisRecorder.get_instance(root_dir=args.benchpress_root)
+
+    all_ok = True
+    all_ok = (
+        check_selinux(benchmark=args.benchmark, root_dir=args.benchpress_root)
+        and all_ok
+    )
+    # check_ipv6_hostname returns True when IPv6 is detected (not necessarily
+    # broken). It records a diagnosis failure internally only when IPv6 is
+    # detected AND broken, so we don't use its return value for all_ok.
+    check_ipv6_hostname(
+        "localhost", benchmark=args.benchmark, root_dir=args.benchpress_root
+    )
+    all_ok = (
+        check_file_descriptor_limit(
+            benchmark=args.benchmark,
+            required_fds=args.min_fds,
+            auto_fix=args.auto_fix_ulimit,
+            root_dir=args.benchpress_root,
+        )
+        and all_ok
+    )
+
+    if all_ok:
+        print("\nAll pre-flight checks passed.")
+    else:
+        print(
+            "\nSome pre-flight checks failed (see above). "
+            "Continuing anyway so diagnostics appear in the output.",
+            file=sys.stderr,
+        )
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Reusable pre-flight checks for benchpress benchmarks"
+    )
+    parser.add_argument(
+        "--benchmark",
+        default="unknown",
+        help="Name of the benchmark (e.g., mediawiki, tao_bench)",
+    )
+    parser.add_argument(
+        "--benchpress-root", default=".", help="Path to benchpress root directory"
+    )
+    parser.add_argument(
+        "--min-fds",
+        type=int,
+        default=DEFAULT_MIN_FDS,
+        help=f"Minimum file descriptor soft limit required (default: {DEFAULT_MIN_FDS})",
+    )
+    parser.add_argument(
+        "--auto-fix-ulimit",
+        action="store_true",
+        help="Automatically raise file descriptor soft limit if too low",
+    )
+    args = parser.parse_args()
+
+    run_checks(args)
+
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/packages/mediawiki/run.sh b/packages/mediawiki/run.sh
index 6716c40d..4ab0f0bb 100755
--- a/packages/mediawiki/run.sh
+++ b/packages/mediawiki/run.sh
@@ -68,6 +68,7 @@ Proxy shell script to executes oss-performance benchmark
     -D          enable TC dump with vm-dump-tc
     -C          TC dump interval in seconds (default:600)
     -O          output directory for JIT monitoring files (default: /tmp/jit_study_output)
+    -U <0|1>    set to 1 to automatically raise file descriptor soft limit if too low
 
 Any other options that oss-performance perf.php script could accept can be
 passed in as extra arguments appending two hyphens '--' followed by the
@@ -251,6 +252,10 @@ function main() {
   local temp_dir
   temp_dir=""
 
+  # Pre-flight check options
+  local auto_fix_ulimit
+  auto_fix_ulimit=false
+
   # JIT monitoring options
   enable_jit_monitoring=false
   jit_monitor_port="localhost:9092"
@@ -259,7 +264,7 @@ function main() {
   tc_dump_interval=600
   jit_output_dir="/tmp/jit_study_output"
 
-  while getopts 'H:n:r:L:s:R:t:c:m:pT:jJ:DI:C:O:' OPTION "${@}"; do
+  while getopts 'H:n:r:L:s:R:t:c:m:pT:jJ:DI:C:O:U:' OPTION "${@}"; do
     case "$OPTION" in
       H)
         db_host="${OPTARG}"
@@ -338,6 +343,11 @@ function main() {
         # Make sure the directory exists
         mkdir -p "${jit_output_dir}"
         ;;
+      U)
+        if [[ "${OPTARG}" -ne 0 ]]; then
+          auto_fix_ulimit=true
+        fi
+        ;;
       ?)
         show_help >&2
         exit 1
@@ -359,6 +369,7 @@ function main() {
   readonly disable_perf_record
   readonly use_temp_dir
   readonly temp_dir
+  readonly auto_fix_ulimit
   readonly enable_jit_monitoring
   readonly jit_monitor_port
   readonly jit_monitor_interval
@@ -366,6 +377,16 @@ function main() {
   readonly tc_dump_interval
   readonly jit_output_dir
 
+  # Run pre-flight checks (FD limits, SELinux, IPv6 hostname)
+  local _preflight_args=(--benchmark mediawiki --benchpress-root "${OLD_CWD}")
+  if [[ "${auto_fix_ulimit}" == "true" ]]; then
+    _preflight_args+=(--auto-fix-ulimit)
+    # Also raise the ulimit in this shell so child processes (HHVM, nginx, wrk)
+    # inherit the higher limit. The Python auto-fix only affects its own process.
+    ulimit -n 100000 2>/dev/null || true
+  fi
+  python3 "${SCRIPT_DIR}/../common/preflight_checks.py" "${_preflight_args[@]}"
+
   # Check if dump_jit_size.py exists when JIT monitoring is enabled
   if [[ "${enable_jit_monitoring}" == "true" ]]; then
     if [[ ! -f "${SCRIPT_DIR}/dump_jit_size.py" ]]; then
diff --git a/packages/tao_bench/run_autoscale.py b/packages/tao_bench/run_autoscale.py
index d8f9173e..12d9ec43 100755
--- a/packages/tao_bench/run_autoscale.py
+++ b/packages/tao_bench/run_autoscale.py
@@ -733,10 +733,6 @@ def run_server(args):
                 # Valid server result
                 results.append(res)
                 print(f"Server {i}: Successfully parsed and added to results")
-    recorder.merge_failure_to_results(
-        results_dict=overall,
-    )
-
     for res in results:
         overall["fast_qps"] += res["fast_qps"]
         overall["slow_qps"] += res["slow_qps"]