diff --git a/docs/src/changelog.md b/docs/src/changelog.md
index df57e42..3e3dd46 100644
--- a/docs/src/changelog.md
+++ b/docs/src/changelog.md
@@ -2,6 +2,7 @@
 ## Unreleased
 ### Added
 #### Commands
+- `runbms`: new `--exit-on-failure [CODE]` flag to exit with a specified code (default: 1) when any benchmark configuration fails, making it suitable for CI environments.
 - `runbms` gains an extra argument, `--randomize-configs`, to randomize the order of configs for each invocation to help distinguish between system-related noise and configuration-specific issues.
 
 ### Changed
diff --git a/docs/src/commands/runbms.md b/docs/src/commands/runbms.md
index 661176f..500d8da 100644
--- a/docs/src/commands/runbms.md
+++ b/docs/src/commands/runbms.md
@@ -3,7 +3,7 @@ This subcommand runs benchmarks with different configs, possibly with varying he
 
 ## Usage
 ```console
-runbms [-h|--help] [-i|--invocations INVOCATIONS] [-s|--slice SLICE] [-p|--id-prefix ID_PREFIX] [-m|--minheap-multiplier MINHEAP_MULTIPLIER] [--skip-oom SKIP_OOM] [--skip-timeout SKIP_TIMEOUT] [--resume RESUME] [--workdir WORKDIR] [--skip-log-compression] [--randomize-configs] LOG_DIR CONFIG [N] [n ...]
+runbms [-h|--help] [-i|--invocations INVOCATIONS] [-s|--slice SLICE] [-p|--id-prefix ID_PREFIX] [-m|--minheap-multiplier MINHEAP_MULTIPLIER] [--skip-oom SKIP_OOM] [--skip-timeout SKIP_TIMEOUT] [--resume RESUME] [--workdir WORKDIR] [--skip-log-compression] [--exit-on-failure [CODE]] [--randomize-configs] LOG_DIR CONFIG [N] [n ...]
 ```
 
 `-h`: print help message.
@@ -14,7 +14,7 @@ Overrides `invocations` in the config file.
 `-s`: only use the specified heap sizes.
 This is a comma-separated string of integers or floating point numbers.
 For each slice `s` in `SLICE`, we run benchmarks at `s * minheap`.
-`N` and `n`s are ignored. 
+`N` and `n`s are ignored.
 
 `-p`: add a prefix to the folder names where the results are stored.
 By default, the folder that stores the result is named using the host name and the timestamp.
@@ -35,6 +35,11 @@ If not specified, a temporary directory will be created under an OS-dependent lo
 
 `--skip-log-compression`: skip compressing log file as gzip.
 
+`--exit-on-failure` (preview ⚠️): stop and exit with the specified code (default: 1) as soon as any configuration fails.
+This is useful for CI environments where you need to detect failed runs without parsing the output.
+By default, `runbms` exits with code 0 even when some configurations fail.
+If the flag is provided without a code, it defaults to exit code 1.
+
 `--randomize-configs` (preview ⚠️): randomize the order of configs for each invocation to help distinguish between system-related noise and configuration-specific issues.
 
 `LOG_DIR`: where to store the results.
diff --git a/src/running/command/runbms.py b/src/running/command/runbms.py
index d2d37ff..76c8d07 100644
--- a/src/running/command/runbms.py
+++ b/src/running/command/runbms.py
@@ -32,6 +32,7 @@
 import math
 import yaml
 from collections import defaultdict
+import sys
 import random
 
 if TYPE_CHECKING:
@@ -47,6 +48,7 @@
 randomize_configs: bool = False
 plugins: Dict[str, Any]
 resume: Optional[str]
+exit_on_failure_code: Optional[int] = None
 
 
 def setup_parser(subparsers):
@@ -67,6 +69,14 @@ def setup_parser(subparsers):
     f.add_argument(
         "--skip-log-compression", action="store_true", help="Skip compressing log files"
     )
+    f.add_argument(
+        "--exit-on-failure",
+        nargs="?",
+        const=1,
+        type=int,
+        metavar="CODE",
+        help="Exit with specified code (default: 1) if any configuration fails",
+    )
     f.add_argument(
         "--randomize-configs",
         action="store_true",
@@ -299,9 +309,13 @@ def run_one_benchmark(
                 p.start_config(hfac, size, bm, i, c, j)
             if skip_oom is not None and oomed_count[c] >= skip_oom:
                 print(".", end="", flush=True)
+                if exit_on_failure_code is not None:
+                    sys.exit(exit_on_failure_code)
                 continue
             if skip_timeout is not None and timeout_count[c] >= skip_timeout:
                 print(".", end="", flush=True)
+                if exit_on_failure_code is not None:
+                    sys.exit(exit_on_failure_code)
                 continue
             if resume:
                 log_filename_completed = get_filename_completed(bm, hfac, size, c)
@@ -328,14 +342,20 @@ def run_one_benchmark(
             if exit_status is SubprocessrExit.Timeout:
                 timeout_count[c] += 1
                 print(".", end="", flush=True)
+                if exit_on_failure_code is not None:
+                    sys.exit(exit_on_failure_code)
             elif exit_status is SubprocessrExit.Error:
                 print(".", end="", flush=True)
+                if exit_on_failure_code is not None:
+                    sys.exit(exit_on_failure_code)
             elif exit_status is SubprocessrExit.Normal:
                 if suite.is_passed(output):
                     config_passed = True
                     print(config_index_to_chr(j), end="", flush=True)
                 else:
                     print(".", end="", flush=True)
+                    if exit_on_failure_code is not None:
+                        sys.exit(exit_on_failure_code)
             elif exit_status is SubprocessrExit.Dryrun:
                 print(".", end="", flush=True)
             else:
@@ -426,6 +446,8 @@ def run(args):
         skip_timeout = args.get("skip_timeout")
         global skip_log_compression
         skip_log_compression = args.get("skip_log_compression")
+        global exit_on_failure_code
+        exit_on_failure_code = args.get("exit_on_failure")
         global randomize_configs
         randomize_configs = args.get("randomize_configs")
         # Load from configuration file
diff --git a/tests/test_runbms.py b/tests/test_runbms.py
index 10b583a..83599be 100644
--- a/tests/test_runbms.py
+++ b/tests/test_runbms.py
@@ -22,6 +22,47 @@ def test_spread_1():
         assert left == right
 
 
+def test_exit_on_failure_flag_available():
+    """Test that the --exit-on-failure flag is available in the argument parser."""
+    from running.command.runbms import setup_parser
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    subparsers = parser.add_subparsers()
+    setup_parser(subparsers)
+
+    # Test that the flag is recognized and sets the correct default value
+    args = parser.parse_args(["runbms", "/tmp/logs", "/tmp/config.yml"])
+    assert hasattr(args, "exit_on_failure")
+    assert args.exit_on_failure is None
+
+    # Test that the flag can be set without argument (defaults to 1)
+    args = parser.parse_args(
+        ["runbms", "/tmp/logs", "/tmp/config.yml", "--exit-on-failure"]
+    )
+    assert args.exit_on_failure == 1
+
+    # Test that the flag can be set with custom argument
+    args = parser.parse_args(
+        ["runbms", "/tmp/logs", "/tmp/config.yml", "--exit-on-failure", "42"]
+    )
+    assert args.exit_on_failure == 42
+
+
+def test_global_variables_initialization():
+    """Test that the new global variables are properly initialized."""
+    from running.command import runbms
+
+    # Test that the new global variables exist
+    assert hasattr(runbms, "exit_on_failure_code")
+
+    # Test default values (these are module-level globals)
+    # Note: These might be modified by other tests, so we just check they exist
+    assert runbms.exit_on_failure_code is None or isinstance(
+        runbms.exit_on_failure_code, int
+    )
+
+
 def test_randomize_configs_arg_parsing():
     """Test that --randomize-configs argument is parsed correctly"""
     parser = argparse.ArgumentParser()
@@ -39,6 +80,47 @@ def test_randomize_configs_arg_parsing():
     assert args.randomize_configs == True
 
 
+def test_exit_on_failure_flag_parsing():
+    """Test that the --exit-on-failure flag is parsed correctly."""
+    from running.command.runbms import setup_parser
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    subparsers = parser.add_subparsers()
+    setup_parser(subparsers)
+
+    # Test that the flag is recognized and sets the correct default value
+    args = parser.parse_args(["runbms", "/tmp/logs", "/tmp/config.yml"])
+    assert hasattr(args, "exit_on_failure")
+    assert args.exit_on_failure is None
+
+    # Test that the flag can be set without argument (defaults to 1)
+    args = parser.parse_args(
+        ["runbms", "/tmp/logs", "/tmp/config.yml", "--exit-on-failure"]
+    )
+    assert args.exit_on_failure == 1
+
+    # Test that the flag can be set with custom argument
+    args = parser.parse_args(
+        ["runbms", "/tmp/logs", "/tmp/config.yml", "--exit-on-failure", "42"]
+    )
+    assert args.exit_on_failure == 42
+
+
+def test_global_variables_initialization():
+    """Test that the new global variables are properly initialized."""
+    from running.command import runbms
+
+    # Test that the new global variables exist
+    assert hasattr(runbms, "exit_on_failure_code")
+
+    # Test default values (these are module-level globals)
+    # Note: These might be modified by other tests, so we just check they exist
+    assert runbms.exit_on_failure_code is None or isinstance(
+        runbms.exit_on_failure_code, int
+    )
+
+
 def test_config_randomization_logic():
     """Test that the config randomization logic works as expected"""
     # Test the randomization logic independently