From 1e69a7795d956d7f71e2b2ab3c1eae05351ea4a5 Mon Sep 17 00:00:00 2001 From: Alex Keizer Date: Wed, 25 Jun 2025 12:49:18 +0100 Subject: [PATCH 1/2] feat: make (fixed-width) evaluation return a non-zero exitcode on mismatch This PR changes the evaluation script to return a non-zero exitcode whenever the ripgrep checks find a mismatched number (i.e., whenever it currently prints FAIL to the log). This exitcode should cause the CI to fail, meaning we no longer have to manually check the logs to confirm the evaluation actually got the right numbers. It does mean that now whenever we do expect the numbers to change, we have to change the expected numbers in the script before CI can pass, but forcing us to keep these numbers in sync with reality seems more like a feature than a bug. --- bv-evaluation/collect-data-llvm.py | 8 ++++++++ bv-evaluation/results/InstCombine/.placeholder | 0 2 files changed, 8 insertions(+) delete mode 100644 bv-evaluation/results/InstCombine/.placeholder diff --git a/bv-evaluation/collect-data-llvm.py b/bv-evaluation/collect-data-llvm.py index 836dd99830..32143a38fd 100644 --- a/bv-evaluation/collect-data-llvm.py +++ b/bv-evaluation/collect-data-llvm.py @@ -257,7 +257,11 @@ print("There were "+str(inconsistencies)+" inconsistencies") print("Errors raised: "+str(errTot)) +shell_command_mismatch = False + # Double Checking Against Grep Output +# This function sets the global `shell_command_mismatch` to True if the output +# does not match the expected value. def run_shell_command_and_assert_output_eq_int(cwd : str, cmd : str, expected_val : int) -> int: response = subprocess.check_output(cmd, shell=True, cwd=cwd, text=True) val = int(response) @@ -305,3 +309,7 @@ def run_shell_command_and_assert_output_eq_int(cwd : str, cmd : str, expected_va df.to_csv(raw_data_dir+'llvm-proved-data.csv') df_ceg.to_csv(raw_data_dir+'llvm-ceg-data.csv') +# If any of the shell commands were not as expected, set a non-zero exit code +# to signal this failure (in particular, making CI fail). +if shell_command_mismatch: + exit(1) \ No newline at end of file diff --git a/bv-evaluation/results/InstCombine/.placeholder b/bv-evaluation/results/InstCombine/.placeholder deleted file mode 100644 index e69de29bb2..0000000000 From ff7dd03b3889ecc63635da3aee4a70aa93faea97 Mon Sep 17 00:00:00 2001 From: Alex Keizer Date: Wed, 25 Jun 2025 15:10:14 +0100 Subject: [PATCH 2/2] fix: actually modify the global exit code var --- bv-evaluation/collect-data-llvm.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bv-evaluation/collect-data-llvm.py b/bv-evaluation/collect-data-llvm.py index 32143a38fd..8d89b2b953 100644 --- a/bv-evaluation/collect-data-llvm.py +++ b/bv-evaluation/collect-data-llvm.py @@ -263,10 +263,14 @@ # This function sets the global `shell_command_mismatch` to True if the output # does not match the expected value. def run_shell_command_and_assert_output_eq_int(cwd : str, cmd : str, expected_val : int) -> int: + global shell_command_mismatch + response = subprocess.check_output(cmd, shell=True, cwd=cwd, text=True) val = int(response) failed = val != expected_val failed_str = "FAIL" if failed else "SUCCESS" + if failed: + shell_command_mismatch = True print(f"ran {cmd}, expected {expected_val}, found {val}, {failed_str}") run_shell_command_and_assert_output_eq_int("results/InstCombine/", "rg 'Bitwuzla failed' | wc -l", both_failed+bw_only_failed)