 from presets import enabled_suites, presets


+def generate_github_summary(execution_stats, failures):
+    """Generate GitHub workflow summary with execution statistics."""
+    gh_summary: list[str] = []
+    gh_summary.append("### Benchmarks Execution")
+
+    # Overall statistics
+    total_tests = execution_stats["total_tests"]
+    passed_tests = execution_stats["tests_passed"]
+    failed_tests = execution_stats["tests_failed"]
+    warnings = execution_stats["warnings"]
+    errors = len(failures)
+
+    gh_summary.append("#### Overall Statistics")
+    gh_summary.append(f"- **Total Number of Benchmarks:** {total_tests}")
+    gh_summary.append(f"- **Tests Passed:** {passed_tests}")
+    gh_summary.append(f"- **Tests Failed:** {failed_tests}")
+    gh_summary.append(f"- **Errors:** {errors}")
+    gh_summary.append(f"- **Warnings:** {warnings}")
+    gh_summary.append("")
+
+    # Overall status of execution
+    if failed_tests == 0 and errors == 0:
+        gh_summary.append("#### ✅ Status: SUCCESS")
+        gh_summary.append("All benchmarks executed successfully!")
+    else:
+        gh_summary.append("#### ❌ Status: FAILURES DETECTED")
+        gh_summary.append("Some benchmarks failed or encountered errors.")
+
+    if warnings > 0:
+        gh_summary.append("#### ⚠️ Status: WARNINGS DETECTED")
+        gh_summary.append("Some benchmarks executed with warnings.")
+
+    gh_summary.append("")
+
+    # Detailed failures info
+    if failures:
+        gh_summary.append("#### Failure Details")
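+        # <details> renders as a collapsible section in the GitHub job summary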
+        gh_summary.append(
+            f"<details><summary>{len(failures)} failed benchmarks:</summary>"
+        )
+        gh_summary.append("")
+
+        for benchmark_name, failure_reason in failures.items():
+            gh_summary.append(f"##### {benchmark_name}")
+            gh_summary.append(f"- **Reason:** {failure_reason}")
+            gh_summary.append("")
+
+        gh_summary.append("</details>")
+        gh_summary.append("")
+
+    # Write the summary to file
+    try:
+        with open(options.github_summary_filename, "w") as f:
+            f.write("\n".join(gh_summary))
+        log.info(f"GitHub summary written to {options.github_summary_filename}")
+    except Exception as e:
+        log.error(f"Failed to write GitHub summary: {e}")
+
+
 def run_iterations(
     benchmark: Benchmark,
     env_vars,
@@ -110,7 +169,7 @@ def remove_outliers(


 def process_results(
-    results: dict[str, list[Result]], stddev_threshold_override
+    results: dict[str, list[Result]], stddev_threshold_override, execution_stats
 ) -> tuple[bool, list[Result]]:
     processed: list[Result] = []
     # technically, we can detect whether result is below or above threshold per
@@ -142,6 +201,7 @@ def process_results(
             log.warning(
                 f"stddev {stddev} above the threshold {threshold_scaled} ({threshold} times {mean_value}) for {label}"
             )
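+            # Noisy results (stddev above threshold) are surfaced as warnings
+            # in the GitHub summary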
+            execution_stats["warnings"] += 1
             valid_results = False

         rlist.sort(key=lambda res: res.value)
@@ -170,7 +230,7 @@ def collect_metadata(suites):
     return metadata


-def main(directory, additional_env_vars, compare_names, filter):
+def main(directory, additional_env_vars, compare_names, filter, execution_stats):
     prepare_workdir(directory, INTERNAL_WORKDIR_VERSION)

     if options.dry_run:
@@ -218,7 +278,7 @@ def main(directory, additional_env_vars, compare_names, filter):

     # TODO: rename "s", rename setup in suite to suite_setup, rename setup in benchmark to benchmark_setup
     # TODO: do not add benchmarks whose suite setup failed
-    # TODO: add a mode where we fail etire script in case of setup (or other) failures and use in CI
+    # TODO: add a mode where we fail entire script in case of setup (or other) failures and use in CI

     for s in suites:
         if s.name() not in enabled_suites(options.preset):
@@ -246,9 +306,9 @@ def main(directory, additional_env_vars, compare_names, filter):
         except Exception as e:
             if options.exit_on_failure:
                 raise e
-            failures[s.name()] = f"Suite setup failure: {e}"
+            failures[s.name()] = f"Suite '{s.name()}' setup failure: {e}"
             log.error(
-                f"{type(s).__name__} setup failed. Benchmarks won't be added."
+                f"Suite {type(s).__name__} setup failed. Benchmarks won't be added."
             )
             log.error(f"failed: {e}")
         else:
@@ -265,12 +325,15 @@ def main(directory, additional_env_vars, compare_names, filter):
                if options.exit_on_failure:
                    raise e
                else:
-                    failures[benchmark.name()] = f"Benchmark setup failure: {e}"
+                    failures[benchmark.name()] = (
+                        f"Benchmark '{benchmark.name()}' setup failure: {e}"
+                    )
                log.error(f"failed: {e}")

     results = []
     if benchmarks:
         log.info(f"Running {len(benchmarks)} benchmarks...")
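+        # Record how many benchmarks are scheduled to run, for the summary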
+        execution_stats["total_tests"] = len(benchmarks)
     elif not options.dry_run:
         raise RuntimeError("No benchmarks to run.")
     for benchmark in benchmarks:
@@ -301,7 +364,9 @@ def main(directory, additional_env_vars, compare_names, filter):
                    run_trace=TracingType.NONE,
                )
                valid, processed = process_results(
-                    intermediate_results, benchmark.stddev_threshold()
+                    intermediate_results,
+                    benchmark.stddev_threshold(),
+                    execution_stats,
                )
                if valid:
                    break
@@ -335,11 +400,15 @@ def main(directory, additional_env_vars, compare_names, filter):
            )

            results += processed
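+            # The benchmark completed and its results were accepted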
+            execution_stats["tests_passed"] += 1
        except Exception as e:
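+            # Count the failure before options.exit_on_failure may re-raise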
+            execution_stats["tests_failed"] += 1
            if options.exit_on_failure:
                raise e
            else:
-                failures[benchmark.name()] = f"Benchmark run failure: {e}"
+                failures[benchmark.name()] = (
+                    f"Benchmark '{benchmark.name()}' run failure: {e}"
+                )
            log.error(f"failed: {e}")

     this_name = options.current_run_name
@@ -408,6 +477,10 @@ def main(directory, additional_env_vars, compare_names, filter):
         generate_html(history, compare_names, html_path, metadata)
         log.info(f"HTML with benchmark results has been generated")

+    # Generate GitHub summary
+    if options.produce_github_summary:
+        generate_github_summary(execution_stats, failures)
+
     if options.exit_on_failure and failures:
         # just in case the code failed to raise earlier
         raise RuntimeError(str(failures))
@@ -691,6 +764,12 @@ def validate_and_parse_env_args(env_args):
         help="Set the logging level",
         default="info",
     )
+    parser.add_argument(
+        "--produce-github-summary",
+        action="store_true",
+        help=f"Produce an execution stats summary for the GitHub workflow, written to '{options.github_summary_filename}'.",
+        default=False,
+    )

     args = parser.parse_args()
     additional_env_vars = validate_and_parse_env_args(args.env)
@@ -724,6 +803,7 @@ def validate_and_parse_env_args(env_args):
     options.flamegraph = args.flamegraph is not None
     options.archive_baseline_days = args.archive_baseline_after
     options.archive_pr_days = args.archive_pr_after
+    options.produce_github_summary = args.produce_github_summary

     # Initialize logger with command line arguments
     initialize_logger(args.verbose, args.log_level)
@@ -738,6 +818,14 @@ def validate_and_parse_env_args(env_args):
         parser.error("Specified --output-dir is not a valid path")
     options.output_directory = os.path.abspath(args.output_dir)

+    # Initialize GitHub summary tracking
+    execution_stats = {
+        "total_tests": 0,
+        "tests_passed": 0,
+        "tests_failed": 0,
+        "warnings": 0,
+    }
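+    # Updated by main() and process_results(), consumed by generate_github_summary()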
+
     # Options intended for CI:
     options.timestamp_override = args.timestamp_override
     if args.results_dir is not None:
@@ -780,6 +868,7 @@ def validate_and_parse_env_args(env_args):
         options.device_architecture = ""
         log.warning(f"Failed to fetch device architecture: {e}")
         log.warning("Defaulting to generic benchmark parameters.")
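+        # The fallback to generic parameters is surfaced as a warning in the summary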
+        execution_stats["warnings"] += 1

     log.info(f"Selected device architecture: {options.device_architecture}")

@@ -788,4 +877,5 @@ def validate_and_parse_env_args(env_args):
         additional_env_vars,
         args.compare,
         benchmark_filter,
+        execution_stats,
     )