From 69b612dac02b46b90f4f767f28ec5a471f01b457 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96zhan=20=C3=96zen?= Date: Mon, 25 Aug 2025 15:15:29 +0200 Subject: [PATCH 1/2] Added custom progress reporter --- scripts/reinforcement_learning/ray/tuner.py | 38 +++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/scripts/reinforcement_learning/ray/tuner.py b/scripts/reinforcement_learning/ray/tuner.py index c9d5d6e20b9..a1d5eb62bbe 100644 --- a/scripts/reinforcement_learning/ray/tuner.py +++ b/scripts/reinforcement_learning/ray/tuner.py @@ -12,6 +12,7 @@ import ray import util from ray import air, tune +from ray.tune.progress_reporter import ProgressReporter from ray.tune.search.optuna import OptunaSearch from ray.tune.search.repeater import Repeater @@ -203,13 +204,18 @@ def stop_all(self): return False -def invoke_tuning_run(cfg: dict, args: argparse.Namespace) -> None: +def invoke_tuning_run( + cfg: dict, + args: argparse.Namespace, + progress_reporter: ProgressReporter | None = None, +) -> None: """Invoke an Isaac-Ray tuning run. Log either to a local directory or to MLFlow. Args: cfg: Configuration dictionary extracted from job setup args: Command-line arguments related to tuning. + progress_reporter: Custom progress reporter. Defaults to CLIReporter or JupyterNotebookReporter if not provided. """ # Allow for early exit os.environ["TUNE_DISABLE_STRICT_METRIC_CHECKING"] = "1" @@ -237,6 +243,17 @@ def invoke_tuning_run(cfg: dict, args: argparse.Namespace) -> None: ) repeat_search = Repeater(searcher, repeat=args.repeat_run_count) + if progress_reporter is not None: + os.environ["RAY_AIR_NEW_OUTPUT"] = "0" + if ( + getattr(progress_reporter, "_metric", None) is not None + or getattr(progress_reporter, "_mode", None) is not None + ): + raise ValueError( + "Do not set metric or mode directly in the custom progress reporter class, " + "provide them as arguments to tuner.py instead."
+ ) + if args.run_mode == "local": # Standard config, to file run_config = air.RunConfig( storage_path="/tmp/ray", @@ -247,6 +264,7 @@ def invoke_tuning_run(cfg: dict, args: argparse.Namespace) -> None: checkpoint_at_end=False, # Disable final checkpoint ), stop=LogExtractionErrorStopper(max_errors=MAX_LOG_EXTRACTION_ERRORS), + progress_reporter=progress_reporter, ) elif args.run_mode == "remote": # MLFlow, to MLFlow server @@ -263,6 +281,7 @@ def invoke_tuning_run(cfg: dict, args: argparse.Namespace) -> None: callbacks=[mlflow_callback], checkpoint_config=ray.train.CheckpointConfig(checkpoint_frequency=0, checkpoint_at_end=False), stop=LogExtractionErrorStopper(max_errors=MAX_LOG_EXTRACTION_ERRORS), + progress_reporter=progress_reporter, ) else: raise ValueError("Unrecognized run mode.") @@ -399,6 +418,12 @@ def __init__(self, cfg: dict): default=MAX_LOG_EXTRACTION_ERRORS, help="Max number number of LogExtractionError failures before we abort the whole tuning run.", ) + parser.add_argument( + "--progress_reporter", + type=str, + default=None, + help="A progress reporter in cfg_file, must be a ProgressReporter object.", + ) args = parser.parse_args() PROCESS_RESPONSE_TIMEOUT = args.process_response_timeout @@ -457,7 +482,16 @@ def __init__(self, cfg: dict): print(f"[INFO]: Successfully instantiated class '{class_name}' from {file_path}") cfg = instance.cfg print(f"[INFO]: Grabbed the following hyperparameter sweep config: \n {cfg}") - invoke_tuning_run(cfg, args) + # Load optional progress reporter config + progress_reporter = None + if args.progress_reporter and hasattr(module, args.progress_reporter): + progress_reporter = getattr(module, args.progress_reporter) + if isinstance(progress_reporter, type) and issubclass(progress_reporter, tune.ProgressReporter): + progress_reporter = progress_reporter() + else: + raise TypeError(f"[ERROR]: {args.progress_reporter} is not a valid ProgressReporter.") + print(f"[INFO]: Loaded custom progress reporter from 
'{args.progress_reporter}'") + invoke_tuning_run(cfg, args, progress_reporter=progress_reporter) else: raise AttributeError(f"[ERROR]:Class '{class_name}' not found in {file_path}") From 10d7d240812435cb5c5d1f663353df5efb4cd073 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96zhan=20=C3=96zen?= Date: Mon, 25 Aug 2025 16:45:53 +0200 Subject: [PATCH 2/2] Adds CustomCartpoleProgressReporter as an example --- .../hyperparameter_tuning/vision_cartpole_cfg.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/scripts/reinforcement_learning/ray/hyperparameter_tuning/vision_cartpole_cfg.py b/scripts/reinforcement_learning/ray/hyperparameter_tuning/vision_cartpole_cfg.py index 0a6889d075b..580bb0c8f4f 100644 --- a/scripts/reinforcement_learning/ray/hyperparameter_tuning/vision_cartpole_cfg.py +++ b/scripts/reinforcement_learning/ray/hyperparameter_tuning/vision_cartpole_cfg.py @@ -12,6 +12,7 @@ import util import vision_cfg from ray import tune +from ray.tune.progress_reporter import CLIReporter class CartpoleRGBNoTuneJobCfg(vision_cfg.CameraJobCfg): @@ -47,3 +48,18 @@ def __init__(self, cfg: dict = {}): cfg = util.populate_isaac_ray_cfg_args(cfg) cfg["runner_args"]["--task"] = tune.choice(["Isaac-Cartpole-RGB-TheiaTiny-v0"]) super().__init__(cfg) + + +class CustomCartpoleProgressReporter(CLIReporter): + def __init__(self): + super().__init__( + metric_columns={ + "training_iteration": "iter", + "time_total_s": "total time (s)", + "Episode/Episode_Reward/alive": "alive", + "Episode/Episode_Reward/cart_vel": "cart velocity", + "rewards/time": "rewards/time", + }, + max_report_frequency=5, + sort_by_metric=True, + )