From 9b53a60f815b04227cb0237508f002092f1f3898 Mon Sep 17 00:00:00 2001 From: Kevin Date: Fri, 28 Nov 2025 14:48:38 -0500 Subject: [PATCH 1/8] config loading and key fix --- pufferlib/pufferl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 149768253..588802caa 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -1324,12 +1324,13 @@ def ensure_drive_binary(): def autotune(args=None, env_name=None, vecenv=None, policy=None): + args = args or load_config(env_name) package = args["package"] module_name = "pufferlib.ocean" if package == "ocean" else f"pufferlib.environments.{package}" env_module = importlib.import_module(module_name) env_name = args["env_name"] make_env = env_module.env_creator(env_name) - pufferlib.vector.autotune(make_env, batch_size=args["train"]["env_batch_size"]) + pufferlib.vector.autotune(make_env, batch_size=args["train"]["batch_size"]) def load_env(env_name, args): From 2b31f8b88abecd0131c1a0fc03bf3c187ba877b2 Mon Sep 17 00:00:00 2001 From: Kevin Date: Fri, 28 Nov 2025 15:13:41 -0500 Subject: [PATCH 2/8] infer hardware limits --- pufferlib/vector.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/pufferlib/vector.py b/pufferlib/vector.py index bf5dc7460..736dfa376 100644 --- a/pufferlib/vector.py +++ b/pufferlib/vector.py @@ -797,8 +797,8 @@ def autotune( batch_size, max_envs=194, model_forward_s=0.0, - max_env_ram_gb=32, - max_batch_vram_gb=0.05, + max_env_ram_gb=None, + max_batch_vram_gb=None, time_per_test=5, ): """Determine the optimal vectorization parameters for your system""" @@ -807,6 +807,30 @@ def autotune( if batch_size is None: raise ValueError("batch_size must not be None") + # Auto-detect hardware limits if not specified + if max_env_ram_gb is None: + # Use 80% of total system RAM to leave room for OS and other processes + total_ram_gb = psutil.virtual_memory().total / 1e9 + max_env_ram_gb = 
total_ram_gb * 0.8 + print(f"Auto-detected max RAM: {max_env_ram_gb:.2f} GB (80% of {total_ram_gb:.2f} GB total)") + + if max_batch_vram_gb is None: + try: + import torch + if torch.cuda.is_available(): + # Use 80% of GPU VRAM to leave room for model and gradients + total_vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9 + max_batch_vram_gb = total_vram_gb * 0.8 + print(f"Auto-detected max VRAM: {max_batch_vram_gb:.2f} GB (80% of {total_vram_gb:.2f} GB total)") + else: + # No GPU, use conservative default + max_batch_vram_gb = 0.05 + print("No GPU detected, using default max_batch_vram_gb=0.05 GB") + except ImportError: + # torch not available, use conservative default + max_batch_vram_gb = 0.05 + print("PyTorch not available, using default max_batch_vram_gb=0.05 GB") + if max_envs < batch_size: raise ValueError("max_envs < min_batch_size") From f0ac2ec838d3ae10303bfa2242dec3548ec8a5e8 Mon Sep 17 00:00:00 2001 From: Kevin Date: Fri, 28 Nov 2025 15:38:21 -0500 Subject: [PATCH 3/8] address batch_size ambiguity --- pufferlib/pufferl.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 588802caa..bf7e9d4a6 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -1330,7 +1330,24 @@ def autotune(args=None, env_name=None, vecenv=None, policy=None): env_module = importlib.import_module(module_name) env_name = args["env_name"] make_env = env_module.env_creator(env_name) - pufferlib.vector.autotune(make_env, batch_size=args["train"]["batch_size"]) + + # Create a temporary env to get num_agents for multi-agent environments + temp_env = make_env(**args["env"]) + num_agents_per_env = temp_env.num_agents + temp_env.close() + + # For multi-agent envs, convert train.batch_size (agent-steps) to orchestrator env count + # For single-agent envs, this division results in the same value + train_batch_size = args["train"]["batch_size"] + orchestrator_batch_size = train_batch_size 
// num_agents_per_env + + print(f"Autotune configuration:") + print(f" Training batch size: {train_batch_size} agent-steps") + print(f" Agents per environment: {num_agents_per_env}") + print(f" Orchestrator batch size: {orchestrator_batch_size} environments") + print() + + pufferlib.vector.autotune(make_env, batch_size=orchestrator_batch_size, env_kwargs=args["env"]) def load_env(env_name, args): From 575f9bc1f5df1485ad937c10462e47cf46ceda12 Mon Sep 17 00:00:00 2001 From: Kevin Date: Fri, 28 Nov 2025 15:50:07 -0500 Subject: [PATCH 4/8] cleanup --- pufferlib/vector.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pufferlib/vector.py b/pufferlib/vector.py index 736dfa376..ba003c451 100644 --- a/pufferlib/vector.py +++ b/pufferlib/vector.py @@ -817,6 +817,7 @@ def autotune( if max_batch_vram_gb is None: try: import torch + if torch.cuda.is_available(): # Use 80% of GPU VRAM to leave room for model and gradients total_vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9 From 18b2f48f09fd9295bd670dedc99e209e689e116c Mon Sep 17 00:00:00 2001 From: Kevin Date: Sat, 29 Nov 2025 15:32:20 -0500 Subject: [PATCH 5/8] fix autotune invocation --- pufferlib/pufferl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index bf7e9d4a6..a1c4db033 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -1347,7 +1347,7 @@ def autotune(args=None, env_name=None, vecenv=None, policy=None): print(f" Orchestrator batch size: {orchestrator_batch_size} environments") print() - pufferlib.vector.autotune(make_env, batch_size=orchestrator_batch_size, env_kwargs=args["env"]) + pufferlib.vector.autotune(lambda: make_env(**args["env"]), batch_size=orchestrator_batch_size) def load_env(env_name, args): From c33dff5008caf4d983c352cec1623e7395bbf28a Mon Sep 17 00:00:00 2001 From: Kevin Date: Sat, 29 Nov 2025 15:44:01 -0500 Subject: [PATCH 6/8] remove temp env creation --- pufferlib/pufferl.py | 19 +++++++++++++------ 
1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index a1c4db033..32b1a5ee7 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -1331,13 +1331,9 @@ def autotune(args=None, env_name=None, vecenv=None, policy=None): env_name = args["env_name"] make_env = env_module.env_creator(env_name) - # Create a temporary env to get num_agents for multi-agent environments - temp_env = make_env(**args["env"]) - num_agents_per_env = temp_env.num_agents - temp_env.close() - # For multi-agent envs, convert train.batch_size (agent-steps) to orchestrator env count # For single-agent envs, this division results in the same value + num_agents_per_env = args["env"].get("num_agents", 1) train_batch_size = args["train"]["batch_size"] orchestrator_batch_size = train_batch_size // num_agents_per_env @@ -1347,7 +1343,14 @@ def autotune(args=None, env_name=None, vecenv=None, policy=None): print(f" Orchestrator batch size: {orchestrator_batch_size} environments") print() - pufferlib.vector.autotune(lambda: make_env(**args["env"]), batch_size=orchestrator_batch_size) + pufferlib.vector.autotune( + lambda: make_env(**args["env"]), + batch_size=orchestrator_batch_size, + max_env_ram_gb=args.get("max_env_ram_gb"), + max_batch_vram_gb=args.get("max_batch_vram_gb"), + max_envs=args.get("max_envs", 194), + time_per_test=args.get("autotune_time", 5), + ) def load_env(env_name, args): @@ -1428,6 +1431,10 @@ def load_config(env_name, config_dir=None): parser.add_argument("--neptune-project", type=str, default="ablations") parser.add_argument("--local-rank", type=int, default=0, help="Used by torchrun for DDP") parser.add_argument("--tag", type=str, default=None, help="Tag for experiment") + parser.add_argument("--max-env-ram-gb", type=float, default=None, help="Max RAM (GB) for autotune (overrides auto-detection)") + parser.add_argument("--max-batch-vram-gb", type=float, default=None, help="Max VRAM (GB) for autotune (overrides 
auto-detection)") + parser.add_argument("--max-envs", type=int, default=194, help="Max environments for autotune") + parser.add_argument("--autotune-time", type=int, default=5, help="Time per test (seconds) for autotune") args = parser.parse_known_args()[0] if config_dir is None: From 627b9d0973fa54b64d932af0b8decd084fafa152 Mon Sep 17 00:00:00 2001 From: Kevin Date: Sat, 29 Nov 2025 15:52:44 -0500 Subject: [PATCH 7/8] remove incorrect max env arg --- pufferlib/pufferl.py | 16 ++++++++++++++-- pufferlib/vector.py | 2 +- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 32b1a5ee7..27a95daf1 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -1337,10 +1337,22 @@ def autotune(args=None, env_name=None, vecenv=None, policy=None): train_batch_size = args["train"]["batch_size"] orchestrator_batch_size = train_batch_size // num_agents_per_env + # max_envs must be at least as large as the batch size + max_envs = args.get("max_envs") + if max_envs is None: + # Default to 2x the batch size to allow for testing different configurations + max_envs = orchestrator_batch_size * 2 + elif max_envs < orchestrator_batch_size: + raise ValueError( + f"max_envs ({max_envs}) must be >= orchestrator_batch_size ({orchestrator_batch_size}). " + f"Either increase --max-envs or reduce train.batch_size in the config." 
+ ) + print(f"Autotune configuration:") print(f" Training batch size: {train_batch_size} agent-steps") print(f" Agents per environment: {num_agents_per_env}") print(f" Orchestrator batch size: {orchestrator_batch_size} environments") + print(f" Max environments to test: {max_envs}") print() pufferlib.vector.autotune( @@ -1348,7 +1360,7 @@ def autotune(args=None, env_name=None, vecenv=None, policy=None): batch_size=orchestrator_batch_size, max_env_ram_gb=args.get("max_env_ram_gb"), max_batch_vram_gb=args.get("max_batch_vram_gb"), - max_envs=args.get("max_envs", 194), + max_envs=max_envs, time_per_test=args.get("autotune_time", 5), ) @@ -1433,7 +1445,7 @@ def load_config(env_name, config_dir=None): parser.add_argument("--tag", type=str, default=None, help="Tag for experiment") parser.add_argument("--max-env-ram-gb", type=float, default=None, help="Max RAM (GB) for autotune (overrides auto-detection)") parser.add_argument("--max-batch-vram-gb", type=float, default=None, help="Max VRAM (GB) for autotune (overrides auto-detection)") - parser.add_argument("--max-envs", type=int, default=194, help="Max environments for autotune") + parser.add_argument("--max-envs", type=int, default=None, help="Max environments for autotune (default: 2x batch size)") parser.add_argument("--autotune-time", type=int, default=5, help="Time per test (seconds) for autotune") args = parser.parse_known_args()[0] diff --git a/pufferlib/vector.py b/pufferlib/vector.py index ba003c451..6475fdf1c 100644 --- a/pufferlib/vector.py +++ b/pufferlib/vector.py @@ -795,7 +795,7 @@ def check_envs(envs, driver): def autotune( env_creator, batch_size, - max_envs=194, + max_envs=None, model_forward_s=0.0, max_env_ram_gb=None, max_batch_vram_gb=None, From 21713a301622b7344be0479a5e29a821dbad840f Mon Sep 17 00:00:00 2001 From: Kevin Date: Sat, 29 Nov 2025 16:04:25 -0500 Subject: [PATCH 8/8] make autotune not rely on external resets --- pufferlib/vector.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 
deletions(-) diff --git a/pufferlib/vector.py b/pufferlib/vector.py index 6475fdf1c..c014ac965 100644 --- a/pufferlib/vector.py +++ b/pufferlib/vector.py @@ -854,7 +854,7 @@ def autotune( while time.time() - start < time_per_test: idle_ram = max(idle_ram, psutil.Process().memory_info().rss) s = time.time() - if env.done: + if hasattr(env, "done") and env.done: env.reset() reset_times.append(time.time() - s) else: @@ -864,10 +864,10 @@ def autotune( env.close() sum_time = sum(step_times) + sum(reset_times) - reset_percent = 100 * sum(reset_times) / sum_time - sps = steps * num_agents / sum_time - step_variance = 100 * np.std(step_times) / np.mean(step_times) - reset_mean = np.mean(reset_times) + reset_percent = 100 * sum(reset_times) / sum_time if sum_time > 0 else 0 + sps = steps * num_agents / sum_time if sum_time > 0 else 0 + step_variance = 100 * np.std(step_times) / np.mean(step_times) if len(step_times) > 0 else 0 + reset_mean = np.mean(reset_times) if len(reset_times) > 0 else 0 ram_usage = max(1, (idle_ram - load_ram)) / 1e9 obs_size_gb = np.prod(obs_space.shape) * np.dtype(obs_space.dtype).itemsize * num_agents / 1e9