39 changes: 38 additions & 1 deletion pufferlib/pufferl.py
@@ -1324,12 +1324,45 @@ def ensure_drive_binary():


def autotune(args=None, env_name=None, vecenv=None, policy=None):
args = args or load_config(env_name)
package = args["package"]
module_name = "pufferlib.ocean" if package == "ocean" else f"pufferlib.environments.{package}"
env_module = importlib.import_module(module_name)
env_name = args["env_name"]
make_env = env_module.env_creator(env_name)
pufferlib.vector.autotune(make_env, batch_size=args["train"]["env_batch_size"])

# For multi-agent envs, convert train.batch_size (agent-steps) to orchestrator env count
# For single-agent envs (num_agents == 1), the division leaves the value unchanged
num_agents_per_env = args["env"].get("num_agents", 1)
train_batch_size = args["train"]["batch_size"]
orchestrator_batch_size = train_batch_size // num_agents_per_env

# max_envs must be at least as large as the batch size
max_envs = args.get("max_envs")
if max_envs is None:
# Default to 2x the batch size to allow for testing different configurations
max_envs = orchestrator_batch_size * 2
elif max_envs < orchestrator_batch_size:
raise ValueError(
f"max_envs ({max_envs}) must be >= orchestrator_batch_size ({orchestrator_batch_size}). "
f"Either increase --max-envs or reduce train.batch_size in the config."
)

print(f"Autotune configuration:")
print(f" Training batch size: {train_batch_size} agent-steps")
print(f" Agents per environment: {num_agents_per_env}")
print(f" Orchestrator batch size: {orchestrator_batch_size} environments")
print(f" Max environments to test: {max_envs}")
print()

pufferlib.vector.autotune(
lambda: make_env(**args["env"]),
batch_size=orchestrator_batch_size,
max_env_ram_gb=args.get("max_env_ram_gb"),
max_batch_vram_gb=args.get("max_batch_vram_gb"),
max_envs=max_envs,
time_per_test=args.get("autotune_time", 5),
)


def load_env(env_name, args):
@@ -1410,6 +1443,10 @@ def load_config(env_name, config_dir=None):
parser.add_argument("--neptune-project", type=str, default="ablations")
parser.add_argument("--local-rank", type=int, default=0, help="Used by torchrun for DDP")
parser.add_argument("--tag", type=str, default=None, help="Tag for experiment")
parser.add_argument("--max-env-ram-gb", type=float, default=None, help="Max RAM (GB) for autotune (overrides auto-detection)")
parser.add_argument("--max-batch-vram-gb", type=float, default=None, help="Max VRAM (GB) for autotune (overrides auto-detection)")
parser.add_argument("--max-envs", type=int, default=None, help="Max environments for autotune (default: 2x batch size)")
parser.add_argument("--autotune-time", type=int, default=5, help="Time per test (seconds) for autotune")
args = parser.parse_known_args()[0]

if config_dir is None:
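A note on the pufferl.py changes above: to make the agent-steps to environments conversion concrete, the following is a minimal standalone sketch of the arithmetic the new autotune() wrapper performs before calling pufferlib.vector.autotune. The example values (a 4096 agent-step training batch, 8 agents per environment) are hypothetical and chosen only for illustration; the real values come from train.batch_size, env.num_agents, and the new --max-envs flag.

# Hypothetical config values for illustration; real ones come from load_config().
train_batch_size = 4096        # train.batch_size, counted in agent-steps
num_agents_per_env = 8         # env.num_agents for a multi-agent environment

# Agent-steps -> number of environments the orchestrator batches together.
orchestrator_batch_size = train_batch_size // num_agents_per_env   # 512 envs

# Default cap when --max-envs is not passed: twice the orchestrator batch size.
max_envs = orchestrator_batch_size * 2                              # 1024 envs

For a single-agent environment (num_agents = 1) the division is a no-op, so the orchestrator batch size equals train.batch_size.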
41 changes: 33 additions & 8 deletions pufferlib/vector.py
@@ -795,10 +795,10 @@ def check_envs(envs, driver):
def autotune(
env_creator,
batch_size,
max_envs=194,
max_envs=None,
model_forward_s=0.0,
max_env_ram_gb=32,
max_batch_vram_gb=0.05,
max_env_ram_gb=None,
max_batch_vram_gb=None,
time_per_test=5,
):
"""Determine the optimal vectorization parameters for your system"""
@@ -807,6 +807,31 @@
if batch_size is None:
raise ValueError("batch_size must not be None")

# Auto-detect hardware limits if not specified
if max_env_ram_gb is None:
# Use 80% of total system RAM to leave headroom for the OS and other processes
total_ram_gb = psutil.virtual_memory().total / 1e9
max_env_ram_gb = total_ram_gb * 0.8
print(f"Auto-detected max RAM: {max_env_ram_gb:.2f} GB (80% of {total_ram_gb:.2f} GB total)")

if max_batch_vram_gb is None:
try:
import torch

if torch.cuda.is_available():
# Use 80% of GPU VRAM to leave room for model and gradients
total_vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
max_batch_vram_gb = total_vram_gb * 0.8
print(f"Auto-detected max VRAM: {max_batch_vram_gb:.2f} GB (80% of {total_vram_gb:.2f} GB total)")
else:
# No GPU, use conservative default
max_batch_vram_gb = 0.05
print("No GPU detected, using default max_batch_vram_gb=0.05 GB")
except ImportError:
# torch not available, use conservative default
max_batch_vram_gb = 0.05
print("PyTorch not available, using default max_batch_vram_gb=0.05 GB")

if max_envs is None:
# No explicit cap was given; default to twice the batch size so the search can still vary num_envs
max_envs = batch_size * 2

if max_envs < batch_size:
raise ValueError(f"max_envs ({max_envs}) must be >= batch_size ({batch_size})")

@@ -829,7 +854,7 @@
while time.time() - start < time_per_test:
idle_ram = max(idle_ram, psutil.Process().memory_info().rss)
s = time.time()
if env.done:
if hasattr(env, "done") and env.done:
env.reset()
reset_times.append(time.time() - s)
else:

env.close()
sum_time = sum(step_times) + sum(reset_times)
reset_percent = 100 * sum(reset_times) / sum_time
sps = steps * num_agents / sum_time
step_variance = 100 * np.std(step_times) / np.mean(step_times)
reset_mean = np.mean(reset_times)
reset_percent = 100 * sum(reset_times) / sum_time if sum_time > 0 else 0
sps = steps * num_agents / sum_time if sum_time > 0 else 0
step_variance = 100 * np.std(step_times) / np.mean(step_times) if len(step_times) > 0 else 0
reset_mean = np.mean(reset_times) if len(reset_times) > 0 else 0
ram_usage = max(1, (idle_ram - load_ram)) / 1e9

obs_size_gb = np.prod(obs_space.shape) * np.dtype(obs_space.dtype).itemsize * num_agents / 1e9
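To round out the vector.py changes, here is a hedged sketch of a direct call to the updated pufferlib.vector.autotune that leaves the hardware limits unset so the auto-detection above takes over. make_my_env and the "pong" environment name are placeholders for illustration, not part of this PR; any zero-argument callable that builds an environment will do.

import pufferlib.ocean
import pufferlib.vector

def make_my_env():
    # Placeholder creator; pufferlib.ocean.env_creator(name) returns a factory
    # that builds the named environment when called.
    return pufferlib.ocean.env_creator("pong")()

# max_env_ram_gb and max_batch_vram_gb are omitted, so autotune picks 80% of
# system RAM and, when CUDA is available, 80% of GPU 0's VRAM. max_envs is
# also left unset and falls back to twice the batch size.
pufferlib.vector.autotune(
    make_my_env,
    batch_size=64,
    time_per_test=5,
)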