Skip to content

datasets does not exist! 和 ValueError: Padding error! #68

@2561681244

Description

@2561681244

作者你好,我遇到了如下2个问题,想请教一下您:

/mnt/nvme/yfyuan/wangxiao/simplevla/LIBERO/libero/libero/../datasets does not exist!
(WorkerDict pid=4482) Saved rollout MP4 at path ./rollouts/libero_goal/step=0--task=libero_goal_task_0_trial_7--success=False--ran=5847.mp4
(WorkerDict pid=4482) Saved rollout MP4 at path ./rollouts/libero_goal/step=0--task=libero_goal_task_3_trial_5--success=True--ran=3310.mp4
(WorkerDict pid=4482) Saved rollout MP4 at path ./rollouts/libero_goal/step=0--task=libero_goal_task_3_trial_24--success=False--ran=9152.mp4
(WorkerDict pid=4482) Saved rollout MP4 at path ./rollouts/libero_goal/step=0--task=libero_goal_task_8_trial_8--success=False--ran=6695.mp4
(WorkerDict pid=4482) Saved rollout MP4 at path ./rollouts/libero_goal/step=0--task=libero_goal_task_3_trial_23--success=True--ran=9779.mp4
(WorkerDict pid=4482) Saved rollout MP4 at path ./rollouts/libero_goal/step=0--task=libero_goal_task_9_trial_10--success=False--ran=6578.mp4
(WorkerDict pid=4482) Saved rollout MP4 at path ./rollouts/libero_goal/step=0--task=libero_goal_task_5_trial_21--success=True--ran=755.mp4
(WorkerDict pid=4482) Saved rollout MP4 at path ./rollouts/libero_goal/step=0--task=libero_goal_task_9_trial_39--success=True--ran=5255.mp4
(main_task pid=4227) validation generation end

(main_task pid=45136) gen: 478.6 seconds
Error executing job with overrides: ['data.task_suite_name=libero_goal', 'data.num_trials_per_task=50', 'data.n_samples=8', 'data.filter_accuracy=True', 'data.accuracy_lower_bound=0.1', 'data.accuracy_upper_bound=0.9', 'data.oversample_factor=1', 'data.train_batch_size=8', 'data.val_batch_size=16', 'data.max_prompt_length=256', 'data.max_response_length=128', 'actor_rollout_ref.model.path=/mnt/nvme/yfyuan/wangxiao/models/simplevla/Openvla-oft-SFT-libero-goal-traj1', 'actor_rollout_ref.model.vla=openvla-oft', 'actor_rollout_ref.model.action_token_len=7', 'actor_rollout_ref.model.action_chunks_len=8', 'actor_rollout_ref.actor.optim.lr=5e-6', 'actor_rollout_ref.actor.optim.warmup_style=constant', 'actor_rollout_ref.actor.ppo_mini_batch_size=128', 'actor_rollout_ref.actor.ppo_micro_batch_size=1', 'actor_rollout_ref.actor.use_dynamic_bsz=False', 'actor_rollout_ref.actor.fsdp_config.param_offload=False', 'actor_rollout_ref.actor.fsdp_config.grad_offload=False', 'actor_rollout_ref.actor.fsdp_config.optimizer_offload=False', 'actor_rollout_ref.actor.grad_clip=1', 'actor_rollout_ref.actor.clip_ratio_high=0.28', 'actor_rollout_ref.actor.clip_ratio_low=0.2', 'actor_rollout_ref.actor.num_images_in_input=1', 'actor_rollout_ref.actor.traj_mini_batch_size=16', 'actor_rollout_ref.model.enable_gradient_checkpointing=False', 'actor_rollout_ref.model.use_remove_padding=False', 'actor_rollout_ref.actor.entropy_coeff=0.', 'actor_rollout_ref.rollout.num_images_in_input=1', 'actor_rollout_ref.rollout.use_proprio=False', 'actor_rollout_ref.rollout.val_micro_batch_size=8', 'actor_rollout_ref.rollout.temperature=1.6', 'actor_rollout_ref.rollout.experiment_name=libero_goal', 'actor_rollout_ref.rollout.micro_batch_size=1', 'actor_rollout_ref.rollout.unnorm_key=libero_goal', 'actor_rollout_ref.rollout.model_family=openvla', 'actor_rollout_ref.rollout.task_suite_name=libero_goal', 'actor_rollout_ref.rollout.num_steps_wait=10', 
'actor_rollout_ref.rollout.pretrained_checkpoint=/mnt/nvme/yfyuan/wangxiao/models/simplevla/Openvla-oft-SFT-libero-goal-traj1', 'actor_rollout_ref.rollout.center_crop=True', 'actor_rollout_ref.rollout.max_prompt_length=512', 'actor_rollout_ref.rollout.log_prob_micro_batch_size=32', 'actor_rollout_ref.rollout.tensor_model_parallel_size=1', 'actor_rollout_ref.rollout.name=hf', 'actor_rollout_ref.rollout.gpu_memory_utilization=0.9', 'actor_rollout_ref.ref.log_prob_micro_batch_size=32', 'actor_rollout_ref.ref.fsdp_config.param_offload=True', 'algorithm.kl_ctrl.kl_coef=0.00', 'trainer.logger=[console]', 'trainer.project_name=SimpleVLA-RL', 'trainer.experiment_name=libero_goal', 'trainer.default_local_dir=/mnt/nvme/yfyuan/wangxiao/simplevla/checkpoints/SimpleVLA-RL/libero_goal', 'trainer.n_gpus_per_node=1', 'trainer.nnodes=1', 'trainer.save_freq=25', 'trainer.test_freq=4', 'trainer.total_epochs=100', 'trainer.val_only=False', 'algorithm.adv_estimator=grpo', 'algorithm.adv_params.verifier_gamma=1.0', 'algorithm.adv_params.reward_model_gamma=1.0', 'trainer.runtime_env=/mnt/pami203/yfyuan/wangxiao/simplevla/SimpleVLA-RL/align.json', 'trainer.wandb_mode=disabled', 'trainer.val_before_train=False']
Traceback (most recent call last):
File "/opt/conda/envs/simplevla/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/opt/conda/envs/simplevla/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/mnt/nvme/yfyuan/wangxiao/simplevla/SimpleVLA-RL/verl/trainer/main_ppo.py", line 212, in <module>
main()
File "/opt/conda/envs/simplevla/lib/python3.10/site-packages/hydra/main.py", line 94, in decorated_main
_run_hydra(
File "/opt/conda/envs/simplevla/lib/python3.10/site-packages/hydra/_internal/utils.py", line 394, in _run_hydra
_run_app(
File "/opt/conda/envs/simplevla/lib/python3.10/site-packages/hydra/_internal/utils.py", line 457, in _run_app
run_and_report(
File "/opt/conda/envs/simplevla/lib/python3.10/site-packages/hydra/_internal/utils.py", line 223, in run_and_report
raise ex
File "/opt/conda/envs/simplevla/lib/python3.10/site-packages/hydra/_internal/utils.py", line 220, in run_and_report
return func()
File "/opt/conda/envs/simplevla/lib/python3.10/site-packages/hydra/_internal/utils.py", line 458, in <lambda>
lambda: hydra.run(
File "/opt/conda/envs/simplevla/lib/python3.10/site-packages/hydra/_internal/hydra.py", line 132, in run
_ = ret.return_value
File "/opt/conda/envs/simplevla/lib/python3.10/site-packages/hydra/core/utils.py", line 260, in return_value
raise self._return_value
File "/opt/conda/envs/simplevla/lib/python3.10/site-packages/hydra/core/utils.py", line 186, in run_job
ret.return_value = task_function(task_cfg)
File "/mnt/nvme/yfyuan/wangxiao/simplevla/SimpleVLA-RL/verl/trainer/main_ppo.py", line 116, in main
ray.get(main_task.remote(config))
File "/opt/conda/envs/simplevla/lib/python3.10/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
return fn(*args, **kwargs)
File "/opt/conda/envs/simplevla/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
return func(*args, **kwargs)
File "/opt/conda/envs/simplevla/lib/python3.10/site-packages/ray/_private/worker.py", line 2972, in get
values, debugger_breakpoint = worker.get_objects(
File "/opt/conda/envs/simplevla/lib/python3.10/site-packages/ray/_private/worker.py", line 1031, in get_objects
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(ValueError): ray::main_task() (pid=45136, ip=10.244.136.198)
File "/mnt/nvme/yfyuan/wangxiao/simplevla/SimpleVLA-RL/verl/trainer/main_ppo.py", line 208, in main_task
trainer.fit()
File "/mnt/nvme/yfyuan/wangxiao/simplevla/SimpleVLA-RL/verl/trainer/ppo/ray_trainer.py", line 549, in fit
gen_batch_output = self.actor_rollout_wg.generate_sequences(prompts=gen_batch)
File "/mnt/nvme/yfyuan/wangxiao/simplevla/SimpleVLA-RL/verl/single_controller/ray/base.py", line 42, in func
output = ray.get(output)
ray.exceptions.RayTaskError(ValueError): ray::WorkerDict.actor_rollout_generate_sequences() (pid=45392, ip=10.244.136.198, actor_id=b2ad906862dc93628f67b31201000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7f070e09a7a0>)
File "/mnt/nvme/yfyuan/wangxiao/simplevla/SimpleVLA-RL/verl/single_controller/ray/base.py", line 397, in func
return getattr(self.worker_dict[key], name)(*args, **kwargs)
File "/mnt/nvme/yfyuan/wangxiao/simplevla/SimpleVLA-RL/verl/single_controller/base/decorator.py", line 404, in inner
return func(*args, **kwargs)
File "/mnt/nvme/yfyuan/wangxiao/simplevla/SimpleVLA-RL/verl/workers/fsdp_workers.py", line 523, in generate_sequences
old_log_probs = self.actor.compute_log_prob(data=output)
File "/mnt/nvme/yfyuan/wangxiao/simplevla/SimpleVLA-RL/verl/workers/actor/dp_rob.py", line 389, in compute_log_prob
_, log_probs = self._forward_micro_batch(micro_batch, temperature=temperature)
File "/mnt/nvme/yfyuan/wangxiao/simplevla/SimpleVLA-RL/verl/workers/actor/dp_rob.py", line 143, in _forward_micro_batch
input_ids_unpad, _ = self.process_tensor(input_ids, self.pad_token_id)
File "/mnt/nvme/yfyuan/wangxiao/simplevla/SimpleVLA-RL/verl/workers/actor/dp_rob.py", line 61, in process_tensor
raise ValueError("Padding error!")
ValueError: Padding error!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions