While running training, I get an error with the following traceback:
Traceback (most recent call last):
File "/home/aws_install/workspace/ENVISION/source/farse-cnn/train.py", line 147, in <module>
main(train_cfg_path)
File "/home/aws_install/workspace/ENVISION/source/farse-cnn/train.py", line 137, in main
trainer.fit(net, ckpt_path=existing_ckpt)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 545, in fit
call._call_and_handle_interrupt(
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 44, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 581, in _fit_impl
self._run(model, ckpt_path=ckpt_path)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 990, in _run
results = self._run_stage()
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 1036, in _run_stage
self.fit_loop.run()
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py", line 202, in run
self.advance()
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py", line 359, in advance
self.epoch_loop.run(self._data_fetcher)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/loops/training_epoch_loop.py", line 136, in run
self.advance(data_fetcher)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/loops/training_epoch_loop.py", line 240, in advance
batch_output = self.automatic_optimization.run(trainer.optimizers[0], batch_idx, kwargs)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 180, in run
closure()
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 140, in __call__
self._result = self.closure(*args, **kwargs)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 126, in closure
step_output = self._step_fn()
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 315, in _training_step
training_step_output = call._call_strategy_hook(trainer, "training_step", *kwargs.values())
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 309, in _call_strategy_hook
output = fn(*args, **kwargs)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/strategies/strategy.py", line 382, in training_step
return self.lightning_module.training_step(*args, **kwargs)
File "/home/aws_install/workspace/ENVISION/source/farse-cnn/model_farsecnn.py", line 228, in training_step
output = self.shared_step(batch, batch_idx)
File "/home/aws_install/workspace/ENVISION/source/farse-cnn/model_detect_farsecnn.py", line 39, in shared_step
pred = self(batch)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/aws_install/workspace/ENVISION/source/farse-cnn/model_detect_farsecnn.py", line 30, in forward
x = super(LitDetectFARSECNN, self).forward(batch)
File "/home/aws_install/workspace/ENVISION/source/farse-cnn/model_farsecnn.py", line 107, in forward
x = l(x)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/aws_install/workspace/ENVISION/source/farse-cnn/layers/BranchBlock.py", line 54, in forward
if (x1['lengths']!=x2['lengths']).any():
RuntimeError: The size of tensor a (188602) must match the size of tensor b (189998) at non-singleton dimension 0
Epoch 0: 0%| | 0/23999 [00:02<?, ?it/s]
Could you please advise on how to resolve this issue?
While running training, I get an error with the following traceback:
Traceback (most recent call last):
File "/home/aws_install/workspace/ENVISION/source/farse-cnn/train.py", line 147, in <module>
main(train_cfg_path)
File "/home/aws_install/workspace/ENVISION/source/farse-cnn/train.py", line 137, in main
trainer.fit(net, ckpt_path=existing_ckpt)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 545, in fit
call._call_and_handle_interrupt(
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 44, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 581, in _fit_impl
self._run(model, ckpt_path=ckpt_path)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 990, in _run
results = self._run_stage()
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 1036, in _run_stage
self.fit_loop.run()
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py", line 202, in run
self.advance()
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py", line 359, in advance
self.epoch_loop.run(self._data_fetcher)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/loops/training_epoch_loop.py", line 136, in run
self.advance(data_fetcher)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/loops/training_epoch_loop.py", line 240, in advance
batch_output = self.automatic_optimization.run(trainer.optimizers[0], batch_idx, kwargs)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 180, in run
closure()
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 140, in __call__
self._result = self.closure(*args, **kwargs)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 126, in closure
step_output = self._step_fn()
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 315, in _training_step
training_step_output = call._call_strategy_hook(trainer, "training_step", *kwargs.values())
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 309, in _call_strategy_hook
output = fn(*args, **kwargs)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/lightning/pytorch/strategies/strategy.py", line 382, in training_step
return self.lightning_module.training_step(*args, **kwargs)
File "/home/aws_install/workspace/ENVISION/source/farse-cnn/model_farsecnn.py", line 228, in training_step
output = self.shared_step(batch, batch_idx)
File "/home/aws_install/workspace/ENVISION/source/farse-cnn/model_detect_farsecnn.py", line 39, in shared_step
pred = self(batch)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/aws_install/workspace/ENVISION/source/farse-cnn/model_detect_farsecnn.py", line 30, in forward
x = super(LitDetectFARSECNN, self).forward(batch)
File "/home/aws_install/workspace/ENVISION/source/farse-cnn/model_farsecnn.py", line 107, in forward
x = l(x)
File "/home/aws_install/miniconda3/envs/fcnn/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/aws_install/workspace/ENVISION/source/farse-cnn/layers/BranchBlock.py", line 54, in forward
if (x1['lengths']!=x2['lengths']).any():
RuntimeError: The size of tensor a (188602) must match the size of tensor b (189998) at non-singleton dimension 0
Epoch 0: 0%| | 0/23999 [00:02<?, ?it/s]
Could you please advise on how to resolve this issue?