Skip to content

Commit 3b18037

Browse files
committed
Ensure final epoch always gets validated even if it doesn't line up with val interval. Add a few comments.
1 parent 2720dde commit 3b18037

File tree

1 file changed

+8
-6
lines changed

1 file changed

+8
-6
lines changed

train.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1036,14 +1036,16 @@ def main():
10361036
_logger.info("Distributing BatchNorm running means and vars")
10371037
utils.distribute_bn(model, args.world_size, args.dist_bn == 'reduce')
10381038

1039-
if (epoch + 1) % args.val_interval != 0:
1039+
epoch_p_1 = epoch + 1
1040+
if epoch_p_1 % args.val_interval != 0 and epoch_p_1 != num_epochs:
10401041
if utils.is_primary(args):
10411042
_logger.info("Skipping eval and checkpointing ")
10421043
if lr_scheduler is not None:
1043-
# step LR for next epoch
1044-
# careful when using metric dependent lr_scheduler
1045-
lr_scheduler.step(epoch + 1, metric=None)
1046-
# skip validation and metric logic
1044+
# step LR for next epoch, take care when using metric dependent lr_scheduler
1045+
lr_scheduler.step(epoch_p_1, metric=None)
1046+
# Skip validation and metric logic
1047+
# FIXME we could make the logic below able to handle no eval metrics more gracefully,
1048+
# but for simplicity opting to just skip for now.
10471049
continue
10481050

10491051
if loader_eval is not None:
@@ -1097,7 +1099,7 @@ def main():
10971099

10981100
if lr_scheduler is not None:
10991101
# step LR for next epoch
1100-
lr_scheduler.step(epoch + 1, latest_metric)
1102+
lr_scheduler.step(epoch_p_1, latest_metric)
11011103

11021104
latest_results = {
11031105
'epoch': epoch,

0 commit comments

Comments (0)