From d7c090a1a23eb13df75617a2fad8b2da514583a8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 6 Feb 2026 00:45:04 +0000 Subject: [PATCH 1/4] Initial plan From 76517119a51339b6cdd4014b4ffc84ba419c4ee7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 6 Feb 2026 00:49:42 +0000 Subject: [PATCH 2/4] Add gradient accumulation and AMP support to training Co-authored-by: thinksyncs <42225585+thinksyncs@users.noreply.github.com> --- .../test_train_minimal_grad_accum_amp.py | 61 +++++++++++++++++ rtdetr_pose/tools/train_minimal.py | 67 +++++++++++++++++-- train_setting.yaml | 7 ++ 3 files changed, 128 insertions(+), 7 deletions(-) create mode 100644 rtdetr_pose/tests/test_train_minimal_grad_accum_amp.py diff --git a/rtdetr_pose/tests/test_train_minimal_grad_accum_amp.py b/rtdetr_pose/tests/test_train_minimal_grad_accum_amp.py new file mode 100644 index 0000000..eaf611c --- /dev/null +++ b/rtdetr_pose/tests/test_train_minimal_grad_accum_amp.py @@ -0,0 +1,61 @@ +import importlib.util +import unittest +from pathlib import Path + +try: + import torch +except ImportError: # pragma: no cover + torch = None + + +def _load_train_minimal_module(): + repo_root = Path(__file__).resolve().parents[2] + script_path = repo_root / "rtdetr_pose" / "tools" / "train_minimal.py" + spec = importlib.util.spec_from_file_location("rtdetr_pose_tools_train_minimal", script_path) + assert spec is not None and spec.loader is not None + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod + + +@unittest.skipIf(torch is None, "torch not installed") +class TestTrainMinimalGradAccumAMP(unittest.TestCase): + def test_gradient_accumulation_argument(self): + """Test that gradient accumulation steps argument is parsed correctly.""" + mod = _load_train_minimal_module() + + # Test default value + args = mod.parse_args([]) + self.assertEqual(args.gradient_accumulation_steps, 1) + + # Test custom value + args = mod.parse_args(["--gradient-accumulation-steps", "4"]) + self.assertEqual(args.gradient_accumulation_steps, 4) + + def test_amp_argument(self): + """Test that AMP argument is parsed correctly.""" + mod = _load_train_minimal_module() + + # Test default value (False) + args = mod.parse_args([]) + self.assertFalse(args.use_amp) + + # Test with flag enabled + args = mod.parse_args(["--use-amp"]) + self.assertTrue(args.use_amp) + + def test_clip_grad_norm_exists(self): + """Test that gradient clipping argument exists (already implemented).""" + mod = _load_train_minimal_module() + + # Test default value + args = mod.parse_args([]) + self.assertEqual(args.clip_grad_norm, 0.0) + + # Test custom value + args = mod.parse_args(["--clip-grad-norm", "1.0"]) + self.assertEqual(args.clip_grad_norm, 1.0) + + +if __name__ == "__main__": + unittest.main() diff --git a/rtdetr_pose/tools/train_minimal.py b/rtdetr_pose/tools/train_minimal.py index 2640746..45df881 100644 --- a/rtdetr_pose/tools/train_minimal.py +++ b/rtdetr_pose/tools/train_minimal.py @@ -124,6 +124,17 @@ def build_parser() -> argparse.ArgumentParser: default=0.0, help="If >0, clip gradients to this max norm before optimizer step.", ) + parser.add_argument( + "--gradient-accumulation-steps", + type=int, + default=1, + help="Number of steps to accumulate gradients before optimizer update (default: 1, no accumulation).", + ) + parser.add_argument( + "--use-amp", + action="store_true", + help="Enable Automatic Mixed Precision (AMP) training with torch.cuda.amp.", + ) parser.add_argument( "--lr-warmup-steps", type=int, @@ -1697,6 +1708,15 @@ def main(argv: list[str] | None = None) -> int: weight_decay=float(args.weight_decay), ) + # Initialize GradScaler for AMP if enabled + scaler = None + if args.use_amp: + if device.startswith("cuda"): + scaler = torch.cuda.amp.GradScaler() + print("amp_enabled=True device=cuda") + else: + print("amp_warning: --use-amp requires CUDA device; AMP disabled") + start_epoch = 0 global_step = 0 if args.resume_from: @@ -1755,7 +1775,14 @@ def main(argv: list[str] | None = None) -> int: mim_ratio = float(targets["mim_mask_ratio"].mean().detach().cpu()) except Exception: mim_ratio = None - out = model(images) + + # Forward pass with optional AMP autocast + if scaler is not None: + with torch.cuda.amp.autocast(): + out = model(images) + else: + out = model(images) + mim_loss = None if args.mim_teacher and float(mim_weight) > 0 and isinstance(targets, dict): image_raw = targets.get("image_raw") @@ -1766,7 +1793,11 @@ def main(argv: list[str] | None = None) -> int: if was_training: model.eval() with torch.no_grad(): - teacher_out = model(image_raw.to(device)) + if scaler is not None: + with torch.cuda.amp.autocast(): + teacher_out = model(image_raw.to(device)) + else: + teacher_out = model(image_raw.to(device)) if was_training: model.train() loss_items = [] @@ -1873,11 +1904,33 @@ def main(argv: list[str] | None = None) -> int: } print("loss_breakdown", " ".join(f"{k}={v:.6g}" for k, v in sorted(printable.items()))) - optim.zero_grad(set_to_none=True) - loss.backward() - if args.clip_grad_norm and float(args.clip_grad_norm) > 0: - torch.nn.utils.clip_grad_norm_(model.parameters(), float(args.clip_grad_norm)) - optim.step() + # Gradient accumulation: scale loss by accumulation steps + accum_steps = int(args.gradient_accumulation_steps) + if accum_steps > 1: + loss = loss / accum_steps + + # Backward pass with optional AMP scaling + if scaler is not None: + scaler.scale(loss).backward() + else: + loss.backward() + + # Perform optimizer step only at accumulation boundaries + # steps is 0-indexed within each epoch, so we use (steps + 1) for the check + if (steps + 1) % accum_steps == 0: + if scaler is not None: + # Unscale gradients before clipping + if args.clip_grad_norm and float(args.clip_grad_norm) > 0: + scaler.unscale_(optim) + torch.nn.utils.clip_grad_norm_(model.parameters(), float(args.clip_grad_norm)) + scaler.step(optim) + scaler.update() + optim.zero_grad(set_to_none=True) + else: + if args.clip_grad_norm and float(args.clip_grad_norm) > 0: + torch.nn.utils.clip_grad_norm_(model.parameters(), float(args.clip_grad_norm)) + optim.step() + optim.zero_grad(set_to_none=True) if args.lr_warmup_steps and int(args.lr_warmup_steps) > 0: lr_now = compute_warmup_lr( diff --git a/train_setting.yaml b/train_setting.yaml index bd2a72a..516af17 100644 --- a/train_setting.yaml +++ b/train_setting.yaml @@ -14,3 +14,10 @@ metrics_csv: reports/train_metrics.csv tensorboard_logdir: reports/tb export_onnx: true onnx_out: reports/rtdetr_pose.onnx +# Gradient clipping (already implemented) +# clip_grad_norm: 1.0 +# Gradient accumulation (new feature) +# gradient_accumulation_steps: 1 +# AMP (Automatic Mixed Precision) - requires CUDA device (new feature) +# use_amp: false + From 484ba105bead4618118ab1baeecb0e076dfca39d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 6 Feb 2026 00:51:11 +0000 Subject: [PATCH 3/4] Fix loss logging to use unscaled loss for gradient accumulation Co-authored-by: thinksyncs <42225585+thinksyncs@users.noreply.github.com> --- .../tests/test_train_minimal_integration.py | 120 ++++++++++++++++++ rtdetr_pose/tools/train_minimal.py | 5 +- 2 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 rtdetr_pose/tests/test_train_minimal_integration.py diff --git a/rtdetr_pose/tests/test_train_minimal_integration.py b/rtdetr_pose/tests/test_train_minimal_integration.py new file mode 100644 index 0000000..3084d1f --- /dev/null +++ b/rtdetr_pose/tests/test_train_minimal_integration.py @@ -0,0 +1,120 @@ +import importlib.util +import json +import tempfile +import unittest +from pathlib import Path + +try: + import torch +except ImportError: # pragma: no cover + torch = None + + +def _load_train_minimal_module(): + repo_root = Path(__file__).resolve().parents[2] + script_path = repo_root / "rtdetr_pose" / "tools" / "train_minimal.py" + spec = importlib.util.spec_from_file_location("rtdetr_pose_tools_train_minimal", script_path) + assert spec is not None and spec.loader is not None + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod + + +@unittest.skipIf(torch is None, "torch not installed") +class TestTrainMinimalIntegration(unittest.TestCase): + def setUp(self): + self.repo_root = Path(__file__).resolve().parents[2] + self.data_dir = self.repo_root / "data" / "coco128" + if not self.data_dir.is_dir(): + self.data_dir = self.repo_root.parent / "data" / "coco128" + + def test_gradient_accumulation_integration(self): + """Test that training works with gradient accumulation.""" + if not self.data_dir.is_dir(): + self.skipTest("coco128 missing; run: bash tools/fetch_coco128.sh") + + mod = _load_train_minimal_module() + + with tempfile.TemporaryDirectory() as tmpdir: + args = [ + "--dataset-root", str(self.data_dir), + "--split", "train2017", + "--epochs", "1", + "--batch-size", "2", + "--max-steps", "3", + "--image-size", "64", + "--device", "cpu", + "--gradient-accumulation-steps", "2", + "--metrics-jsonl", str(Path(tmpdir) / "metrics.jsonl"), + "--no-export-onnx", + ] + + result = mod.main(args) + self.assertEqual(result, 0, "Training should complete successfully") + + # Check that metrics file exists + metrics_file = Path(tmpdir) / "metrics.jsonl" + self.assertTrue(metrics_file.exists(), "Metrics file should be created") + + # Verify metrics were written + with open(metrics_file) as f: + lines = f.readlines() + self.assertGreater(len(lines), 0, "Metrics should be logged") + + def test_amp_on_cpu_warning(self): + """Test that AMP on CPU device shows warning.""" + if not self.data_dir.is_dir(): + self.skipTest("coco128 missing; run: bash tools/fetch_coco128.sh") + + mod = _load_train_minimal_module() + + with tempfile.TemporaryDirectory() as tmpdir: + args = [ + "--dataset-root", str(self.data_dir), + "--split", "train2017", + "--epochs", "1", + "--batch-size", "2", + "--max-steps", "2", + "--image-size", "64", + "--device", "cpu", + "--use-amp", + "--metrics-jsonl", str(Path(tmpdir) / "metrics.jsonl"), + "--no-export-onnx", + ] + + # This should complete but print a warning about AMP requiring CUDA + result = mod.main(args) + self.assertEqual(result, 0, "Training should complete successfully even with AMP on CPU") + + def test_combined_features(self): + """Test that gradient clipping, accumulation work together.""" + if not self.data_dir.is_dir(): + self.skipTest("coco128 missing; run: bash tools/fetch_coco128.sh") + + mod = _load_train_minimal_module() + + with tempfile.TemporaryDirectory() as tmpdir: + args = [ + "--dataset-root", str(self.data_dir), + "--split", "train2017", + "--epochs", "1", + "--batch-size", "2", + "--max-steps", "4", + "--image-size", "64", + "--device", "cpu", + "--clip-grad-norm", "1.0", + "--gradient-accumulation-steps", "2", + "--metrics-jsonl", str(Path(tmpdir) / "metrics.jsonl"), + "--no-export-onnx", + ] + + result = mod.main(args) + self.assertEqual(result, 0, "Training should complete successfully with combined features") + + # Check that metrics file exists + metrics_file = Path(tmpdir) / "metrics.jsonl" + self.assertTrue(metrics_file.exists(), "Metrics file should be created") + + +if __name__ == "__main__": + unittest.main() diff --git a/rtdetr_pose/tools/train_minimal.py b/rtdetr_pose/tools/train_minimal.py index 45df881..9a210c7 100644 --- a/rtdetr_pose/tools/train_minimal.py +++ b/rtdetr_pose/tools/train_minimal.py @@ -1904,6 +1904,9 @@ def main(argv: list[str] | None = None) -> int: } print("loss_breakdown", " ".join(f"{k}={v:.6g}" for k, v in sorted(printable.items()))) + # Store unscaled loss for logging + loss_for_logging = loss.detach().cpu() + # Gradient accumulation: scale loss by accumulation steps accum_steps = int(args.gradient_accumulation_steps) if accum_steps > 1: @@ -1950,7 +1953,7 @@ def main(argv: list[str] | None = None) -> int: for group in optim.param_groups: group["lr"] = lr_now - running += float(loss.detach().cpu()) + running += float(loss_for_logging) steps += 1 global_step += 1 From 9230ff1e0f8451b2dbee3c6cabf58ef2a7dfaa6c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 6 Feb 2026 00:52:38 +0000 Subject: [PATCH 4/4] Remove trailing whitespace from empty lines Co-authored-by: thinksyncs <42225585+thinksyncs@users.noreply.github.com> --- rtdetr_pose/tools/train_minimal.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rtdetr_pose/tools/train_minimal.py b/rtdetr_pose/tools/train_minimal.py index 9a210c7..a80c4db 100644 --- a/rtdetr_pose/tools/train_minimal.py +++ b/rtdetr_pose/tools/train_minimal.py @@ -1775,14 +1775,14 @@ def main(argv: list[str] | None = None) -> int: mim_ratio = float(targets["mim_mask_ratio"].mean().detach().cpu()) except Exception: mim_ratio = None - + # Forward pass with optional AMP autocast if scaler is not None: with torch.cuda.amp.autocast(): out = model(images) else: out = model(images) - + mim_loss = None if args.mim_teacher and float(mim_weight) > 0 and isinstance(targets, dict): image_raw = targets.get("image_raw") @@ -1906,7 +1906,7 @@ def main(argv: list[str] | None = None) -> int: # Store unscaled loss for logging loss_for_logging = loss.detach().cpu() - + # Gradient accumulation: scale loss by accumulation steps accum_steps = int(args.gradient_accumulation_steps) if accum_steps > 1: