diff --git a/src/madengine/core/context.py b/src/madengine/core/context.py
index 24763588..d67e8c6a 100644
--- a/src/madengine/core/context.py
+++ b/src/madengine/core/context.py
@@ -83,6 +83,7 @@ def __init__(
         additional_context_file: str = None,
         build_only_mode: bool = False,
         rocm_path: str = None,
+        detect_local_gpu_arch: bool = False,
     ) -> None:
         """Constructor of the Context class.
 
@@ -91,6 +92,9 @@ def __init__(
             additional_context_file: The additional context file.
             build_only_mode: Whether running in build-only mode (no GPU detection).
             rocm_path: Optional ROCm installation path (overrides ROCM_PATH env; default /opt/rocm).
+            detect_local_gpu_arch: When True and in build_only_mode, attempt to auto-detect
+                MAD_SYSTEM_GPU_ARCHITECTURE from the local node and inject it into docker_build_arg.
+                Has no effect when build_only_mode=False (runtime mode detects it via init_gpu_context).
 
         Raises:
             RuntimeError: If GPU detection fails and not in build-only mode.
@@ -100,6 +104,7 @@ def __init__(
         self.console = Console()
         self._gpu_context_initialized = False
         self._build_only_mode = build_only_mode
+        self._detect_local_gpu_arch = detect_local_gpu_arch
         self._system_context_initialized = False
         self._gpu_tool_manager = None  # Lazy initialization
 
@@ -137,17 +142,22 @@ def __init__(
             self.init_runtime_context()
         else:
             # For build-only mode, only initialize what's needed for building
-            self.init_build_context()
+            self.init_build_context(detect_gpu_arch=self._detect_local_gpu_arch)
 
         ## ADD MORE CONTEXTS HERE ##
 
-    def init_build_context(self) -> None:
+    def init_build_context(self, detect_gpu_arch: bool = False) -> None:
         """Initialize build-specific context.
 
         This method sets up only the context needed for Docker builds,
         avoiding GPU detection that would fail on build-only nodes.
         System-specific contexts (host_os, numa_balancing, etc.) should be
         provided via --additional-context for build-only nodes if needed.
+
+        Args:
+            detect_gpu_arch: When True, attempt to auto-detect MAD_SYSTEM_GPU_ARCHITECTURE
+                from the local node and inject it into docker_build_arg. Fails gracefully
+                if no GPU is present (e.g., on a pure CI build node).
         """
         print("Initializing build-only context...")
 
@@ -168,9 +178,35 @@ def init_build_context(self) -> None:
                     "Consider providing host_os via --additional-context if needed for build"
                 )
 
-        # Don't detect GPU-specific contexts in build-only mode
-        # These should be provided via additional_context if needed for build args
-        # (GPU arch guidance is emitted in BuildOrchestrator after model/Dockerfile discovery.)
+        # Optionally auto-detect GPU architecture for local full-workflow builds (build+run).
+        # Skipped for standalone `madengine build` on non-GPU/CI nodes (detect_gpu_arch=False).
+        # A value already present in docker_build_arg (e.g. from --additional-context) is never
+        # overridden, and detection is best-effort: any failure only prints a warning.
+        if detect_gpu_arch and "MAD_SYSTEM_GPU_ARCHITECTURE" not in self.ctx.get("docker_build_arg", {}):
+            try:
+                # Function-scope imports: only executed when detection is requested.
+                from madengine.utils.gpu_validator import detect_gpu_vendor
+                from madengine.execution.dockerfile_utils import normalize_architecture_name
+
+                vendor = detect_gpu_vendor(self._rocm_path)
+                if vendor in (GPUVendor.AMD, GPUVendor.NVIDIA):
+                    manager = get_gpu_tool_manager(vendor, self._rocm_path)
+                    raw_arch = manager.get_gpu_architecture() or ""
+                    arch = normalize_architecture_name(raw_arch) or raw_arch.strip()
+                    if arch:
+                        # setdefault: the guard above tolerates a missing docker_build_arg,
+                        # so create it here instead of raising KeyError into the except below.
+                        self.ctx.setdefault("docker_build_arg", {})["MAD_SYSTEM_GPU_ARCHITECTURE"] = arch
+                        print(f"Auto-detected GPU architecture for build: {arch}")
+                    else:
+                        print("Warning: GPU detected but its architecture could not be determined; MAD_SYSTEM_GPU_ARCHITECTURE will not be set automatically.")
+                        print("Consider providing it via --additional-context if needed for build args.")
+                else:
+                    print("Warning: No supported GPU detected; MAD_SYSTEM_GPU_ARCHITECTURE will not be set automatically.")
+                    print("Consider providing it via --additional-context if needed for build args.")
+            except Exception as e:
+                print(f"Warning: Could not auto-detect GPU architecture for build: {e}")
+                print("Consider providing MAD_SYSTEM_GPU_ARCHITECTURE via --additional-context if needed for build args.")
 
         # Don't initialize NUMA balancing check for build-only nodes
         # This is runtime-specific and should be handled on execution nodes
diff --git a/src/madengine/orchestration/build_orchestrator.py b/src/madengine/orchestration/build_orchestrator.py
index da06f91f..0825ab16 100644
--- a/src/madengine/orchestration/build_orchestrator.py
+++ b/src/madengine/orchestration/build_orchestrator.py
@@ -46,13 +46,17 @@ class BuildOrchestrator:
     - Save deployment_config from --additional-context
     """
 
-    def __init__(self, args, additional_context: Optional[Dict] = None):
+    def __init__(self, args, additional_context: Optional[Dict] = None, detect_local_gpu_arch: bool = False):
         """
         Initialize build orchestrator.
 
         Args:
             args: CLI arguments namespace
             additional_context: Dict from --additional-context (merged with args if present)
+            detect_local_gpu_arch: When True, auto-detect MAD_SYSTEM_GPU_ARCHITECTURE from the
+                local node before building. Intended for full workflow (build+run) on a local
+                single node. Has no effect if the user already provided the value via
+                --additional-context. Default False preserves existing standalone-build behavior.
         """
         self.args = args
         self.console = Console(live_output=getattr(args, "live_output", True))
@@ -120,7 +124,9 @@ def __init__(self, args, additional_context: Optional[Dict] = None):
         ))
         self.rich_console.print()
 
-        # Initialize context in build-only mode (no GPU detection)
+        # Initialize context in build-only mode (no GPU detection by default).
+        # Pass detect_local_gpu_arch so Context.init_build_context() can optionally
+        # auto-detect MAD_SYSTEM_GPU_ARCHITECTURE for full workflow (build+run) runs.
         # Context expects additional_context as a string representation of Python dict
         # Use repr() instead of json.dumps() because Context uses ast.literal_eval()
         # Use self.additional_context (post-ConfigLoader), not pre-defaults merged_context
@@ -128,6 +134,7 @@ def __init__(self, args, additional_context: Optional[Dict] = None):
         self.context = Context(
             additional_context=context_string,
             build_only_mode=True,
+            detect_local_gpu_arch=detect_local_gpu_arch,
         )
 
         # Load credentials if available
@@ -288,6 +295,12 @@ def execute(
             )
             self._warn_if_mad_arch_unresolved_for_dockerfiles(models, builder)
 
+            resolved_arch = self.context.ctx.get("docker_build_arg", {}).get("MAD_SYSTEM_GPU_ARCHITECTURE")
+            if resolved_arch:
+                self.rich_console.print(
+                    f"[green]✓ MAD_SYSTEM_GPU_ARCHITECTURE resolved: {resolved_arch}[/green]\n"
+                )
+
             # Step 3: Build Docker images
             self.rich_console.print("[bold cyan]🏗️  Building Docker images...[/bold cyan]")
 
diff --git a/src/madengine/orchestration/run_orchestrator.py b/src/madengine/orchestration/run_orchestrator.py
index 6725a457..67749514 100644
--- a/src/madengine/orchestration/run_orchestrator.py
+++ b/src/madengine/orchestration/run_orchestrator.py
@@ -345,7 +345,16 @@ def _build_phase(self, tags: list, registry: Optional[str] = None) -> str:
         # Update args with tags
         self.args.tags = tags
 
-        build_orch = BuildOrchestrator(self.args, self.additional_context)
+        # detect_local_gpu_arch=True: full workflow on a local single node — auto-detect
+        # MAD_SYSTEM_GPU_ARCHITECTURE before the build so Dockerfiles that require it
+        # (ARG MAD_SYSTEM_GPU_ARCHITECTURE with no default) are built correctly without
+        # requiring the user to manually pass --additional-context.
+        # The user's explicitly provided value (if any) is still respected and not overridden.
+        build_orch = BuildOrchestrator(
+            self.args,
+            self.additional_context,
+            detect_local_gpu_arch=True,
+        )
         manifest_file = build_orch.execute(
             registry=registry,
             clean_cache=getattr(self.args, "clean_docker_cache", False),
diff --git a/tests/unit/test_context_logic.py b/tests/unit/test_context_logic.py
index 7f50f491..17d1de5d 100644
--- a/tests/unit/test_context_logic.py
+++ b/tests/unit/test_context_logic.py
@@ -7,9 +7,10 @@
 """
 
 import pytest
-from unittest.mock import Mock, patch
+from unittest.mock import Mock, MagicMock, patch
 
 from madengine.core.context import Context
+from madengine.utils.gpu_validator import GPUVendor
 
 
 @pytest.mark.unit
@@ -94,4 +95,97 @@ def test_build_only_no_mad_arch_info_line(self, mock_host, mock_ctx):
         assert not any("MAD_SYSTEM_GPU_ARCHITECTURE" in m for m in msgs)
 
 
-# Total: 5 unit tests
+def _make_build_only_ctx(additional_context="{}") -> Context:
+    """Build a build-only Context while Context.init_build_context is patched out.
+
+    get_ctx_test and get_host_os are stubbed during construction as well, so the returned
+    object has not run init_build_context yet and tests can call it under their own patches.
+    """
+    stub_init = patch.object(Context, "init_build_context")
+    stub_ctx_test = patch.object(Context, "get_ctx_test", return_value="test")
+    stub_host_os = patch.object(Context, "get_host_os", return_value="linux")
+    with stub_init, stub_ctx_test, stub_host_os:
+        return Context(additional_context=additional_context, build_only_mode=True)
+
+
+@pytest.mark.unit
+class TestBuildContextGpuArchAutoDetect:
+    """Exercise the detect_gpu_arch=True/False paths of Context.init_build_context."""
+
+    def test_auto_detect_injects_arch_when_absent(self):
+        """With no user-supplied value, the detected arch lands in docker_build_arg."""
+        build_ctx = _make_build_only_ctx()
+
+        gpu_manager = MagicMock()
+        gpu_manager.get_gpu_architecture.return_value = "gfx942"
+
+        # Patch targets: get_gpu_tool_manager is looked up as a module-level name of
+        # context.py, while detect_gpu_vendor / normalize_architecture_name are imported
+        # inside init_build_context and are therefore patched in their defining modules.
+        with patch.object(Context, "get_ctx_test", return_value="test"), \
+             patch.object(Context, "get_host_os", return_value="linux"), \
+             patch("madengine.core.context.get_gpu_tool_manager", return_value=gpu_manager), \
+             patch("madengine.utils.gpu_validator.detect_gpu_vendor", return_value=GPUVendor.AMD), \
+             patch("madengine.execution.dockerfile_utils.normalize_architecture_name", return_value="gfx942"):
+            build_ctx.init_build_context(detect_gpu_arch=True)
+
+        assert build_ctx.ctx["docker_build_arg"]["MAD_SYSTEM_GPU_ARCHITECTURE"] == "gfx942"
+
+    def test_auto_detect_does_not_override_user_value(self):
+        """A MAD_SYSTEM_GPU_ARCHITECTURE given via additional_context wins over detection."""
+        build_ctx = _make_build_only_ctx(
+            additional_context="{'docker_build_arg': {'MAD_SYSTEM_GPU_ARCHITECTURE': 'gfx90a'}}"
+        )
+
+        gpu_manager = MagicMock()
+        gpu_manager.get_gpu_architecture.return_value = "gfx942"
+
+        with patch.object(Context, "get_ctx_test", return_value="test"), \
+             patch.object(Context, "get_host_os", return_value="linux"), \
+             patch("madengine.core.context.get_gpu_tool_manager", return_value=gpu_manager), \
+             patch("madengine.utils.gpu_validator.detect_gpu_vendor", return_value=GPUVendor.AMD), \
+             patch("madengine.execution.dockerfile_utils.normalize_architecture_name", return_value="gfx942"):
+            build_ctx.init_build_context(detect_gpu_arch=True)
+
+        # The mocked detection reports gfx942; the user's gfx90a must still be in place.
+        assert build_ctx.ctx["docker_build_arg"]["MAD_SYSTEM_GPU_ARCHITECTURE"] == "gfx90a"
+
+    def test_auto_detect_warns_on_no_gpu(self):
+        """GPUVendor.UNKNOWN yields the 'No supported GPU detected' warning and no arch."""
+        build_ctx = _make_build_only_ctx()
+
+        with patch.object(Context, "get_ctx_test", return_value="test"), \
+             patch.object(Context, "get_host_os", return_value="linux"), \
+             patch("madengine.utils.gpu_validator.detect_gpu_vendor", return_value=GPUVendor.UNKNOWN), \
+             patch("builtins.print") as print_spy:
+            build_ctx.init_build_context(detect_gpu_arch=True)
+
+        printed = [str(record.args[0]) for record in print_spy.call_args_list if record.args]
+        assert any("No supported GPU detected" in line for line in printed)
+        assert "MAD_SYSTEM_GPU_ARCHITECTURE" not in build_ctx.ctx.get("docker_build_arg", {})
+
+    def test_auto_detect_handles_exception_gracefully(self):
+        """An exception raised by vendor detection is reported as a warning, not propagated."""
+        build_ctx = _make_build_only_ctx()
+
+        with patch.object(Context, "get_ctx_test", return_value="test"), \
+             patch.object(Context, "get_host_os", return_value="linux"), \
+             patch("madengine.utils.gpu_validator.detect_gpu_vendor", side_effect=RuntimeError("rocminfo not found")), \
+             patch("builtins.print") as print_spy:
+            build_ctx.init_build_context(detect_gpu_arch=True)
+
+        printed = [str(record.args[0]) for record in print_spy.call_args_list if record.args]
+        assert any("Could not auto-detect GPU architecture" in line for line in printed)
+        assert "MAD_SYSTEM_GPU_ARCHITECTURE" not in build_ctx.ctx.get("docker_build_arg", {})
+
+    def test_no_detection_when_flag_is_false(self):
+        """With detect_gpu_arch=False the vendor detector is never invoked."""
+        build_ctx = _make_build_only_ctx()
+
+        with patch.object(Context, "get_ctx_test", return_value="test"), \
+             patch.object(Context, "get_host_os", return_value="linux"), \
+             patch("madengine.utils.gpu_validator.detect_gpu_vendor") as detect_spy:
+            build_ctx.init_build_context(detect_gpu_arch=False)
+
+        detect_spy.assert_not_called()
+        assert "MAD_SYSTEM_GPU_ARCHITECTURE" not in build_ctx.ctx.get("docker_build_arg", {})