Project-MONAI · mattlin1124 · Aug 9, 2025 · Aug 9, 2025 · Aug 9, 2025 · Aug 9, 2025
diff --git a/.gitignore b/.gitignore
@@ -165,3 +165,4 @@ runs
 *.pth
 
 *zarr/*
+issue38366/
@@ -38,6 +38,58 @@
 
 __all__ = ["sliding_window_inference"]
 
+def ensure_channel_first(x: torch.Tensor, spatial_ndim: Optional[int] = None) -> Tuple[torch.Tensor, int]:
+    """
+    Normalize a tensor to channel-first layout (N, C, spatial...).
+
+    The layout is guessed from the sizes of dim 1 and the last dim: a size of at most 32 is treated as a
+    plausible channel count, anything larger as a spatial extent.
+
+    Args:
+        x: Tensor with shape (N, C, spatial...) or (N, spatial..., C).
+        spatial_ndim: Number of spatial dimensions. If None, it is taken to be x.ndim - 2. When given, it
+            must equal x.ndim - 2 (one batch dim and one channel dim besides the spatial dims).
+
+    Returns:
+        A tuple (x_cf, orig_channel_dim):
+        - x_cf: the tensor in channel-first layout (the input object itself when nothing is moved).
+        - orig_channel_dim: 1 if the input was treated as channel-first; -1 if the channel was last.
+
+    Raises:
+        TypeError: if x is not a torch.Tensor.
+        ValueError: if x.ndim < 3, if spatial_ndim is given and differs from x.ndim - 2, or if both dim 1
+            and the last dim exceed the threshold so that neither looks like a channel dim.
+
+    Notes:
+        Uses a small-channel heuristic (<=32) typical for segmentation/classification. When both candidate
+        dims are small the layout is ambiguous; the input is returned unchanged and reported as
+        channel-first to avoid silent mis-reordering.
+    """
+    if not isinstance(x, torch.Tensor):
+        raise TypeError(f"Expected torch.Tensor, got {type(x)}")
+    if x.ndim < 3:
+        raise ValueError(f"Expected >=3 dims (N,C,spatial...), got shape={tuple(x.shape)}")
+
+    # A caller-supplied spatial_ndim must agree with the tensor rank; rejecting a mismatch here is
+    # clearer than silently guessing a layout for a tensor of unexpected dimensionality.
+    if spatial_ndim is not None and spatial_ndim != x.ndim - 2:
+        raise ValueError(
+            f"spatial_ndim={spatial_ndim} is inconsistent with shape={tuple(x.shape)}; "
+            f"expected x.ndim == spatial_ndim + 2"
+        )
+
+    # Heuristic: channels are usually small (e.g., <=32) in segmentation/classification.
+    threshold = 32
+    s1 = int(x.shape[1])  # candidate channel at dim=1 (N, C, ...)
+    sl = int(x.shape[-1])  # candidate channel at last dim (..., C)
+
+    # Unambiguous cases first.
+    if s1 <= threshold and sl > threshold:
+        # Looks like NCHW/D already.
+        return x, 1
+    if sl <= threshold and s1 > threshold:
+        # Looks like NHWC/D: move last dim to channel dim.
+        return x.movedim(-1, 1), -1
+
+    # Ambiguous: both sides small (or both large). Prefer preserving to avoid silent mis-reordering.
+    if s1 <= threshold and sl <= threshold:
+        return x, 1
+
+    raise ValueError(
+        f"cannot infer channel dim for shape={tuple(x.shape)}; expected [N,C,spatial...] or [N,spatial...,C]; "
+        f"both dim1={s1} and dim-1={sl} look like spatial dims"
+    )
-    # Ambiguous: both sides small (or both large). Prefer preserving to avoid silent mis-reordering.
-    if s1 <= threshold and sl <= threshold:
-        return x, 1
-
-    raise ValueError(
-        f"cannot infer channel dim for shape={tuple(x.shape)}; expected [N,C,spatial...] or [N,spatial...,C]; "
-        f"both dim1={s1} and dim-1={sl} look like spatial dims"
-    )
+    # Ambiguous: both sides small (or both large). Prefer preserving to avoid silent mis-reordering.
+    if (s1 <= threshold and sl <= threshold) or (s1 > threshold and sl > threshold):
+        return x, 1
-    # Ambiguous: both sides small (or both large). Prefer preserving to avoid silent mis-reordering.
-    if s1 <= threshold and sl <= threshold:
-        return x, 1
-
-    raise ValueError(
-        f"cannot infer channel dim for shape={tuple(x.shape)}; expected [N,C,spatial...] or [N,spatial...,C]; "
-        f"both dim1={s1} and dim-1={sl} look like spatial dims"
-    )
+    # Ambiguous: both sides small (or both large). Prefer preserving to avoid silent mis-reordering.
+    if (s1 <= threshold and sl <= threshold) or (s1 > threshold and sl > threshold):
+        return x, 1
+
 
 def sliding_window_inference(
     inputs: torch.Tensor | MetaTensor,

@@ -15,6 +15,8 @@
 
 from monai.metrics.utils import do_metric_reduction
 from monai.utils import MetricReduction, deprecated_arg
+from monai.inferers.utils import ensure_channel_first
+
 
 from .metric import CumulativeIterationMetric
 
@@ -123,6 +125,7 @@ def __init__(
             num_classes=self.num_classes,
         )
 
+
     def _compute_tensor(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor:  # type: ignore[override]
         """
         Compute the dice value using ``DiceHelper``.
@@ -306,6 +309,13 @@ def __call__(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor | tupl
                 the number of channels is inferred from ``y_pred.shape[1]`` when ``num_classes is None``.
             y: ground truth with shape (batch_size, num_classes or 1, spatial_dims...).
         """
+        y_pred, _ = ensure_channel_first(y_pred)
+
+        n_ch = self.num_classes or y_pred.shape[1]
+        if y.ndim == y_pred.ndim and y.shape[-1] in (1, n_ch):
+            y, _ = ensure_channel_first(y)
+
+
         _apply_argmax, _threshold = self.apply_argmax, self.threshold
         if self.num_classes is None:
             n_pred_ch = y_pred.shape[1]  # y_pred is in one-hot format or multi-channel scores