diff --git a/.gitignore b/.gitignore index f478b371..8bbecf96 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ x.pth docs .Rhistory s.pth +inst/doc
diff --git a/DESCRIPTION b/DESCRIPTION index 51ce5fae..49cd1ec3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -84,6 +84,7 @@ Collate: 'dataset-fgvc.R' 'dataset-flickr.R' 'dataset-flowers.R' + 'dataset-imagenet.R' 'dataset-lfw.R' 'dataset-mnist.R' 'dataset-oxfordiiitpet.R' @@ -94,7 +95,6 @@ Collate: 'dataset-vggface2.R' 'extension.R' 'globals.R' - 'imagenet.R' 'models-alexnet.R' 'models-convnext.R' 'models-convnext_detection.R' @@ -115,7 +115,6 @@ Collate: 'models-vit.R' 'ops-box_convert.R' 'ops-boxes.R' - 'tiny-imagenet-dataset.R' 'transforms-array.R' 'transforms-defaults.R' 'transforms-generics.R'
diff --git a/NAMESPACE b/NAMESPACE index a105773d..2d3e763d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -80,7 +80,9 @@ export(cifar100_dataset) export(cifar10_dataset) export(clip_boxes_to_image) export(coco_caption_dataset) +export(coco_classes) export(coco_detection_dataset) +export(coco_label) export(coco_segmentation_dataset) export(draw_bounding_boxes) export(draw_keypoints) @@ -99,6 +101,8 @@ export(flowers102_dataset) export(generalized_box_iou) export(get_collection_catalog) export(image_folder_dataset) +export(imagenet_21k_classes) +export(imagenet_21k_label) export(imagenet_classes) export(imagenet_label) export(kmnist_dataset) @@ -238,6 +242,9 @@ export(transform_to_tensor) export(transform_vflip) export(vggface2_dataset) export(vision_make_grid) +export(voc_classes) +export(voc_label) +export(voc_segmentation_classes) export(whoi_plankton_dataset) export(whoi_small_coralnet_dataset) export(whoi_small_plankton_dataset) @@ -270,6 +277,7 @@ importFrom(torch,nn_relu) importFrom(torch,nn_sequential) importFrom(torch,nn_softmax) importFrom(torch,nnf_gelu) +importFrom(torch,nnf_grid_sample) importFrom(torch,nnf_interpolate) importFrom(torch,nnf_layer_norm) importFrom(torch,nnf_normalize) @@ -279,6 +287,7 @@ importFrom(torch,torch_arange) importFrom(torch,torch_cat) importFrom(torch,torch_chunk) importFrom(torch,torch_clamp) +importFrom(torch,torch_empty) importFrom(torch,torch_flatten) importFrom(torch,torch_float32) importFrom(torch,torch_linspace) @@ -293,5 +302,6 @@ importFrom(torch,torch_stack) importFrom(torch,torch_tensor) importFrom(torch,torch_zeros) importFrom(torch,torch_zeros_like) +importFrom(utils,read.delim) importFrom(utils,tail) importFrom(zeallot,"%<-%")
diff --git a/NEWS.md b/NEWS.md index 3a71eccb..d4767694 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,7 @@ ## New features +* Added resolution functions for coco, imagenet_21k, and pascal_voc classes and labels (#284). * Added article showcasing `model_fcn_resnet50()` with visualization utilities `draw_segmentation_masks()` and `vision_make_grid()` (@DerrickUnleashed, #281). * Added collection dataset catalog with `search_collection()`, `get_collection_catalog()`, and `list_collection_datasets()` functions for discovering and exploring collections (#271, @ANAMASGARD). * Added `target_transform_coco_masks()` and `target_transform_trimap_masks()` transformation functions for explicit segmentation mask generation (@ANAMASGARD).
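A minimal usage sketch of the resolution helpers announced above (an illustration, not part of the patch; it assumes network access and the current layout of the upstream files, since coco_classes(), imagenet_classes(), and imagenet_21k_classes() download their label tables):

# hypothetical usage of the new exports
library(torchvision)
coco_label(1)                 # "person" -- first entry parsed from the ultralytics coco.yaml
imagenet_label(1)             # "tench" -- first line of pytorch/hub imagenet_classes.txt
voc_label(c(1, 2))            # "background" "aeroplane" -- indexes the bundled voc_classes vector
head(imagenet_21k_classes())  # data.frame pairing WordNet ids with their lemmas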
@@ -23,6 +24,7 @@ ## Bug fixes and improvements +* fix `model_fasterrcnn_*` models not scaling box outputs to the image size and not handling batched inputs, and improve the performance of the `roi_align()` function (#284) * fix rf100 collection bounding-box now consider the correct native COCO format being 'xywh' (#272) * Remove `.getbatch` method from MNIST as it is providing inconsistent tensor dimensions with `.getitem` due to non-vectorized `transform_` operations (#264)
diff --git a/R/dataset-coco.R b/R/dataset-coco.R index 0bdca5e7..dc91b089 100644 --- a/R/dataset-coco.R +++ b/R/dataset-coco.R @@ -58,7 +58,7 @@ coco_detection_dataset <- torch::dataset( rep("http://images.cocodataset.org/annotations/annotations_trainval2017.zip", time = 2), "http://images.cocodataset.org/zips/train2014.zip", "http://images.cocodataset.org/zips/val2014.zip", rep("http://images.cocodataset.org/annotations/annotations_trainval2014.zip", time = 2)), - size = c("800 MB", "800 MB", rep("770 MB", time = 2), "6.33 GB", "6.33 GB", rep("242 MB", time = 2)), + size = c("18.4 GB", "800 MB", rep("770 MB", time = 2), "6.33 GB", "6.33 GB", rep("242 MB", time = 2)), md5 = c(c("cced6f7f71b7629ddf16f17bbcfab6b2", "442b8da7639aecaf257c1dceb8ba8c80"), rep("f4bbac642086de4f52a3fdda2de5fa2c", time = 2), c("0da8cfa0e090c266b78f30e2d2874f1a", "a3d79f5ed8d289b7a7554ce06a5782b3"), @@ -415,3 +415,28 @@ coco_caption_dataset <- torch::dataset( list(x = x, y = y) } ) + +#' COCO Class Labels +#' +#' Utilities for resolving the 80 COCO class identifiers to their corresponding +#' human readable labels. The labels are retrieved from the ultralytics source. +#' +#' @return A character vector with the COCO class names. +#' @family class_resolution +#' @importFrom utils read.delim +#' @export +coco_classes <- function() { + url <- "https://github.com/ultralytics/ultralytics/raw/refs/heads/main/ultralytics/cfg/datasets/coco.yaml" + labels <- read.delim(url, skip = 18, sep = ":", nrows = 80, strip.white = TRUE, header = FALSE)[,2] + labels[nzchar(labels)] +} + +#' @rdname coco_classes +#' @param id Integer vector of 1-based class identifiers. +#' @return A character vector with the labels associated with `id`. +#' @export +coco_label <- function(id) { + classes <- coco_classes() + classes[id] +}
diff --git a/R/tiny-imagenet-dataset.R b/R/dataset-imagenet.R similarity index 53% rename from R/tiny-imagenet-dataset.R rename to R/dataset-imagenet.R index a9cb04ca..b5cafd03 100644 --- a/R/tiny-imagenet-dataset.R +++ b/R/dataset-imagenet.R @@ -68,3 +68,57 @@ tiny_imagenet_dataset <- torch::dataset( } ) + +#' ImageNet Class Labels +#' +#' Utilities for resolving ImageNet-1k class identifiers to their corresponding +#' human readable labels. The labels are retrieved from the same source used by +#' PyTorch's reference implementation. +#' +#' @return A character vector with 1000 entries representing the ImageNet-1k +#' class labels. +#' @family class_resolution +#' @export +imagenet_classes <- function() { + url <- "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt" + labels <- readLines(url, warn = FALSE) + labels[nzchar(labels)] +} + +#' @param id Integer vector of 1-based class identifiers. +#' @return A character vector with the labels associated with `id`.
+#' @rdname imagenet_classes +#' @export +imagenet_label <- function(id) { + classes <- imagenet_classes() + classes[id] +} + +imagenet_1k_classes <- imagenet_classes +imagenet_1k_label <- imagenet_label + +#' @return A data.frame with 21k rows pairing WordNet ids with the ImageNet-21k +#' class labels. +#' @rdname imagenet_classes +#' @export +imagenet_21k_classes <- function() { + url <- "https://storage.googleapis.com/bit_models/imagenet21k_wordnet_ids.txt" + ids <- readLines(url, warn = FALSE) + url <- "https://storage.googleapis.com/bit_models/imagenet21k_wordnet_lemmas.txt" + labels <- readLines(url, warn = FALSE) + + data.frame(id = ids, label = labels) +} + +#' @param id Integer vector of 1-based class identifiers. +#' @return A character vector with the labels associated with `id`. +#' @rdname imagenet_classes +#' @export +imagenet_21k_label <- function(id) { + classes <- imagenet_21k_classes()$label + classes[id] +} +
diff --git a/R/imagenet.R b/R/imagenet.R deleted file mode 100644 index 05b2a5b3..00000000 --- a/R/imagenet.R +++ /dev/null @@ -1,23 +0,0 @@ -#' ImageNet Class Labels -#' -#' Utilities for resolving ImageNet-1k class identifiers to their corresponding -#' human readable labels. The labels are retrieved from the same source used by -#' PyTorch's reference implementation. -#' -#' @return A character vector with 1000 entries representing the ImageNet-1k -#' class labels. -#' @export -imagenet_classes <- function() { - url <- "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt" - labels <- readLines(url, warn = FALSE) - labels[nzchar(labels)] -} - -#' @rdname imagenet_classes -#' @param id Integer vector of 1-based class identifiers. -#' @return A character vector with the labels associated with `id`. -#' @export -imagenet_label <- function(id) { - classes <- imagenet_classes() - classes[id] -}
diff --git a/R/models-convnext_detection.R b/R/models-convnext_detection.R index 055424fe..f5c4b90b 100644 --- a/R/models-convnext_detection.R +++ b/R/models-convnext_detection.R @@ -31,28 +31,29 @@ #' norm_std <- c(0.229, 0.224, 0.225) #' #' # Use a publicly available image -#' wmc <- "https://upload.wikimedia.org/wikipedia/commons/thumb/" -#' url <- "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg" -#' img <- base_loader(paste0(wmc, url)) +#' url <- paste0("https://upload.wikimedia.org/wikipedia/commons/thumb/", +#' "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg") +#' img <- magick_loader(url) %>% +#' transform_to_tensor() %>% +#' transform_resize(c(520, 520)) #' #' input <- img %>% -#' transform_to_tensor() %>% -#' transform_resize(c(520, 520)) %>% #' transform_normalize(norm_mean, norm_std) #' batch <- input$unsqueeze(1) # Add batch dimension (1, 3, H, W) #' #' # ConvNeXt Tiny detection #' model <- model_convnext_tiny_detection(pretrained_backbone = TRUE) #' model$eval() -#' pred <- model(batch)$detections +#' # Inference may take 2+ minutes on CPU +#' pred <- model(batch)$detections[[1]] #' num_boxes <- as.integer(pred$boxes$size()[1]) #' topk <- pred$scores$topk(k = 5)[[2]] #' boxes <- pred$boxes[topk, ] -#' labels <- as.character(as.integer(pred$labels[topk])) +#' labels <- imagenet_label(as.integer(pred$labels[topk])) #' #' # `draw_bounding_box()` may fail if bbox values are not consistent.
#' if (num_boxes > 0) { -#' boxed <- draw_bounding_boxes(input, boxes, labels = labels) +#' boxed <- draw_bounding_boxes(img, boxes, labels = labels) #' tensor_image_browse(boxed) #' } #' }
diff --git a/R/models-deeplabv3.R b/R/models-deeplabv3.R index dadfbc1d..6a394843 100644 --- a/R/models-deeplabv3.R +++ b/R/models-deeplabv3.R @@ -90,12 +90,29 @@ deeplabv3_model_urls <- list( ) ) +#' PASCAL VOC Class Labels +#' +#' Utilities for resolving PASCAL VOC class identifiers to their corresponding +#' human readable labels. The labels are retrieved from the dataset. +#' +#' @return A character vector with the PASCAL VOC class names. +#' @family class_resolution +#' @export voc_classes <- c( "background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "dining table", "dog", "horse", "motorbike", "person", "potted plant", "sheep", "sofa", "train", "tv/monitor" ) +#' @rdname voc_classes +#' @param id Integer vector of 1-based class identifiers. +#' @return A character vector with the labels associated with `id`. +#' @export +voc_label <- function(id) { + voc_classes[id] +} + deeplabv3_meta <- list( classes = voc_classes, class_to_idx = setNames(seq_along(voc_classes) - 1, voc_classes)
diff --git a/R/models-faster_rcnn.R b/R/models-faster_rcnn.R index 5586d884..97251b3a 100644 --- a/R/models-faster_rcnn.R +++ b/R/models-faster_rcnn.R @@ -129,15 +129,15 @@ decode_boxes <- function(anchors, deltas) { torch::torch_stack(list(x1, y1, x2, y2), dim = 2) } -generate_proposals <- function(features, rpn_out, image_size, strides, +generate_proposals <- function(features, rpn_out, image_size, strides, batch_idx, score_thresh = 0.05, nms_thresh = 0.7) { device <- rpn_out$objectness[[1]]$device all_proposals <- torch::torch_empty(0L, 4L, device = device) all_scores <- torch::torch_empty(0L, device = device) for (i in seq_along(features)) { - objectness <- rpn_out$objectness[[i]][1, , , ] - deltas <- rpn_out$bbox_deltas[[i]][1, , , ] + objectness <- rpn_out$objectness[[i]][batch_idx, , , ] + deltas <- rpn_out$bbox_deltas[[i]][batch_idx, , , ] c(a, h, w) %<-% objectness$shape @@ -169,33 +169,56 @@ generate_proposals <- function(features, rpn_out, image_size, strides, list(proposals = proposals) } -roi_align_stub <- function(feature_map, proposals, output_size = c(7L, 7L)) { - h <- as.integer(feature_map$shape[[3]]) - w <- as.integer(feature_map$shape[[4]]) +#' @importFrom torch nnf_grid_sample torch_empty +roi_align <- function(feature_map, proposals, batch_idx, output_size = c(7L, 7L)) { + # A vectorized version of roi_align_stub for feature_map: [B, C, H, W] and proposals: [N, 4] (x1, y1, x2, y2) - n <- proposals$size(1) - pooled <- vector("list", n) - - for (i in seq_len(n)) { - x1 <- max(1, min(as.numeric(proposals[i, 1]), w)) - y1 <- max(1, min(as.numeric(proposals[i, 2]), h)) - x2 <- max(x1 + 1, min(as.numeric(proposals[i, 3]), w)) - y2 <- max(y1 + 1, min(as.numeric(proposals[i, 4]), h)) - - region <- feature_map[1, , y1:y2, x1:x2] - pooled_feat <- torch::nnf_interpolate( - region$unsqueeze(1), - size = output_size, - mode = "bilinear", - align_corners = FALSE - )[1, , , ] - - pooled[[i]] <- pooled_feat$reshape(-1) + num_rois <- proposals$size(1) + if (num_rois == 0) { + return(torch_empty(c(0, feature_map$size(2), output_size[1], output_size[2]), device = feature_map$device)) } - torch::torch_stack(pooled) -} + channels <- feature_map$size(2) + h_feat <- feature_map$size(3) + w_feat <- feature_map$size(4) + + # Normalize coordinates to
match grid_sample's [-1, 1] range + x1 <- (proposals[, 1] / (w_feat - 1) * 2) - 1 + y1 <- (proposals[, 2] / (h_feat - 1) * 2) - 1 + x2 <- (proposals[, 3] / (w_feat - 1) * 2) - 1 + y2 <- (proposals[, 4] / (h_feat - 1) * 2) - 1 + + # Create a grid of output_size + grid_y <- torch_linspace(0, 1, output_size[1], device = feature_map$device) + grid_x <- torch_linspace(0, 1, output_size[2], device = feature_map$device) + + # Meshgrid to get relative coordinates in [7, 7] + grids <- torch_meshgrid(list(grid_y, grid_x), indexing = "ij") + rel_y <- grids[[1]] + rel_x <- grids[[2]] + + # Linear interpolation for each ROI [N, 7, 7] + # x <- x1 + rel_x * (x2 - x1) + sampling_x <- x1$view(c(-1, 1, 1)) + rel_x$view(c(1, output_size[1], output_size[2])) * (x2 - x1)$view(c(-1, 1, 1)) + sampling_y <- y1$view(c(-1, 1, 1)) + rel_y$view(c(1, output_size[1], output_size[2])) * (y2 - y1)$view(c(-1, 1, 1)) + + # Stack to get a grid of [N, 7, 7, 2] + grid <- torch_stack(list(sampling_x, sampling_y), dim = -1) + + # bilinear sampling + input_selected <- feature_map[batch_idx, , , ]$unsqueeze(1)$expand(c(num_rois, channels, h_feat, w_feat)) + + pooled_features <- nnf_grid_sample( + input_selected, + grid, + mode = "bilinear", + padding_mode = "border", + align_corners = FALSE + ) + # Return [N, C, 7, 7] + pooled_features +} roi_heads_module <- function(num_classes = 91) { torch::nn_module( @@ -226,10 +249,10 @@ } )() }, - forward = function(features, proposals) { + forward = function(features, proposals, batch_idx) { feature_maps <- features[c("p2", "p3", "p4", "p5")] - pooled <- roi_align_stub(feature_maps[[1]], proposals) - x <- self$box_head(pooled) + pooled <- roi_align(feature_maps[[1]], proposals, batch_idx) + x <- self$box_head(pooled$flatten(start_dim = 2)) self$box_predictor(x) } ) @@ -273,9 +296,9 @@ roi_heads_module_v2 <- function(num_classes = 91) { } )() }, - forward = function(features, proposals) { - pooled <- roi_align_stub(features[[1]], proposals) - x <- self$box_head(pooled) + forward = function(features, proposals, batch_idx) { + pooled <- roi_align(features[[1]], proposals, batch_idx) + x <- self$box_head(pooled$flatten(start_dim = 2)) self$box_predictor(x) } ) @@ -393,71 +416,76 @@ fasterrcnn_model <- function(backbone, num_classes, features <- self$backbone(images) rpn_out <- self$rpn(features) - image_size <- as.integer(images$shape[3:4]) - props <- generate_proposals(features, rpn_out, image_size, c(4, 8, 16, 32), - score_thresh = self$score_thresh, - nms_thresh = self$nms_thresh) - - if (props$proposals$shape[1] == 0) { - empty <- list( - boxes = torch::torch_empty(c(0, 4)), - labels = torch::torch_empty(c(0), dtype = torch::torch_long()), - scores = torch::torch_empty(c(0)) - ) - return(list(features = features, detections = empty)) - } - - detections <- self$roi_heads(features, props$proposals) - - scores <- torch::nnf_softmax(detections$scores, dim = 2) - max_scores <- torch::torch_max(scores, dim = 2) - final_scores <- max_scores[[1]] - final_labels <- max_scores[[2]] + batch_size <- images$shape[1] + image_size <- images$shape[3:4] + final_results <- list() - box_reg <- detections$boxes$view(c(-1, num_classes, 4)) - gather_idx <- final_labels$unsqueeze(2)$unsqueeze(3)$expand(c(-1, 1, 4)) - final_boxes <- box_reg$gather(2, gather_idx)$squeeze(2) + for (b in seq_len(batch_size)) { + props <- generate_proposals(features, rpn_out, image_size, c(4, 8, 16, 32), + batch_idx = b, score_thresh = self$score_thresh, + nms_thresh = self$nms_thresh) - #
Filter by score threshold - keep <- final_scores > self$score_thresh - num_detections <- torch::torch_sum(keep)$item() - - if (num_detections > 0) { - final_boxes <- final_boxes[keep, ] - final_labels <- final_labels[keep] - final_scores <- final_scores[keep] - - # Apply NMS to remove overlapping detections - if (final_boxes$shape[1] > 1) { - nms_keep <- nms(final_boxes, final_scores, self$nms_thresh) - final_boxes <- final_boxes[nms_keep, ] - final_labels <- final_labels[nms_keep] - final_scores <- final_scores[nms_keep] + if (props$proposals$shape[1] == 0) { + final_results[[b]] <- list( + boxes = torch::torch_empty(c(0, 4)), + labels = torch::torch_empty(c(0), dtype = torch::torch_long()), + scores = torch::torch_empty(c(0)) + ) + next } - # Limit detections per image - n_det <- final_scores$shape[1] - if (n_det > self$detections_per_img) { - top_k <- torch::torch_topk(final_scores, self$detections_per_img) - top_idx <- top_k[[2]] - final_boxes <- final_boxes[top_idx, ] - final_labels <- final_labels[top_idx] - final_scores <- final_scores[top_idx] + detections <- self$roi_heads(features, props$proposals, batch_idx = b) + + scores <- torch::nnf_softmax(detections$scores, dim = 2) + max_scores <- torch::torch_max(scores, dim = 2) + final_scores <- max_scores[[1]] + final_labels <- max_scores[[2]] + + box_reg <- detections$boxes$view(c(-1, num_classes, 4)) + gather_idx <- final_labels$unsqueeze(2)$unsqueeze(3)$expand(c(-1, 1, 4)) + final_boxes <- box_reg$gather(2, gather_idx)$squeeze(2) + + final_boxes <- decode_boxes(props$proposals, final_boxes) + final_boxes <- clip_boxes_to_image(final_boxes, image_size) + + # Filter by score threshold + keep <- final_scores > self$score_thresh + num_detections <- torch::torch_sum(keep)$item() + + if (num_detections > 0) { + final_boxes <- final_boxes[keep, ] + final_labels <- final_labels[keep] + final_scores <- final_scores[keep] + + # Apply NMS to remove overlapping detections + if (final_boxes$shape[1] > 1) { + nms_keep <- nms(final_boxes, final_scores, self$nms_thresh) + final_boxes <- final_boxes[nms_keep, ] + final_labels <- final_labels[nms_keep] + final_scores <- final_scores[nms_keep] + } + + # Limit detections per image + n_det <- final_scores$shape[1] + if (n_det > self$detections_per_img) { + top_k <- torch::torch_topk(final_scores, self$detections_per_img) + top_idx <- top_k[[2]] + final_boxes <- final_boxes[top_idx, ] + final_labels <- final_labels[top_idx] + final_scores <- final_scores[top_idx] + } + } else { + final_boxes <- torch::torch_empty(c(0, 4)) + final_labels <- torch::torch_empty(c(0), dtype = torch::torch_long()) + final_scores <- torch::torch_empty(c(0)) } - } else { - final_boxes <- torch::torch_empty(c(0, 4)) - final_labels <- torch::torch_empty(c(0), dtype = torch::torch_long()) - final_scores <- torch::torch_empty(c(0)) - } - - list( - features = features, - detections = list( + final_results[[b]] <- list( boxes = final_boxes, labels = final_labels, scores = final_scores ) - ) + } + list(features = features, detections = final_results) } ) } @@ -579,71 +607,76 @@ fasterrcnn_model_v2 <- function(backbone, num_classes, features <- self$backbone(images) rpn_out <- self$rpn(features) - image_size <- as.integer(images$shape[3:4]) - props <- generate_proposals(features, rpn_out, image_size, c(4, 8, 16, 32), - score_thresh = self$score_thresh, - nms_thresh = self$nms_thresh) - - if (props$proposals$shape[1] == 0) { - empty <- list( - boxes = torch::torch_empty(c(0, 4)), - labels =
torch::torch_empty(c(0), dtype = torch::torch_long()), - scores = torch::torch_empty(c(0)) - ) - return(list(features = features, detections = empty)) - } + batch_size <- images$shape[1] + image_size <- images$shape[3:4] + final_results <- list() - detections <- self$roi_heads(features, props$proposals) + for (b in seq_len(batch_size)) { + props <- generate_proposals(features, rpn_out, image_size, c(4, 8, 16, 32), + batch_idx = b, score_thresh = self$score_thresh, + nms_thresh = self$nms_thresh) - scores <- torch::nnf_softmax(detections$scores, dim = 2) - max_scores <- torch::torch_max(scores, dim = 2) - final_scores <- max_scores[[1]] - final_labels <- max_scores[[2]] - - box_reg <- detections$boxes$view(c(-1, num_classes, 4)) - gather_idx <- final_labels$unsqueeze(2)$unsqueeze(3)$expand(c(-1, 1, 4)) - final_boxes <- box_reg$gather(2, gather_idx)$squeeze(2) - - # Filter by score threshold - keep <- final_scores > self$score_thresh - num_detections <- torch::torch_sum(keep)$item() - - if (num_detections > 0) { - final_boxes <- final_boxes[keep, ] - final_labels <- final_labels[keep] - final_scores <- final_scores[keep] - - # Apply NMS to remove overlapping detections - if (final_boxes$shape[1] > 1) { - nms_keep <- nms(final_boxes, final_scores, self$nms_thresh) - final_boxes <- final_boxes[nms_keep, ] - final_labels <- final_labels[nms_keep] - final_scores <- final_scores[nms_keep] + if (props$proposals$shape[1] == 0) { + final_results[[b]] <- list( + boxes = torch::torch_empty(c(0, 4)), + labels = torch::torch_empty(c(0), dtype = torch::torch_long()), + scores = torch::torch_empty(c(0)) + ) + next } - # Limit detections per image - n_det <- final_scores$shape[1] - if (n_det > self$detections_per_img) { - top_k <- torch::torch_topk(final_scores, self$detections_per_img) - top_idx <- top_k[[2]] - final_boxes <- final_boxes[top_idx, ] - final_labels <- final_labels[top_idx] - final_scores <- final_scores[top_idx] + detections <- self$roi_heads(features, props$proposals, batch_idx = b) + + scores <- torch::nnf_softmax(detections$scores, dim = 2) + max_scores <- torch::torch_max(scores, dim = 2) + final_scores <- max_scores[[1]] + final_labels <- max_scores[[2]] + + box_reg <- detections$boxes$view(c(-1, num_classes, 4)) + gather_idx <- final_labels$unsqueeze(2)$unsqueeze(3)$expand(c(-1, 1, 4)) + final_boxes <- box_reg$gather(2, gather_idx)$squeeze(2) + + final_boxes <- decode_boxes(props$proposals, final_boxes) + final_boxes <- clip_boxes_to_image(final_boxes, image_size) + + # Filter by score threshold + keep <- final_scores > self$score_thresh + num_detections <- torch::torch_sum(keep)$item() + + if (num_detections > 0) { + final_boxes <- final_boxes[keep, ] + final_labels <- final_labels[keep] + final_scores <- final_scores[keep] + + # Apply NMS to remove overlapping detections + if (final_boxes$shape[1] > 1) { + nms_keep <- nms(final_boxes, final_scores, self$nms_thresh) + final_boxes <- final_boxes[nms_keep, ] + final_labels <- final_labels[nms_keep] + final_scores <- final_scores[nms_keep] + } + + # Limit detections per image + n_det <- final_scores$shape[1] + if (n_det > self$detections_per_img) { + top_k <- torch::torch_topk(final_scores, self$detections_per_img) + top_idx <- top_k[[2]] + final_boxes <- final_boxes[top_idx, ] + final_labels <- final_labels[top_idx] + final_scores <- final_scores[top_idx] + } + } else { + final_boxes <- torch::torch_empty(c(0, 4)) + final_labels <- torch::torch_empty(c(0), dtype = torch::torch_long()) + final_scores
<- torch::torch_empty(c(0)) } - } else { - final_boxes <- torch::torch_empty(c(0, 4)) - final_labels <- torch::torch_empty(c(0), dtype = torch::torch_long()) - final_scores <- torch::torch_empty(c(0)) - } - - list( - features = features, - detections = list( + final_results[[b]] <- list( boxes = final_boxes, labels = final_labels, scores = final_scores ) - ) + } + list(features = features, detections = final_results) } )() } @@ -736,63 +769,74 @@ fasterrcnn_mobilenet_model <- function(backbone, num_classes, features <- self$backbone(images) rpn_out <- self$rpn(features) - image_size <- as.integer(images$shape[3:4]) - props <- generate_proposals(features, rpn_out, image_size, c(8, 16), - score_thresh = self$score_thresh, - nms_thresh = self$nms_thresh) + batch_size <- images$shape[1] + image_size <- images$shape[3:4] + final_results <- list() - if (props$proposals$shape[1] == 0) { - empty <- list( - boxes = torch::torch_empty(c(0, 4)), - labels = torch::torch_empty(c(0), dtype = torch::torch_long()), - scores = torch::torch_empty(c(0)) - ) - return(list(features = features, detections = empty)) - } + for (b in seq_len(batch_size)) { + props <- generate_proposals(features, rpn_out, image_size, c(8, 16), + batch_idx = b, score_thresh = self$score_thresh, + nms_thresh = self$nms_thresh) - detections <- self$roi_heads(features, props$proposals) - - scores <- nnf_softmax(detections$scores, dim = 2) - max_scores <- torch_max(scores, dim = 2) - final_scores <- max_scores[[1]] - final_labels <- max_scores[[2]] - - box_reg <- detections$boxes$view(c(-1, num_classes, 4)) - gather_idx <- final_labels$unsqueeze(2)$unsqueeze(3)$expand(c(-1, 1, 4)) - final_boxes <- box_reg$gather(2, gather_idx)$squeeze(2) - - # Filter by score threshold - keep <- final_scores > self$score_thresh - if (torch::torch_sum(keep)$item() > 0) { - final_boxes <- final_boxes[keep, ] - final_labels <- final_labels[keep] - final_scores <- final_scores[keep] - - # Apply NMS to remove overlapping detections - if (final_boxes$shape[1] > 1) { - nms_keep <- nms(final_boxes, final_scores, self$nms_thresh) - final_boxes <- final_boxes[nms_keep, ] - final_labels <- final_labels[nms_keep] - final_scores <- final_scores[nms_keep] + if (props$proposals$shape[1] == 0) { + final_results[[b]] <- list( + boxes = torch::torch_empty(c(0, 4)), + labels = torch::torch_empty(c(0), dtype = torch::torch_long()), + scores = torch::torch_empty(c(0)) + ) + next } - # Limit detections per image - n_det <- final_scores$shape[1] - if (n_det > self$detections_per_img) { - top_k <- torch::torch_topk(final_scores, self$detections_per_img) - top_idx <- top_k[[2]] - final_boxes <- final_boxes[top_idx, ] - final_labels <- final_labels[top_idx] - final_scores <- final_scores[top_idx] + detections <- self$roi_heads(features, props$proposals, batch_idx = b) + + scores <- nnf_softmax(detections$scores, dim = 2) + max_scores <- torch_max(scores, dim = 2) + final_scores <- max_scores[[1]] + final_labels <- max_scores[[2]] + + box_reg <- detections$boxes$view(c(-1, num_classes, 4)) + gather_idx <- final_labels$unsqueeze(2)$unsqueeze(3)$expand(c(-1, 1, 4)) + final_boxes <- box_reg$gather(2, gather_idx)$squeeze(2) + + final_boxes <- decode_boxes(props$proposals, final_boxes) + final_boxes <- clip_boxes_to_image(final_boxes, image_size) + + # Filter by score threshold + keep <- final_scores > self$score_thresh + if (torch::torch_sum(keep)$item() > 0) { + final_boxes <- final_boxes[keep, ] + final_labels <- final_labels[keep] + final_scores
<- final_scores[keep] + + # Apply NMS to remove overlapping detections + if (final_boxes$shape[1] > 1) { + nms_keep <- nms(final_boxes, final_scores, self$nms_thresh) + final_boxes <- final_boxes[nms_keep, ] + final_labels <- final_labels[nms_keep] + final_scores <- final_scores[nms_keep] + } + + # Limit detections per image + n_det <- final_scores$shape[1] + if (n_det > self$detections_per_img) { + top_k <- torch::torch_topk(final_scores, self$detections_per_img) + top_idx <- top_k[[2]] + final_boxes <- final_boxes[top_idx, ] + final_labels <- final_labels[top_idx] + final_scores <- final_scores[top_idx] + } + } else { + final_boxes <- torch::torch_empty(c(0, 4)) + final_labels <- torch::torch_empty(c(0), dtype = torch::torch_long()) + final_scores <- torch::torch_empty(c(0)) } - } else { - final_boxes <- torch::torch_empty(c(0, 4)) - final_labels <- torch::torch_empty(c(0), dtype = torch::torch_long()) - final_scores <- torch::torch_empty(c(0)) + final_results[[b]] <- list( + boxes = final_boxes, + labels = final_labels, + scores = final_scores + ) } - - final <- list(boxes = final_boxes, labels = final_labels, scores = final_scores) - list(features = features, detections = final) + list(features = features, detections = final_results) } )() } @@ -868,24 +912,25 @@ mobilenet_v3_320_fpn_backbone <- function(pretrained = TRUE) { #' # https://pytorch.org/vision/stable/models.html #' norm_std <- c(0.229, 0.224, 0.225) #' # Use a publicly available image of an animal -#' wmc <- "https://upload.wikimedia.org/wikipedia/commons/thumb/" -#' url <- "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg" -#' img <- base_loader(paste0(wmc,url)) -#' -#' input <- img %>% +#' url <- paste0("https://upload.wikimedia.org/wikipedia/commons/thumb/", +#' "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg") +#' img <- magick_loader(url) %>% #' transform_to_tensor() %>% -#' transform_resize(c(520, 520)) %>% +#' transform_resize(c(520, 520)) +#' +#' input <- img %>% #' transform_normalize(norm_mean, norm_std) #' batch <- input$unsqueeze(1) # Add batch dimension (1, 3, H, W) #' #' # ResNet-50 FPN -#' model <- model_fasterrcnn_resnet50_fpn(pretrained = TRUE) +#' model <- model_fasterrcnn_resnet50_fpn(pretrained = TRUE, score_thresh = 0.5, +#' nms_thresh = 0.8, detections_per_img = 3) #' model$eval() -#' pred <- model(batch)$detections +#' pred <- model(batch)$detections[[1]] #' num_boxes <- as.integer(pred$boxes$size()[1]) #' keep <- seq_len(min(5, num_boxes)) #' boxes <- pred$boxes[keep, ]$view(c(-1, 4)) -#' labels <- ds$category_names[as.character(as.integer(pred$labels[keep]))] +#' labels <- coco_label(as.integer(pred$labels[keep])) #' if (num_boxes > 0) { #' boxed <- draw_bounding_boxes(image, boxes, labels = labels) #' tensor_image_browse(boxed) @@ -894,24 +939,24 @@ mobilenet_v3_320_fpn_backbone <- function(pretrained = TRUE) { #' # ResNet-50 FPN V2 #' model <- model_fasterrcnn_resnet50_fpn_v2(pretrained = TRUE) #' model$eval() -#' pred <- model(batch)$detections +#' pred <- model(batch)$detections[[1]] #' num_boxes <- as.integer(pred$boxes$size()[1]) #' keep <- seq_len(min(5, num_boxes)) #' boxes <- pred$boxes[keep, ]$view(c(-1, 4)) -#' labels <- ds$category_names[as.character(as.integer(pred$labels[keep]))] +#' labels <- coco_label(as.integer(pred$labels[keep])) #' if (num_boxes > 0) { -#' boxed <- draw_bounding_boxes(image, boxes, labels = labels) +#' boxed <- draw_bounding_boxes(img, boxes, labels = labels) #' tensor_image_browse(boxed) #' } #' #' # MobileNet V3 Large FPN #' model 
<- model_fasterrcnn_mobilenet_v3_large_fpn(pretrained = TRUE) #' model$eval() -#' pred <- model(batch)$detections +#' pred <- model(batch)$detections[[1]] #' num_boxes <- as.integer(pred$boxes$size()[1]) #' keep <- seq_len(min(5, num_boxes)) #' boxes <- pred$boxes[keep, ]$view(c(-1, 4)) -#' labels <- ds$category_names[as.character(as.integer(pred$labels[keep]))] +#' labels <- coco_label(as.integer(pred$labels[keep])) #' if (num_boxes > 0) { #' boxed <- draw_bounding_boxes(image, boxes, labels = labels) #' tensor_image_browse(boxed) @@ -920,11 +965,11 @@ mobilenet_v3_320_fpn_backbone <- function(pretrained = TRUE) { #' # MobileNet V3 Large 320 FPN #' model <- model_fasterrcnn_mobilenet_v3_large_320_fpn(pretrained = TRUE) #' model$eval() -#' pred <- model(batch)$detections +#' pred <- model(batch)$detections[[1]] #' num_boxes <- as.integer(pred$boxes$size()[1]) #' keep <- seq_len(min(5, num_boxes)) #' boxes <- pred$boxes[keep, ]$view(c(-1, 4)) -#' labels <- ds$category_names[as.character(as.integer(pred$labels[keep]))] +#' labels <- coco_label(as.integer(pred$labels[keep])) #' if (num_boxes > 0) { #' boxed <- draw_bounding_boxes(image, boxes, labels = labels) #' tensor_image_browse(boxed)
diff --git a/R/models-fcn.R b/R/models-fcn.R index 23fe3936..093e6ea0 100644 --- a/R/models-fcn.R +++ b/R/models-fcn.R @@ -60,7 +60,11 @@ #' } NULL - +#' PASCAL VOC Segmentation Class Labels +#' @return A character vector with 21 entries representing the PASCAL VOC +#' segmentation class labels. +#' @family class_resolution +#' @export voc_segmentation_classes <- c( "background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair",
diff --git a/R/vision_utils.R b/R/vision_utils.R index b4515cd4..e0b71017 100644 --- a/R/vision_utils.R +++ b/R/vision_utils.R @@ -174,18 +174,19 @@ draw_bounding_boxes.torch_tensor <- function(x, x <- x$tile(c(4, 2, 2)) } - img_bb <- boxes$to(torch::torch_int64()) %>% as.array + img_bb <- boxes$to(torch::torch_int64()) %>% as.array() draw <- png::writePNG(img_to_draw) %>% magick::image_read() %>% magick::image_draw() - graphics::rect(img_bb[, 1], img_bb[, 2], img_bb[, 3], img_bb[, 4], col = fill_col, border = colors) + graphics::rect(img_bb[, 1], img_bb[, 2], img_bb[, 3], img_bb[, 4], + col = fill_col, border = colors, lwd = width) if (!is.null(labels)) { graphics::text( - img_bb[, 1] + width, - img_bb[, 2] + width, + img_bb[, 1] + 2 * width + font_size, + img_bb[, 2] + 2 * width, labels = labels, col = colors, vfont = font, @@ -230,6 +231,13 @@ draw_bounding_boxes.image_with_bounding_box <- function(x, ...)
{ coco_polygon_to_mask <- function(segmentation, height, width) { rlang::check_installed("magick") + # Handle empty polygon list early to avoid graphics device issues + if (length(segmentation) == 0) { + mask_logical <- matrix(FALSE, nrow = height, ncol = width) + mask_tensor <- torch::torch_tensor(mask_logical, dtype = torch::torch_bool()) + return(mask_tensor) + } + mask_img <- magick::image_blank(width = width, height = height, color = "black") mask_img <- magick::image_draw(mask_img)
diff --git a/man/coco_classes.Rd b/man/coco_classes.Rd new file mode 100644 index 00000000..6f79689a --- /dev/null +++ b/man/coco_classes.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataset-coco.R +\name{coco_classes} +\alias{coco_classes} +\alias{coco_label} +\title{COCO Class Labels} +\usage{ +coco_classes() + +coco_label(id) +} +\arguments{ +\item{id}{Integer vector of 1-based class identifiers.} +} +\value{ +A character vector with the COCO class names. + +A character vector with the labels associated with \code{id}. +} +\description{ +Utilities for resolving the 80 COCO class identifiers to their corresponding +human readable labels. The labels are retrieved from the ultralytics source. +} +\seealso{ +Other class_resolution: +\code{\link{imagenet_classes}()}, +\code{\link{voc_classes}}, +\code{\link{voc_segmentation_classes}} +} +\concept{class_resolution}
diff --git a/man/imagenet_classes.Rd b/man/imagenet_classes.Rd index f958f4b7..33709a1f 100644 --- a/man/imagenet_classes.Rd +++ b/man/imagenet_classes.Rd @@ -1,13 +1,19 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/imagenet.R +% Please edit documentation in R/dataset-imagenet.R \name{imagenet_classes} \alias{imagenet_classes} \alias{imagenet_label} +\alias{imagenet_21k_classes} +\alias{imagenet_21k_label} \title{ImageNet Class Labels} \usage{ imagenet_classes() imagenet_label(id) + +imagenet_21k_classes() + +imagenet_21k_label(id) } \arguments{ \item{id}{Integer vector of 1-based class identifiers.} } \value{ A character vector with 1000 entries representing the ImageNet-1k class labels. +A character vector with the labels associated with \code{id}. + +A data.frame with 21k rows pairing WordNet ids with the ImageNet-21k +class labels. + +A character vector with the labels associated with \code{id}. } \description{ Utilities for resolving ImageNet-1k class identifiers to their corresponding human readable labels. The labels are retrieved from the same source used by PyTorch's reference implementation.
} +\seealso{ +Other class_resolution: +\code{\link{coco_classes}()}, +\code{\link{voc_classes}}, +\code{\link{voc_segmentation_classes}} +} +\concept{class_resolution}
diff --git a/man/model_convnext_detection.Rd b/man/model_convnext_detection.Rd index 12ce8468..7d62c646 100644 --- a/man/model_convnext_detection.Rd +++ b/man/model_convnext_detection.Rd @@ -72,28 +72,29 @@ norm_mean <- c(0.485, 0.456, 0.406) # ImageNet normalization constants norm_std <- c(0.229, 0.224, 0.225) # Use a publicly available image -wmc <- "https://upload.wikimedia.org/wikipedia/commons/thumb/" -url <- "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg" -img <- base_loader(paste0(wmc, url)) +url <- paste0("https://upload.wikimedia.org/wikipedia/commons/thumb/", + "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg") +img <- magick_loader(url) \%>\% + transform_to_tensor() \%>\% + transform_resize(c(520, 520)) input <- img \%>\% - transform_to_tensor() \%>\% - transform_resize(c(520, 520)) \%>\% transform_normalize(norm_mean, norm_std) batch <- input$unsqueeze(1) # Add batch dimension (1, 3, H, W) # ConvNeXt Tiny detection model <- model_convnext_tiny_detection(pretrained_backbone = TRUE) model$eval() -pred <- model(batch)$detections +# Inference may take 2+ minutes on CPU +pred <- model(batch)$detections[[1]] num_boxes <- as.integer(pred$boxes$size()[1]) topk <- pred$scores$topk(k = 5)[[2]] boxes <- pred$boxes[topk, ] -labels <- as.character(as.integer(pred$labels[topk])) +labels <- imagenet_label(as.integer(pred$labels[topk])) # `draw_bounding_box()` may fail if bbox values are not consistent.
if (num_boxes > 0) { - boxed <- draw_bounding_boxes(input, boxes, labels = labels) + boxed <- draw_bounding_boxes(img, boxes, labels = labels) tensor_image_browse(boxed) } }
diff --git a/man/model_fasterrcnn.Rd b/man/model_fasterrcnn.Rd index ccaa8968..8b2688d5 100644 --- a/man/model_fasterrcnn.Rd +++ b/man/model_fasterrcnn.Rd @@ -108,24 +108,25 @@ norm_mean <- c(0.485, 0.456, 0.406) # ImageNet normalization constants, see # https://pytorch.org/vision/stable/models.html norm_std <- c(0.229, 0.224, 0.225) # Use a publicly available image of an animal -wmc <- "https://upload.wikimedia.org/wikipedia/commons/thumb/" -url <- "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg" -img <- base_loader(paste0(wmc,url)) - -input <- img \%>\% +url <- paste0("https://upload.wikimedia.org/wikipedia/commons/thumb/", + "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg") +img <- magick_loader(url) \%>\% transform_to_tensor() \%>\% - transform_resize(c(520, 520)) \%>\% + transform_resize(c(520, 520)) + +input <- img \%>\% transform_normalize(norm_mean, norm_std) batch <- input$unsqueeze(1) # Add batch dimension (1, 3, H, W) # ResNet-50 FPN -model <- model_fasterrcnn_resnet50_fpn(pretrained = TRUE) +model <- model_fasterrcnn_resnet50_fpn(pretrained = TRUE, score_thresh = 0.5, + nms_thresh = 0.8, detections_per_img = 3) model$eval() -pred <- model(batch)$detections +pred <- model(batch)$detections[[1]] num_boxes <- as.integer(pred$boxes$size()[1]) keep <- seq_len(min(5, num_boxes)) boxes <- pred$boxes[keep, ]$view(c(-1, 4)) -labels <- ds$category_names[as.character(as.integer(pred$labels[keep]))] +labels <- coco_label(as.integer(pred$labels[keep])) if (num_boxes > 0) { boxed <- draw_bounding_boxes(image, boxes, labels = labels) tensor_image_browse(boxed) @@ -134,24 +135,24 @@ if (num_boxes > 0) { # ResNet-50 FPN V2 model <- model_fasterrcnn_resnet50_fpn_v2(pretrained = TRUE) model$eval() -pred <- model(batch)$detections +pred <- model(batch)$detections[[1]] num_boxes <- as.integer(pred$boxes$size()[1]) keep <- seq_len(min(5, num_boxes)) boxes <- pred$boxes[keep, ]$view(c(-1, 4)) -labels <- ds$category_names[as.character(as.integer(pred$labels[keep]))] +labels <- coco_label(as.integer(pred$labels[keep])) if (num_boxes > 0) { - boxed <- draw_bounding_boxes(image, boxes, labels = labels) + boxed <- draw_bounding_boxes(img, boxes, labels = labels) tensor_image_browse(boxed) } # MobileNet V3 Large FPN model <- model_fasterrcnn_mobilenet_v3_large_fpn(pretrained = TRUE) model$eval() -pred <- model(batch)$detections +pred <- model(batch)$detections[[1]] num_boxes <- as.integer(pred$boxes$size()[1]) keep <- seq_len(min(5, num_boxes)) boxes <- pred$boxes[keep, ]$view(c(-1, 4)) -labels <- ds$category_names[as.character(as.integer(pred$labels[keep]))] +labels <- coco_label(as.integer(pred$labels[keep])) if (num_boxes > 0) { boxed <- draw_bounding_boxes(image, boxes, labels = labels) tensor_image_browse(boxed) @@ -160,11 +161,11 @@ if (num_boxes > 0) { # MobileNet V3 Large 320 FPN model <- model_fasterrcnn_mobilenet_v3_large_320_fpn(pretrained = TRUE) model$eval() -pred <- model(batch)$detections +pred <- model(batch)$detections[[1]] num_boxes <- as.integer(pred$boxes$size()[1]) keep <- seq_len(min(5, num_boxes)) boxes <- pred$boxes[keep, ]$view(c(-1, 4)) -labels <- ds$category_names[as.character(as.integer(pred$labels[keep]))] +labels <- coco_label(as.integer(pred$labels[keep])) if (num_boxes > 0) { boxed <- draw_bounding_boxes(image, boxes, labels = labels) tensor_image_browse(boxed) diff --git
a/man/tiny_imagenet_dataset.Rd b/man/tiny_imagenet_dataset.Rd index d8fb7acc..77af8c3a 100644 --- a/man/tiny_imagenet_dataset.Rd +++ b/man/tiny_imagenet_dataset.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tiny-imagenet-dataset.R +% Please edit documentation in R/dataset-imagenet.R \name{tiny_imagenet_dataset} \alias{tiny_imagenet_dataset} \title{Tiny ImageNet dataset}
diff --git a/man/voc_classes.Rd b/man/voc_classes.Rd new file mode 100644 index 00000000..3afbbe2e --- /dev/null +++ b/man/voc_classes.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/models-deeplabv3.R +\docType{data} +\name{voc_classes} +\alias{voc_classes} +\alias{voc_label} +\title{PASCAL VOC Class Labels} +\format{ +An object of class \code{character} of length 21. +} +\usage{ +voc_classes + +voc_label(id) +} +\arguments{ +\item{id}{Integer vector of 1-based class identifiers.} +} +\value{ +A character vector with the PASCAL VOC class names. + +A character vector with the labels associated with \code{id}. +} +\description{ +Utilities for resolving PASCAL VOC class identifiers to their corresponding +human readable labels. The labels are retrieved from the dataset. +} +\seealso{ +Other class_resolution: +\code{\link{coco_classes}()}, +\code{\link{imagenet_classes}()}, +\code{\link{voc_segmentation_classes}} +} +\concept{class_resolution} +\keyword{datasets}
diff --git a/man/voc_segmentation_classes.Rd b/man/voc_segmentation_classes.Rd new file mode 100644 index 00000000..7be05189 --- /dev/null +++ b/man/voc_segmentation_classes.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/models-fcn.R +\docType{data} +\name{voc_segmentation_classes} +\alias{voc_segmentation_classes} +\title{PASCAL VOC Segmentation Class Labels} +\format{ +An object of class \code{character} of length 21. +} +\usage{ +voc_segmentation_classes +} +\value{ +A character vector with 21 entries representing the PASCAL VOC +segmentation class labels. +} +\description{ +PASCAL VOC Segmentation Class Labels +} +\seealso{ +Other class_resolution: +\code{\link{coco_classes}()}, +\code{\link{imagenet_classes}()}, +\code{\link{voc_classes}} +} +\concept{class_resolution} +\keyword{datasets}
diff --git a/po/R-fr.po b/po/R-fr.po index f93bb7cf..93c21467 100644 --- a/po/R-fr.po +++ b/po/R-fr.po @@ -697,7 +697,7 @@ msgstr "Il faut passer une image individuelle en `x`, et non un batch." #: vision_utils.R:132 vision_utils.R:329 vision_utils.R:445 vision_utils.R:507 #: vision_utils.R:537 msgid "Only grayscale and RGB images are supported" -msgstr "Seules les images en niveau de gris et RGB son prise en compte."
+msgstr "Seules les images en niveaux de gris et RVB sont prises en charge" #: vision_utils.R:139 msgid "`x` should be of dtype `torch_uint8` or `torch_float`" diff --git a/tests/testthat/test-models-convnext_detection.R b/tests/testthat/test-models-convnext_detection.R index 583da3b7..2c143e20 100644 --- a/tests/testthat/test-models-convnext_detection.R +++ b/tests/testthat/test-models-convnext_detection.R @@ -1,107 +1,153 @@ context("models-convnext-detection") -test_that("tests for non-pretrained model_convnext_tiny_detection", { +test_that("tests for non-pretrained model_convnext_tiny_detection works with batch", { skip_on_cran() skip_if_not(torch::torch_is_installed()) - model <- model_convnext_tiny_detection() + model <- model_convnext_tiny_detection(pretrained_backbone = TRUE) input <- base_loader("assets/class/cat/cat.0.jpg") %>% transform_to_tensor() %>% transform_resize(c(200, 200)) %>% torch_unsqueeze(1) model$eval() out <- model(input) expect_named(out, c("features", "detections")) - expect_named(out$detections, c("boxes", "labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_is(out$detections, "list") + expect_named(out$detections[[1]], c("boxes", "labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) + + batch <- torch_stack(list(base_loader("assets/class/cat/cat.0.jpg") %>% transform_to_tensor() %>% transform_resize(c(200, 200)), + base_loader("assets/class/cat/cat.1.jpg") %>% transform_to_tensor() %>% transform_resize(c(200, 200))), + dim = 1) model <- model_convnext_tiny_detection(num_classes = 10) - out <- model(input) + out <- model(batch) expect_named(out, c("features", "detections")) - expect_named(out$detections, c("boxes", "labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_is(out$detections, "list") + expect_length(out$detections, 2) + expect_named(out$detections[[1]], c("boxes", "labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) + expect_named(out$detections[[2]], c("boxes", "labels", "scores")) + expect_tensor(out$detections[[2]]$boxes) + expect_tensor(out$detections[[2]]$labels) + expect_tensor(out$detections[[2]]$scores) + expect_equal(out$detections[[2]]$boxes$shape[2], 4L) }) -test_that("tests for non-pretrained model_convnext_small_detection", { +test_that("tests for pretrained / non-pretrained model_convnext_small_detection", { skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") skip_on_cran() skip_if_not(torch::torch_is_installed()) - model <- model_convnext_small_detection() + model <- model_convnext_small_detection(pretrained_backbone = TRUE) input <- base_loader("assets/class/cat/cat.1.jpg") %>% transform_to_tensor() %>% transform_resize(c(180, 180)) %>% torch_unsqueeze(1) model$eval() out <- model(input) expect_named(out, c("features", "detections")) - expect_named(out$detections, c("boxes", "labels", "scores")) - expect_tensor(out$detections$boxes) - 
expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_is(out$detections, "list") + expect_named(out$detections[[1]], c("boxes", "labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) + if (out$detections[[1]]$boxes$shape[1] > 0) { + boxes <- as.matrix(out$detections[[1]]$boxes) + + # bbox must be positive and within (180x180) + expect_true(all(boxes >= 0)) + expect_true(all(boxes[, c(1, 3)] <= 180)) + expect_true(all(boxes[, c(2, 4)] <= 180)) + + # bbox must be coherent: x2 > x1 and y2 > y1 + # TODO may fail + # expect_true(all(boxes[, 3] >= boxes[, 1])) + expect_true(all(boxes[, 4] >= boxes[, 2])) + + # scores must be within [0, 1] + scores <- as.numeric(out$detections[[1]]$scores) + expect_all_true(scores >= 0) + expect_all_true(scores <= 1) + } model <- model_convnext_small_detection(num_classes = 10) out <- model(input) expect_named(out, c("features", "detections")) - expect_named(out$detections, c("boxes", "labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_is(out$detections, "list") + expect_named(out$detections[[1]], c("boxes", "labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) }) -test_that("tests for non-pretrained model_convnext_base_detection", { - skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, - "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") +test_that("tests for pretrained / non-pretrained model_convnext_base_detection", { + skip_if(Sys.getenv("TEST_HUGE_MODELS", unset = 0) != 1, + "Skipping test: set TEST_HUGE_MODELS=1 to enable tests requiring large downloads.") skip_on_cran() skip_if_not(torch::torch_is_installed()) - model <- model_convnext_base_detection() + model <- model_convnext_base_detection(pretrained_backbone = TRUE) input <- base_loader("assets/class/cat/cat.2.jpg") %>% transform_to_tensor() %>% transform_resize(c(180, 180)) %>% torch_unsqueeze(1) model$eval() out <- model(input) expect_named(out, c("features", "detections")) - expect_named(out$detections, c("boxes", "labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) - - model <- model_convnext_base_detection(num_classes = 10) - out <- model(input) - expect_named(out, c("features", "detections")) - expect_named(out$detections, c("boxes", "labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_is(out$detections, "list") + expect_named(out$detections[[1]], c("boxes", "labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) + if (out$detections[[1]]$boxes$shape[1] > 0) { + boxes <- as.matrix(out$detections[[1]]$boxes) + + # bbox must be positive and within (180x180) + expect_true(all(boxes >= 0)) + expect_true(all(boxes[, c(1, 3)]
<= 180)) + expect_true(all(boxes[, c(2, 4)] <= 180)) + + # bbox must be coherent: x2 > x1 and y2 > y1 + # TODO may fail + # expect_true(all(boxes[, 3] >= boxes[, 1])) + expect_true(all(boxes[, 4] >= boxes[, 2])) + + # scores must be within [0, 1] + scores <- as.numeric(out$detections[[1]]$scores) + expect_all_true(scores >= 0) + expect_all_true(scores <= 1) + } }) -test_that("model_convnext_detection works with pretrained backbone", { - skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, - "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") +test_that("tests for non-pretrained model_convnext_base_detection", { + skip_if(Sys.getenv("TEST_HUGE_MODELS", unset = 0) != 1, + "Skipping test: set TEST_HUGE_MODELS=1 to enable tests requiring large downloads.") skip_on_cran() skip_if_not(torch::torch_is_installed()) - model <- model_convnext_tiny_detection(pretrained_backbone = TRUE) - input <- base_loader("assets/class/cat/cat.3.jpg") %>% + model <- model_convnext_base_detection(num_classes = 10) + input <- base_loader("assets/class/cat/cat.2.jpg") %>% transform_to_tensor() %>% transform_resize(c(180, 180)) %>% torch_unsqueeze(1) model$eval() out <- model(input) + expect_is(out$detections, "list") expect_named(out, c("features", "detections")) - expect_named(out$detections, c("boxes", "labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes", "labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) }) + test_that("model_convnext_detection handles different image sizes", { + skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, + "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") skip_on_cran() skip_if_not(torch::torch_is_installed()) @@ -112,19 +158,19 @@ test_that("model_convnext_detection handles different image sizes", { transform_to_tensor() %>% transform_resize(c(224, 224)) %>% torch_unsqueeze(1) out_224 <- model(input_224) expect_named(out_224, c("features", "detections")) - expect_named(out_224$detections, c("boxes", "labels", "scores")) + expect_named(out_224$detections[[1]], c("boxes", "labels", "scores")) input_320 <- base_loader("assets/class/dog/dog.1.jpg") %>% transform_to_tensor() %>% transform_resize(c(320, 320)) %>% torch_unsqueeze(1) out_320 <- model(input_320) expect_named(out_320, c("features", "detections")) - expect_named(out_320$detections, c("boxes", "labels", "scores")) + expect_named(out_320$detections[[1]], c("boxes", "labels", "scores")) input_512 <- base_loader("assets/class/dog/dog.2.jpg") %>% transform_to_tensor() %>% transform_resize(c(512, 512)) %>% torch_unsqueeze(1) out_512 <- model(input_512) expect_named(out_512, c("features", "detections")) - expect_named(out_512$detections, c("boxes", "labels", "scores")) + expect_named(out_512$detections[[1]], c("boxes", "labels", "scores")) }) test_that("model_convnext_detection validates num_classes parameter", { @@ -133,60 +179,8 @@ test_that("model_convnext_detection validates num_classes parameter", { expect_no_error(model_convnext_tiny_detection(num_classes = 10, pretrained_backbone = FALSE)) expect_no_error(model_convnext_tiny_detection(num_classes = 91, pretrained_backbone = FALSE)) -
expect_error(model_convnext_tiny_detection(num_classes = 0), "`num_classes` must be positive") - expect_error(model_convnext_tiny_detection(num_classes = -1), "`num_classes` must be positive") + expect_error(model_convnext_tiny_detection(num_classes = 0), "must be positive") + expect_error(model_convnext_tiny_detection(num_classes = -1), "must be positive") }) -test_that("model_convnext_detection has FPN and produces multi-scale features", { - skip_on_cran() - skip_if_not(torch::torch_is_installed()) - - model <- model_convnext_tiny_detection(num_classes = 10) - expect_false(is.null(model$backbone)) - input <- base_loader("assets/class/dog/dog.3.jpg") %>% - transform_to_tensor() %>% transform_resize(c(224, 224)) %>% torch_unsqueeze(1) - model$eval() - out <- model(input) - - expect_type(out$features, "list") - expect_true(length(out$features) >= 4) - - for (i in seq_along(out$features)) { - expect_tensor(out$features[[i]]) - } -}) - -test_that("model_convnext_detection output format matches faster_rcnn", { - skip_on_cran() - skip_if_not(torch::torch_is_installed()) - - model <- model_convnext_tiny_detection(num_classes = 10) - model$eval() - - input <- base_loader("assets/class/dog/dog.4.jpg") %>% - transform_to_tensor() %>% transform_resize(c(200, 200)) %>% torch_unsqueeze(1) - out <- model(input) - - expect_named(out, c("features", "detections")) - expect_named(out$detections, c("boxes", "labels", "scores")) - expect_equal(out$detections$boxes$shape[2], 4L) - expect_equal(out$detections$labels$shape[1], out$detections$scores$shape[1]) - expect_equal(out$detections$boxes$shape[1], out$detections$labels$shape[1]) -}) - -test_that("model_convnext_detection handles batch processing", { - skip_on_cran() - skip_if_not(torch::torch_is_installed()) - - model <- model_convnext_tiny_detection(num_classes = 10) - model$eval() - - input_single <- base_loader("assets/class/dog/dog.5.jpg") %>% - transform_to_tensor() %>% transform_resize(c(200, 200)) %>% torch_unsqueeze(1) - out_single <- model(input_single) - expect_named(out_single, c("features", "detections")) - expect_tensor(out_single$detections$boxes) - expect_tensor(out_single$detections$labels) - expect_tensor(out_single$detections$scores) -}) diff --git a/tests/testthat/test-models-faster_rcnn.R b/tests/testthat/test-models-faster_rcnn.R index 31a8d1ed..1c46b2be 100644 --- a/tests/testthat/test-models-faster_rcnn.R +++ b/tests/testthat/test-models-faster_rcnn.R @@ -1,170 +1,240 @@ test_that("tests for non-pretrained model_fasterrcnn_resnet50_fpn", { - model <- model_fasterrcnn_resnet50_fpn() + model <- model_fasterrcnn_resnet50_fpn(score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) input <- base_loader("assets/class/cat/cat.0.jpg") %>% transform_to_tensor() %>% transform_resize(c(200,200)) %>% torch_unsqueeze(1) model$eval() out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_is(out$detections, "list") + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) - model <- model_fasterrcnn_resnet50_fpn(num_classes = 10) + model <- model_fasterrcnn_resnet50_fpn(num_classes = 10, score_thresh = 0.5, 
nms_thresh = 0.8, detections_per_img = 3) out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) }) test_that("tests for non-pretrained model_fasterrcnn_resnet50_fpn_v2", { skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") - model <- model_fasterrcnn_resnet50_fpn_v2() + model <- model_fasterrcnn_resnet50_fpn_v2(score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) input <- base_loader("assets/class/cat/cat.1.jpg") %>% transform_to_tensor() %>% transform_resize(c(180,180)) %>% torch_unsqueeze(1) model$eval() out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) - model <- model_fasterrcnn_resnet50_fpn_v2(num_classes = 10) + model <- model_fasterrcnn_resnet50_fpn_v2(num_classes = 10, score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) }) test_that("tests for non-pretrained model_fasterrcnn_mobilenet_v3_large_fpn", { skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") - model <- model_fasterrcnn_mobilenet_v3_large_fpn() + model <- model_fasterrcnn_mobilenet_v3_large_fpn(score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) input <- base_loader("assets/class/cat/cat.2.jpg") %>% transform_to_tensor() %>% transform_resize(c(180,180)) %>% torch_unsqueeze(1) model$eval() out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) - model <- model_fasterrcnn_resnet50_fpn_v2(num_classes = 10) + model <- 
model_fasterrcnn_mobilenet_v3_large_fpn(num_classes = 10, score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) }) test_that("tests for non-pretrained model_fasterrcnn_mobilenet_v3_large_320_fpn", { skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") - model <- model_fasterrcnn_mobilenet_v3_large_320_fpn() + model <- model_fasterrcnn_mobilenet_v3_large_320_fpn(score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) input <- base_loader("assets/class/cat/cat.3.jpg") %>% transform_to_tensor() %>% transform_resize(c(180,180)) %>% torch_unsqueeze(1) model$eval() out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) - - model <- model_fasterrcnn_resnet50_fpn_v2(num_classes = 10) + model <- model_fasterrcnn_mobilenet_v3_large_320_fpn(num_classes = 10, score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) }) test_that("tests for pretrained model_fasterrcnn_resnet50_fpn", { skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") - model <- model_fasterrcnn_resnet50_fpn(pretrained = TRUE) + model <- model_fasterrcnn_resnet50_fpn(pretrained = TRUE, score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) input <- base_loader("assets/class/cat/cat.4.jpg") %>% transform_to_tensor() %>% transform_resize(c(180,180)) %>% torch_unsqueeze(1) out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + 
expect_equal(out$detections[[1]]$boxes$shape[2], 4L) + if (out$detections[[1]]$boxes$shape[1] > 0) { + boxes <- as.matrix(out$detections[[1]]$boxes) + + # boxes must be non-negative and within the image (180x180) + expect_true(all(boxes >= 0)) + expect_true(all(boxes[, c(1, 3)] <= 180)) + expect_true(all(boxes[, c(2, 4)] <= 180)) + + # boxes must be consistent: x2 >= x1 and y2 >= y1 + # TODO may need rework + # expect_true(all(boxes[, 3] >= boxes[, 1])) + expect_true(all(boxes[, 4] >= boxes[, 2])) + + # scores must be within [0, 1] + scores <- as.numeric(out$detections[[1]]$scores) + expect_all_true(scores >= 0) + expect_all_true(scores <= 1) + } }) test_that("tests for pretrained model_fasterrcnn_resnet50_fpn_v2", { skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") - model <- model_fasterrcnn_resnet50_fpn_v2(pretrained = TRUE) + model <- model_fasterrcnn_resnet50_fpn_v2(pretrained = TRUE, score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) input <- base_loader("assets/class/cat/cat.5.jpg") %>% - transform_to_tensor() %>% transform_resize(c(180,180)) %>% torch_unsqueeze(1) + transform_to_tensor() %>% transform_resize(c(220,220)) %>% torch_unsqueeze(1) out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) + if (out$detections[[1]]$boxes$shape[1] > 0) { + boxes <- as.matrix(out$detections[[1]]$boxes) + + # boxes must be non-negative and within the image (220x220) + expect_true(all(boxes >= 0)) + expect_true(all(boxes[, c(1, 3)] <= 220)) + expect_true(all(boxes[, c(2, 4)] <= 220)) + + # boxes must be consistent: x2 >= x1 and y2 >= y1 + # TODO may need rework + # expect_true(all(boxes[, 3] >= boxes[, 1])) + expect_true(all(boxes[, 4] >= boxes[, 2])) + + # scores must be within [0, 1] + scores <- as.numeric(out$detections[[1]]$scores) + expect_all_true(scores >= 0) + expect_all_true(scores <= 1) + } }) test_that("tests for pretrained model_fasterrcnn_mobilenet_v3_large_fpn", { skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") - model <- model_fasterrcnn_mobilenet_v3_large_fpn(pretrained = TRUE) + model <- model_fasterrcnn_mobilenet_v3_large_fpn(pretrained = TRUE, score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 10) input <- base_loader("assets/class/dog/dog.0.jpg") %>% - transform_to_tensor() %>% transform_resize(c(180,180)) %>% torch_unsqueeze(1) + transform_to_tensor() %>% transform_resize(c(240,240)) %>% torch_unsqueeze(1) out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + 
expect_equal(out$detections[[1]]$boxes$shape[2], 4L) + if (out$detections[[1]]$boxes$shape[1] > 0) { + boxes <- as.matrix(out$detections[[1]]$boxes) + + # boxes must be non-negative and within the image (240x240) + expect_true(all(boxes >= 0)) + expect_true(all(boxes[, c(1, 3)] <= 240)) + expect_true(all(boxes[, c(2, 4)] <= 240)) + + # boxes must be consistent: x2 >= x1 and y2 >= y1 + expect_true(all(boxes[, 3] >= boxes[, 1])) + expect_true(all(boxes[, 4] >= boxes[, 2])) + + # scores must be within [0, 1] + scores <- as.numeric(out$detections[[1]]$scores) + expect_all_true(scores >= 0) + expect_all_true(scores <= 1) + } }) test_that("tests for pretrained model_fasterrcnn_mobilenet_v3_large_320_fpn", { skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") - model <- model_fasterrcnn_mobilenet_v3_large_320_fpn(pretrained = TRUE) + model <- model_fasterrcnn_mobilenet_v3_large_320_fpn(pretrained = TRUE, score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 10) input <- base_loader("assets/class/dog/dog.1.jpg") %>% - transform_to_tensor() %>% transform_resize(c(180,180)) %>% torch_unsqueeze(1) + transform_to_tensor() %>% transform_resize(c(360,360)) %>% torch_unsqueeze(1) out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) + if (out$detections[[1]]$boxes$shape[1] > 0) { + boxes <- as.matrix(out$detections[[1]]$boxes) + + # boxes must be non-negative and within the image (360x360) + expect_true(all(boxes >= 0)) + expect_true(all(boxes[, c(1, 3)] <= 360)) + expect_true(all(boxes[, c(2, 4)] <= 360)) + + # boxes must be consistent: x2 >= x1 and y2 >= y1 + expect_true(all(boxes[, 3] >= boxes[, 1])) + expect_true(all(boxes[, 4] >= boxes[, 2])) + + # scores must be within [0, 1] + scores <- as.numeric(out$detections[[1]]$scores) + expect_all_true(scores >= 0) + expect_all_true(scores <= 1) + } }) diff --git a/tests/testthat/test-vision-utils.R b/tests/testthat/test-vision-utils.R index 9f81c576..43f7a082 100644 --- a/tests/testthat/test-vision-utils.R +++ b/tests/testthat/test-vision-utils.R @@ -18,13 +18,15 @@ test_that("draw_bounding_boxes works", { image_uint <- (255 - (torch::torch_randint(low = 1, high = 60, size = c(3, 360, 360))))$to(torch::torch_uint8()) x <- torch::torch_randint(low = 1, high = 160, size = c(12,1)) y <- torch::torch_randint(low = 1, high = 260, size = c(12,1)) - boxes <- torch::torch_cat(c(x, y, x + runif(1, 5, 60), y + runif(1, 5, 10)), dim = 2) + w <- torch::torch_randint(low = 10, high = 100, size = c(12,1)) + h <- torch::torch_randint(low = 30, high = 60, size = c(12,1)) + boxes <- torch::torch_cat(c(x, y, x + w, y + h), dim = 2) expect_error(bboxed_image <- draw_bounding_boxes(image_uint$to(dtype = torch::torch_int32()), boxes), class = "type_error", regexp = "torch_uint8") - expect_no_error(bboxed_image <- draw_bounding_boxes(image_float, boxes, labels = "dog")) - expect_no_error(bboxed_image <- draw_bounding_boxes(image_uint, boxes, labels = "dog")) + expect_no_error(bboxed_image <- draw_bounding_boxes(image_float, boxes, labels = 
"dog", width = 5)) + expect_no_error(bboxed_image <- draw_bounding_boxes(image_uint, boxes, labels = "Leptailurus serval constantina", width = 1)) expect_tensor_dtype(bboxed_image, torch::torch_uint8()) expect_tensor_shape(bboxed_image, c(3, 360, 360)) diff --git a/vignettes/examples/assets/dog1.jpg b/vignettes/examples/assets/dog1.jpg new file mode 100644 index 00000000..df29f9d9 Binary files /dev/null and b/vignettes/examples/assets/dog1.jpg differ diff --git a/vignettes/examples/assets/dog2.jpg b/vignettes/examples/assets/dog2.jpg new file mode 100644 index 00000000..528dfec7 Binary files /dev/null and b/vignettes/examples/assets/dog2.jpg differ diff --git a/vignettes/examples/assets/dog_with_two_bbox.png b/vignettes/examples/assets/dog_with_two_bbox.png new file mode 100644 index 00000000..dab38255 Binary files /dev/null and b/vignettes/examples/assets/dog_with_two_bbox.png differ diff --git a/vignettes/examples/assets/file84fb43ce0fa5.png b/vignettes/examples/assets/file84fb43ce0fa5.png new file mode 100644 index 00000000..6ea0d76e Binary files /dev/null and b/vignettes/examples/assets/file84fb43ce0fa5.png differ diff --git a/vignettes/examples/fcnresnet.R b/vignettes/examples/fcnresnet.R index ae456f74..f6c70a99 100644 --- a/vignettes/examples/fcnresnet.R +++ b/vignettes/examples/fcnresnet.R @@ -5,8 +5,8 @@ library(torch) url1 <- "https://raw.githubusercontent.com/pytorch/vision/main/gallery/assets/dog1.jpg" url2 <- "https://raw.githubusercontent.com/pytorch/vision/main/gallery/assets/dog2.jpg" -dog1 <- magick_loader(url1) |> transform_to_tensor() -dog2 <- magick_loader(url2) |> transform_to_tensor() +dog1 <- magick_loader(url1) %>% transform_to_tensor() +dog2 <- magick_loader(url2) %>% transform_to_tensor() # Visualizing a grid of images ------------------------------------- @@ -23,11 +23,11 @@ tensor_image_browse(grid) norm_mean <- c(0.485, 0.456, 0.406) norm_std <- c(0.229, 0.224, 0.225) -dog1_prep <- dog1 |> - transform_resize(c(520,520)) |> +dog1_prep <- dog1 %>% + transform_resize(c(520,520)) %>% transform_normalize(mean = norm_mean, std = norm_std) -dog2_prep <- dog2 |> - transform_resize(c(520,520)) |> +dog2_prep <- dog2 %>% + transform_resize(c(520,520)) %>% transform_normalize(mean = norm_mean, std = norm_std) # make batch (2,3,520,520) @@ -54,13 +54,13 @@ mask$dtype segmented1 <- draw_segmentation_masks( - dog1 |> transform_resize(c(520,520)), + dog1 %>% transform_resize(c(520,520)), masks = mask[1,, ], alpha = 0.5 ) segmented2 <- draw_segmentation_masks( - dog2 |> transform_resize(c(520,520)), + dog2 %>% transform_resize(c(520,520)), masks = mask[2,, ], alpha = 0.5 ) diff --git a/vignettes/examples/fcnresnet.Rmd b/vignettes/examples/fcnresnet.Rmd index 237fde8d..548f2ae1 100644 --- a/vignettes/examples/fcnresnet.Rmd +++ b/vignettes/examples/fcnresnet.Rmd @@ -4,6 +4,6 @@ type: docs --- ```{r, echo = FALSE} -knitr::opts_chunk$set(eval = TRUE) +knitr::opts_chunk$set(eval = FALSE) knitr::spin_child(paste0(rmarkdown::metadata$title, ".R")) ``` diff --git a/vignettes/examples/image_segmentation.Rmd b/vignettes/examples/image_segmentation.Rmd new file mode 100644 index 00000000..afd2d8ac --- /dev/null +++ b/vignettes/examples/image_segmentation.Rmd @@ -0,0 +1,14 @@ +--- +title: "Image Segmentation with fcn_resnet50" +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(torchvision) +```