Add label functions and fix faster_rcnn design for batch inference #284

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

cregouby merged 32 commits into main from vignette/viz_utilities

Feb 4, 2026

.gitignore

-Original file line number
+Diff line change
@@ Expand Up / @@ -7,3 +7,4 @@ x.pth @@
     docs
     .Rhistory
     s.pth
+    inst/doc

DESCRIPTION

-Original file line number
+Diff line change
@@ Expand Up / @@ -84,6 +84,7 @@ Collate: @@
         'dataset-fgvc.R'
         'dataset-flickr.R'
         'dataset-flowers.R'
+        'dataset-imagenet.R'
         'dataset-lfw.R'
         'dataset-mnist.R'
         'dataset-oxfordiiitpet.R'
@@ Expand All / @@ -94,7 +95,6 @@ Collate: @@
         'dataset-vggface2.R'
         'extension.R'
         'globals.R'
-        'imagenet.R'
         'models-alexnet.R'
         'models-convnext.R'
         'models-convnext_detection.R'
@@ Expand All / @@ -115,7 +115,6 @@ Collate: @@
         'models-vit.R'
         'ops-box_convert.R'
         'ops-boxes.R'
-        'tiny-imagenet-dataset.R'
         'transforms-array.R'
         'transforms-defaults.R'
         'transforms-generics.R'
@@ Expand Down @@

NAMESPACE

-Original file line number
+Diff line change
@@ Expand Up / @@ -80,7 +80,9 @@ export(cifar100_dataset) @@
     export(cifar10_dataset)
     export(clip_boxes_to_image)
     export(coco_caption_dataset)
+    export(coco_classes)
     export(coco_detection_dataset)
+    export(coco_label)
     export(coco_segmentation_dataset)
     export(draw_bounding_boxes)
     export(draw_keypoints)
@@ Expand All / @@ -99,6 +101,8 @@ export(flowers102_dataset) @@
     export(generalized_box_iou)
     export(get_collection_catalog)
     export(image_folder_dataset)
+    export(imagenet_21k_classes)
+    export(imagenet_21k_label)
     export(imagenet_classes)
     export(imagenet_label)
     export(kmnist_dataset)
@@ Expand Down Expand Up / @@ -238,6 +242,9 @@ export(transform_to_tensor) @@
     export(transform_vflip)
     export(vggface2_dataset)
     export(vision_make_grid)
+    export(voc_classes)
+    export(voc_label)
+    export(voc_segmentation_classes)
     export(whoi_plankton_dataset)
     export(whoi_small_coralnet_dataset)
     export(whoi_small_plankton_dataset)
@@ Expand Down Expand Up / @@ -270,6 +277,7 @@ importFrom(torch,nn_relu) @@
     importFrom(torch,nn_sequential)
     importFrom(torch,nn_softmax)
     importFrom(torch,nnf_gelu)
+    importFrom(torch,nnf_grid_sample)
     importFrom(torch,nnf_interpolate)
     importFrom(torch,nnf_layer_norm)
     importFrom(torch,nnf_normalize)
@@ Expand All / @@ -279,6 +287,7 @@ importFrom(torch,torch_arange) @@
     importFrom(torch,torch_cat)
     importFrom(torch,torch_chunk)
     importFrom(torch,torch_clamp)
+    importFrom(torch,torch_empty)
     importFrom(torch,torch_flatten)
     importFrom(torch,torch_float32)
     importFrom(torch,torch_linspace)
@@ Expand All / @@ -293,5 +302,6 @@ importFrom(torch,torch_stack) @@
     importFrom(torch,torch_tensor)
     importFrom(torch,torch_zeros)
     importFrom(torch,torch_zeros_like)
+    importFrom(utils,read.delim)
     importFrom(utils,tail)
     importFrom(zeallot,"%<-%")

NEWS.md

-Original file line number
+Diff line change
@@ Expand Up / @@ -7,6 +7,7 @@ @@
     ## New features
+    * Added resolution functions for COCO, ImageNet-21k, and PASCAL VOC classes and labels (#284).
     * Added article showcasing `model_fcn_resnet50()` with visualization utilities `draw_segmentation_masks()` and `vision_make_grid()` (@DerrickUnleashed, #281).
     * Added collection dataset catalog with `search_collection()`, `get_collection_catalog()`, and `list_collection_datasets()` functions for discovering and exploring collections (#271, @ANAMASGARD).
     * Added `target_transform_coco_masks()` and `target_transform_trimap_masks()` transformation functions for explicit segmentation mask generation (@ANAMASGARD).
@@ Expand All / @@ -23,6 +24,7 @@ @@
     ## Bug fixes and improvements
+    * Fix `model_fasterrcnn_*`: box outputs were not normalized to the image size and batches were not handled; also fix the performance of the `roi_align()` function (#284)
     * fix rf100 collection bounding-box now consider the correct native COCO format being 'xywh' (#272)
     * Remove `.getbatch` method from MNIST as it is providing inconsistent tensor dimensions with `.getitem` due
     to non-vectorized `transform_` operations (#264)
@@ Expand Down @@

R/dataset-coco.R

-Original file line number
+Diff line change
@@ Expand Up / @@ -58,7 +58,7 @@ coco_detection_dataset <- torch::dataset( @@
                 rep("http://images.cocodataset.org/annotations/annotations_trainval2017.zip", time = 2),
                 "http://images.cocodataset.org/zips/train2014.zip", "http://images.cocodataset.org/zips/val2014.zip",
                 rep("http://images.cocodataset.org/annotations/annotations_trainval2014.zip", time = 2)),
-        size = c("800 MB", "800 MB", rep("770 MB", time = 2), "6.33 GB", "6.33 GB", rep("242 MB", time = 2)),
+        size = c("18.4 GB", "800 MB", rep("770 MB", time = 2), "6.33 GB", "6.33 GB", rep("242 MB", time = 2)),
         md5 = c(c("cced6f7f71b7629ddf16f17bbcfab6b2", "442b8da7639aecaf257c1dceb8ba8c80"),
                 rep("f4bbac642086de4f52a3fdda2de5fa2c", time = 2),
                 c("0da8cfa0e090c266b78f30e2d2874f1a", "a3d79f5ed8d289b7a7554ce06a5782b3"),
@@ Expand Down Expand Up / @@ -415,3 +415,28 @@ coco_caption_dataset <- torch::dataset( @@
         list(x = x, y = y)
       }
     )
+    #' COCO Class Labels
+    #'
+    #' Helpers that map the 80 COCO class identifiers to their human readable
+    #' names. The names are read from the `coco.yaml` dataset description hosted
+    #' in the ultralytics repository, so the call needs network access.
+    #'
+    #' @return A character vector with the COCO class names
+    #' @family class_resolution
+    #' @importFrom utils read.delim
+    #' @export
+    coco_classes <- function() {
+      yaml_url <- "https://github.com/ultralytics/ultralytics/raw/refs/heads/main/ultralytics/cfg/datasets/coco.yaml"
+      # The class table is read positionally: the first 18 lines are skipped,
+      # then 80 `index: name` rows are split on ":" and the name column is kept.
+      name_table <- read.delim(
+        yaml_url,
+        skip = 18,
+        sep = ":",
+        nrows = 80,
+        strip.white = TRUE,
+        header = FALSE
+      )
+      class_names <- name_table[, 2]
+      # Discard rows whose name column came back empty.
+      class_names[nzchar(class_names)]
+    }
+    #' @rdname coco_classes
+    #' @param id Integer vector of 1-based class identifiers.
+    #' @return A character vector with the labels associated with `id`.
+    #' @family class_resolution
+    #' @export
+    coco_label <- function(id) {
+      # Index directly into the freshly fetched vector of class names.
+      coco_classes()[id]
+    }

R/tiny-imagenet-dataset.R → R/dataset-imagenet.R

-Original file line number
+Diff line change
@@ Expand Up / @@ -68,3 +68,57 @@ tiny_imagenet_dataset <- torch::dataset( @@
       }
     )
+    #' ImageNet Class Labels
+    #'
+    #' Helpers that map ImageNet-1k class identifiers to their human readable
+    #' labels. The labels are downloaded from the `imagenet_classes.txt` file of
+    #' the PyTorch hub repository, the same source used by PyTorch's reference
+    #' implementation.
+    #'
+    #' @return A character vector with 1000 entries representing the ImageNet-1k
+    #'   class labels.
+    #' @family class_resolution
+    #' @export
+    imagenet_classes <- function() {
+      class_lines <- readLines(
+        "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt",
+        warn = FALSE
+      )
+      # Keep only the non-empty lines of the downloaded file.
+      is_filled <- nzchar(class_lines)
+      class_lines[is_filled]
+    }
+    #' @param id Integer vector of 1-based class identifiers.
+    #' @return A character vector with the labels associated with `id`.
+    #' @family class_resolution
+    #' @rdname imagenet_classes
+    #' @export
+    imagenet_label <- function(id) {
+      # Index directly into the freshly fetched vector of class labels.
+      imagenet_classes()[id]
+    }
+    imagenet_1k_classes <- imagenet_classes  # alias spelling out the "1k" variant; no roxygen block is attached here
+    imagenet_1k_label <- imagenet_label  # alias of imagenet_label() under the explicit "1k" name
+    #' @return `imagenet_21k_classes()` returns a data frame with one row per
+    #'   ImageNet-21k class and two columns: `id` (the WordNet id) and `label`
+    #'   (the WordNet lemmas).
+    #' @family class_resolution
+    #' @rdname imagenet_classes
+    #' @export
+    imagenet_21k_classes <- function() {
+      ids_url <- "https://storage.googleapis.com/bit_models/imagenet21k_wordnet_ids.txt"
+      lemmas_url <- "https://storage.googleapis.com/bit_models/imagenet21k_wordnet_lemmas.txt"
+      ids <- readLines(ids_url, warn = FALSE)
+      labels <- readLines(lemmas_url, warn = FALSE)
+      # data.frame() recycles the shorter vector when one length is a multiple
+      # of the other, which would silently misalign ids and labels after a
+      # truncated download. Fail loudly instead.
+      if (length(ids) != length(labels)) {
+        stop(
+          "ImageNet-21k id and lemma files have different lengths (",
+          length(ids), " vs ", length(labels), ").",
+          call. = FALSE
+        )
+      }
+      data.frame(id = ids, label = labels)
+    }
+    #' @param id Integer vector of 1-based class identifiers.
+    #' @return A character vector with the labels associated with `id`.
+    #' @family class_resolution
+    #' @rdname imagenet_classes
+    #' @export
+    imagenet_21k_label <- function(id) {
+      # Only the `label` column of the class table is needed for the lookup.
+      lemmas <- imagenet_21k_classes()[["label"]]
+      lemmas[id]
+    }

R/imagenet.R

This file was deleted.

R/models-convnext_detection.R

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -31,28 +31,29 @@
  
    #' norm_std  <- c(0.229, 0.224, 0.225)

    #'

    #' # Use a publicly available image

    #' wmc <- "https://upload.wikimedia.org/wikipedia/commons/thumb/"

    #' url <- "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg"

    #' img <- base_loader(paste0(wmc, url))

    #' url <- paste0("https://upload.wikimedia.org/wikipedia/commons/thumb/",

    #'        "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg")

    #' img <- magick_loader(url) %>%

    #'   transform_to_tensor() %>%

    #'   transform_resize(c(520, 520))

    #'

    #' input <- img %>%

    #'   transform_to_tensor() %>%

    #'   transform_resize(c(520, 520)) %>%

    #'   transform_normalize(norm_mean, norm_std)

    #' batch <- input$unsqueeze(1)    # Add batch dimension (1, 3, H, W)

    #'

    #' # ConvNeXt Tiny detection

    #' model <- model_convnext_tiny_detection(pretrained_backbone = TRUE)

    #' model$eval()

    #' pred <- model(batch)$detections

    #' # Please wait 2 mins + on CPU

    #' pred <- model(batch)$detections[[1]]

    #' num_boxes <- as.integer(pred$boxes$size()[1])

    #' topk <- pred$scores$topk(k = 5)[[2]]

    #' boxes <- pred$boxes[topk, ]

    #' labels <- as.character(as.integer(pred$labels[topk]))

    #' labels <- imagenet_label(as.integer(pred$labels[topk]))

    #'

    #' # `draw_bounding_boxes()` may fail if bbox values are not consistent.

    #' if (num_boxes > 0) {

    #'   boxed <- draw_bounding_boxes(input, boxes, labels = labels)

    #'   boxed <- draw_bounding_boxes(img, boxes, labels = labels)

    #'   tensor_image_browse(boxed)

    #' }

    #' }

R/models-deeplabv3.R

-Original file line number
+Diff line change
@@ Expand Up / @@ -90,12 +90,29 @@ deeplabv3_model_urls <- list( @@
       )
     )
+    #' PASCAL VOC Class Labels
+    #'
+    #' Utilities for resolving PASCAL VOC class identifiers to their corresponding
+    #' human readable labels. The labels are hard-coded; element 1 is "background".
+    #'
+    #' @return A character vector with the 21 PASCAL VOC class names
+    #' @family class_resolution
+    #' @export
     voc_classes <- c(
       "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
       "bus", "car", "cat", "chair", "cow", "dining table", "dog", "horse",
       "motorbike", "person", "potted plant", "sheep", "sofa", "train", "tv/monitor"
     )
+    #' @rdname voc_classes
+    #' @param id Integer vector of 1-based class identifiers.
+    #' @return A character vector with the labels associated with `id`.
+    #' @family class_resolution
+    #' @export
+    voc_label <- function(id) {
+      # Plain positional lookup in the package-level class vector.
+      labels <- voc_classes
+      labels[id]
+    }
     deeplabv3_meta <- list(
       classes = voc_classes,
       class_to_idx = setNames(seq_along(voc_classes) - 1, voc_classes)
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add label functions and fix faster_rcnn design for batch inference #284

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!

Uh oh!

Uh oh!