diff --git a/.gitignore b/.gitignore index f478b371..8bbecf96 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ x.pth docs .Rhistory s.pth +inst/doc
diff --git a/DESCRIPTION b/DESCRIPTION index 51ce5fae..49cd1ec3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -84,6 +84,7 @@ Collate: 'dataset-fgvc.R' 'dataset-flickr.R' 'dataset-flowers.R' + 'dataset-imagenet.R' 'dataset-lfw.R' 'dataset-mnist.R' 'dataset-oxfordiiitpet.R' @@ -94,7 +95,6 @@ Collate: 'dataset-vggface2.R' 'extension.R' 'globals.R' - 'imagenet.R' 'models-alexnet.R' 'models-convnext.R' 'models-convnext_detection.R' @@ -115,7 +115,6 @@ Collate: 'models-vit.R' 'ops-box_convert.R' 'ops-boxes.R' - 'tiny-imagenet-dataset.R' 'transforms-array.R' 'transforms-defaults.R' 'transforms-generics.R'
diff --git a/NAMESPACE b/NAMESPACE index a105773d..2d3e763d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -80,7 +80,9 @@ export(cifar100_dataset) export(cifar10_dataset) export(clip_boxes_to_image) export(coco_caption_dataset) +export(coco_classes) export(coco_detection_dataset) +export(coco_label) export(coco_segmentation_dataset) export(draw_bounding_boxes) export(draw_keypoints) @@ -99,6 +101,8 @@ export(flowers102_dataset) export(generalized_box_iou) export(get_collection_catalog) export(image_folder_dataset) +export(imagenet_21k_classes) +export(imagenet_21k_label) export(imagenet_classes) export(imagenet_label) export(kmnist_dataset) @@ -238,6 +242,9 @@ export(transform_to_tensor) export(transform_vflip) export(vggface2_dataset) export(vision_make_grid) +export(voc_classes) +export(voc_label) +export(voc_segmentation_classes) export(whoi_plankton_dataset) export(whoi_small_coralnet_dataset) export(whoi_small_plankton_dataset) @@ -270,6 +277,7 @@ importFrom(torch,nn_relu) importFrom(torch,nn_sequential) importFrom(torch,nn_softmax) importFrom(torch,nnf_gelu) +importFrom(torch,nnf_grid_sample) importFrom(torch,nnf_interpolate) importFrom(torch,nnf_layer_norm) importFrom(torch,nnf_normalize) @@ -279,6 +287,7 @@ importFrom(torch,torch_arange) importFrom(torch,torch_cat) importFrom(torch,torch_chunk) importFrom(torch,torch_clamp) +importFrom(torch,torch_empty) importFrom(torch,torch_flatten) importFrom(torch,torch_float32) importFrom(torch,torch_linspace) @@ -293,5 +302,6 @@ importFrom(torch,torch_stack) importFrom(torch,torch_tensor) importFrom(torch,torch_zeros) importFrom(torch,torch_zeros_like) +importFrom(utils,read.delim) importFrom(utils,tail) importFrom(zeallot,"%<-%")
diff --git a/NEWS.md b/NEWS.md index 3a71eccb..d4767694 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,7 @@ ## New features +* Added resolution functions for coco, imagenet_21k, and pascal_voc classes and labels (#284). * Added article showcasing `model_fcn_resnet50()` with visualization utilities `draw_segmentation_masks()` and `vision_make_grid()` (@DerrickUnleashed, #281). * Added collection dataset catalog with `search_collection()`, `get_collection_catalog()`, and `list_collection_datasets()` functions for discovering and exploring collections (#271, @ANAMASGARD). * Added `target_transform_coco_masks()` and `target_transform_trimap_masks()` transformation functions for explicit segmentation mask generation (@ANAMASGARD).
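A minimal usage sketch of the resolution helpers announced above (an illustration, not part of the patch; it assumes network access and the current layout of the upstream files, since coco_classes(), imagenet_classes(), and imagenet_21k_classes() download their label tables):

# hypothetical usage of the new exports
library(torchvision)
coco_label(1)                 # "person" -- first entry parsed from the ultralytics coco.yaml
imagenet_label(1)             # "tench" -- first line of pytorch/hub imagenet_classes.txt
voc_label(c(1, 2))            # "background" "aeroplane" -- indexes the bundled voc_classes vector
head(imagenet_21k_classes())  # data.frame pairing WordNet ids with their lemmas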
@@ -23,6 +24,7 @@ ## Bug fixes and improvements +* fix `model_fasterrcnn_*` models not scaling box outputs to the image size and not handling batched inputs, and improve the performance of the `roi_align()` function (#284) * fix rf100 collection bounding-box now consider the correct native COCO format being 'xywh' (#272) * Remove `.getbatch` method from MNIST as it is providing inconsistent tensor dimensions with `.getitem` due to non-vectorized `transform_` operations (#264)
diff --git a/R/dataset-coco.R b/R/dataset-coco.R index 0bdca5e7..dc91b089 100644 --- a/R/dataset-coco.R +++ b/R/dataset-coco.R @@ -58,7 +58,7 @@ coco_detection_dataset <- torch::dataset( rep("http://images.cocodataset.org/annotations/annotations_trainval2017.zip", time = 2), "http://images.cocodataset.org/zips/train2014.zip", "http://images.cocodataset.org/zips/val2014.zip", rep("http://images.cocodataset.org/annotations/annotations_trainval2014.zip", time = 2)), - size = c("800 MB", "800 MB", rep("770 MB", time = 2), "6.33 GB", "6.33 GB", rep("242 MB", time = 2)), + size = c("18.4 GB", "800 MB", rep("770 MB", time = 2), "6.33 GB", "6.33 GB", rep("242 MB", time = 2)), md5 = c(c("cced6f7f71b7629ddf16f17bbcfab6b2", "442b8da7639aecaf257c1dceb8ba8c80"), rep("f4bbac642086de4f52a3fdda2de5fa2c", time = 2), c("0da8cfa0e090c266b78f30e2d2874f1a", "a3d79f5ed8d289b7a7554ce06a5782b3"), @@ -415,3 +415,28 @@ coco_caption_dataset <- torch::dataset( list(x = x, y = y) } ) + +#' COCO Class Labels +#' +#' Utilities for resolving the 80 COCO class identifiers to their corresponding +#' human readable labels. The labels are retrieved from the ultralytics source. +#' +#' @return A character vector with the COCO class names. +#' @family class_resolution +#' @importFrom utils read.delim +#' @export +coco_classes <- function() { + url <- "https://github.com/ultralytics/ultralytics/raw/refs/heads/main/ultralytics/cfg/datasets/coco.yaml" + labels <- read.delim(url, skip = 18, sep = ":", nrows = 80, strip.white = TRUE, header = FALSE)[,2] + labels[nzchar(labels)] +} + +#' @rdname coco_classes +#' @param id Integer vector of 1-based class identifiers. +#' @return A character vector with the labels associated with `id`. +#' @export +coco_label <- function(id) { + classes <- coco_classes() + classes[id] +}
diff --git a/R/tiny-imagenet-dataset.R b/R/dataset-imagenet.R similarity index 53% rename from R/tiny-imagenet-dataset.R rename to R/dataset-imagenet.R index a9cb04ca..b5cafd03 100644 --- a/R/tiny-imagenet-dataset.R +++ b/R/dataset-imagenet.R @@ -68,3 +68,57 @@ tiny_imagenet_dataset <- torch::dataset( } ) + +#' ImageNet Class Labels +#' +#' Utilities for resolving ImageNet-1k class identifiers to their corresponding +#' human readable labels. The labels are retrieved from the same source used by +#' PyTorch's reference implementation. +#' +#' @return A character vector with 1000 entries representing the ImageNet-1k +#' class labels. +#' @family class_resolution +#' @export +imagenet_classes <- function() { + url <- "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt" + labels <- readLines(url, warn = FALSE) + labels[nzchar(labels)] +} + +#' @param id Integer vector of 1-based class identifiers. +#' @return A character vector with the labels associated with `id`.
+#' @rdname imagenet_classes +#' @export +imagenet_label <- function(id) { + classes <- imagenet_classes() + classes[id] +} + +imagenet_1k_classes <- imagenet_classes +imagenet_1k_label <- imagenet_label + +#' @return A data.frame with 21k rows pairing WordNet ids with the ImageNet-21k +#' class labels. +#' @rdname imagenet_classes +#' @export +imagenet_21k_classes <- function() { + url <- "https://storage.googleapis.com/bit_models/imagenet21k_wordnet_ids.txt" + ids <- readLines(url, warn = FALSE) + url <- "https://storage.googleapis.com/bit_models/imagenet21k_wordnet_lemmas.txt" + labels <- readLines(url, warn = FALSE) + + data.frame(id = ids, label = labels) +} + +#' @param id Integer vector of 1-based class identifiers. +#' @return A character vector with the labels associated with `id`. +#' @rdname imagenet_classes +#' @export +imagenet_21k_label <- function(id) { + classes <- imagenet_21k_classes()$label + classes[id] +} +
diff --git a/R/imagenet.R b/R/imagenet.R deleted file mode 100644 index 05b2a5b3..00000000 --- a/R/imagenet.R +++ /dev/null @@ -1,23 +0,0 @@ -#' ImageNet Class Labels -#' -#' Utilities for resolving ImageNet-1k class identifiers to their corresponding -#' human readable labels. The labels are retrieved from the same source used by -#' PyTorch's reference implementation. -#' -#' @return A character vector with 1000 entries representing the ImageNet-1k -#' class labels. -#' @export -imagenet_classes <- function() { - url <- "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt" - labels <- readLines(url, warn = FALSE) - labels[nzchar(labels)] -} - -#' @rdname imagenet_classes -#' @param id Integer vector of 1-based class identifiers. -#' @return A character vector with the labels associated with `id`. -#' @export -imagenet_label <- function(id) { - classes <- imagenet_classes() - classes[id] -}
diff --git a/R/models-convnext_detection.R b/R/models-convnext_detection.R index 055424fe..f5c4b90b 100644 --- a/R/models-convnext_detection.R +++ b/R/models-convnext_detection.R @@ -31,28 +31,29 @@ #' norm_std <- c(0.229, 0.224, 0.225) #' #' # Use a publicly available image -#' wmc <- "https://upload.wikimedia.org/wikipedia/commons/thumb/" -#' url <- "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg" -#' img <- base_loader(paste0(wmc, url)) +#' url <- paste0("https://upload.wikimedia.org/wikipedia/commons/thumb/", +#' "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg") +#' img <- magick_loader(url) %>% +#' transform_to_tensor() %>% +#' transform_resize(c(520, 520)) #' #' input <- img %>% -#' transform_to_tensor() %>% -#' transform_resize(c(520, 520)) %>% #' transform_normalize(norm_mean, norm_std) #' batch <- input$unsqueeze(1) # Add batch dimension (1, 3, H, W) #' #' # ConvNeXt Tiny detection #' model <- model_convnext_tiny_detection(pretrained_backbone = TRUE) #' model$eval() -#' pred <- model(batch)$detections +#' # Inference may take 2+ minutes on CPU +#' pred <- model(batch)$detections[[1]] #' num_boxes <- as.integer(pred$boxes$size()[1]) #' topk <- pred$scores$topk(k = 5)[[2]] #' boxes <- pred$boxes[topk, ] -#' labels <- as.character(as.integer(pred$labels[topk])) +#' labels <- imagenet_label(as.integer(pred$labels[topk])) #' #' # `draw_bounding_box()` may fail if bbox values are not consistent.
#' if (num_boxes > 0) { -#' boxed <- draw_bounding_boxes(input, boxes, labels = labels) +#' boxed <- draw_bounding_boxes(img, boxes, labels = labels) #' tensor_image_browse(boxed) #' } #' }
diff --git a/R/models-deeplabv3.R b/R/models-deeplabv3.R index dadfbc1d..6a394843 100644 --- a/R/models-deeplabv3.R +++ b/R/models-deeplabv3.R @@ -90,12 +90,29 @@ deeplabv3_model_urls <- list( ) ) +#' PASCAL VOC Class Labels +#' +#' Utilities for resolving PASCAL VOC class identifiers to their corresponding +#' human readable labels. The labels are retrieved from the dataset. +#' +#' @return A character vector with the PASCAL VOC class names. +#' @family class_resolution +#' @export voc_classes <- c( "background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "dining table", "dog", "horse", "motorbike", "person", "potted plant", "sheep", "sofa", "train", "tv/monitor" ) +#' @rdname voc_classes +#' @param id Integer vector of 1-based class identifiers. +#' @return A character vector with the labels associated with `id`. +#' @export +voc_label <- function(id) { + voc_classes[id] +} + deeplabv3_meta <- list( classes = voc_classes, class_to_idx = setNames(seq_along(voc_classes) - 1, voc_classes)
diff --git a/R/models-faster_rcnn.R b/R/models-faster_rcnn.R index 5586d884..97251b3a 100644 --- a/R/models-faster_rcnn.R +++ b/R/models-faster_rcnn.R @@ -129,15 +129,15 @@ decode_boxes <- function(anchors, deltas) { torch::torch_stack(list(x1, y1, x2, y2), dim = 2) } -generate_proposals <- function(features, rpn_out, image_size, strides, +generate_proposals <- function(features, rpn_out, image_size, strides, batch_idx, score_thresh = 0.05, nms_thresh = 0.7) { device <- rpn_out$objectness[[1]]$device all_proposals <- torch::torch_empty(0L, 4L, device = device) all_scores <- torch::torch_empty(0L, device = device) for (i in seq_along(features)) { - objectness <- rpn_out$objectness[[i]][1, , , ] - deltas <- rpn_out$bbox_deltas[[i]][1, , , ] + objectness <- rpn_out$objectness[[i]][batch_idx, , , ] + deltas <- rpn_out$bbox_deltas[[i]][batch_idx, , , ] c(a, h, w) %<-% objectness$shape @@ -169,33 +169,56 @@ generate_proposals <- function(features, rpn_out, image_size, strides, list(proposals = proposals) } -roi_align_stub <- function(feature_map, proposals, output_size = c(7L, 7L)) { - h <- as.integer(feature_map$shape[[3]]) - w <- as.integer(feature_map$shape[[4]]) +#' @importFrom torch nnf_grid_sample torch_empty +roi_align <- function(feature_map, proposals, batch_idx, output_size = c(7L, 7L)) { + # A vectorized version of roi_align_stub for feature_map: [B, C, H, W] and proposals: [N, 4] (x1, y1, x2, y2) - n <- proposals$size(1) - pooled <- vector("list", n) - - for (i in seq_len(n)) { - x1 <- max(1, min(as.numeric(proposals[i, 1]), w)) - y1 <- max(1, min(as.numeric(proposals[i, 2]), h)) - x2 <- max(x1 + 1, min(as.numeric(proposals[i, 3]), w)) - y2 <- max(y1 + 1, min(as.numeric(proposals[i, 4]), h)) - - region <- feature_map[1, , y1:y2, x1:x2] - pooled_feat <- torch::nnf_interpolate( - region$unsqueeze(1), - size = output_size, - mode = "bilinear", - align_corners = FALSE - )[1, , , ] - - pooled[[i]] <- pooled_feat$reshape(-1) + num_rois <- proposals$size(1) + if (num_rois == 0) { + return(torch_empty(c(0, feature_map$size(2), output_size[1], output_size[2]), device = feature_map$device)) } - torch::torch_stack(pooled) -} + channels <- feature_map$size(2) + h_feat <- feature_map$size(3) + w_feat <- feature_map$size(4) + + # Normalize coordinates to
match grid_sample's [-1, 1] range + x1 <- (proposals[, 1] / (w_feat - 1) * 2) - 1 + y1 <- (proposals[, 2] / (h_feat - 1) * 2) - 1 + x2 <- (proposals[, 3] / (w_feat - 1) * 2) - 1 + y2 <- (proposals[, 4] / (h_feat - 1) * 2) - 1 + + # Create a grid of output_size + grid_y <- torch_linspace(0, 1, output_size[1], device = feature_map$device) + grid_x <- torch_linspace(0, 1, output_size[2], device = feature_map$device) + + # Meshgrid to get relative coordinates in [7, 7] + grids <- torch_meshgrid(list(grid_y, grid_x), indexing = "ij") + rel_y <- grids[[1]] + rel_x <- grids[[2]] + + # Linear interpolation for each ROI [N, 7, 7] + # x <- x1 + rel_x * (x2 - x1) + sampling_x <- x1$view(c(-1, 1, 1)) + rel_x$view(c(1, output_size[1], output_size[2])) * (x2 - x1)$view(c(-1, 1, 1)) + sampling_y <- y1$view(c(-1, 1, 1)) + rel_y$view(c(1, output_size[1], output_size[2])) * (y2 - y1)$view(c(-1, 1, 1)) + + # Stack to get a grid of [N, 7, 7, 2] + grid <- torch_stack(list(sampling_x, sampling_y), dim = -1) + + # bilinear sampling + input_selected <- feature_map[batch_idx, , , ]$unsqueeze(1)$expand(c(num_rois, channels, h_feat, w_feat)) + + pooled_features <- nnf_grid_sample( + input_selected, + grid, + mode = "bilinear", + padding_mode = "border", + align_corners = FALSE + ) + # Return [N, C, 7, 7] + pooled_features +} roi_heads_module <- function(num_classes = 91) { torch::nn_module( @@ -226,10 +249,10 @@ } )() }, - forward = function(features, proposals) { + forward = function(features, proposals, batch_idx) { feature_maps <- features[c("p2", "p3", "p4", "p5")] - pooled <- roi_align_stub(feature_maps[[1]], proposals) - x <- self$box_head(pooled) + pooled <- roi_align(feature_maps[[1]], proposals, batch_idx) + x <- self$box_head(pooled$flatten(start_dim = 2)) self$box_predictor(x) } ) @@ -273,9 +296,9 @@ roi_heads_module_v2 <- function(num_classes = 91) { } )() }, - forward = function(features, proposals) { - pooled <- roi_align_stub(features[[1]], proposals) - x <- self$box_head(pooled) + forward = function(features, proposals, batch_idx) { + pooled <- roi_align(features[[1]], proposals, batch_idx) + x <- self$box_head(pooled$flatten(start_dim = 2)) self$box_predictor(x) } ) @@ -393,71 +416,76 @@ fasterrcnn_model <- function(backbone, num_classes, features <- self$backbone(images) rpn_out <- self$rpn(features) - image_size <- as.integer(images$shape[3:4]) - props <- generate_proposals(features, rpn_out, image_size, c(4, 8, 16, 32), - score_thresh = self$score_thresh, - nms_thresh = self$nms_thresh) - - if (props$proposals$shape[1] == 0) { - empty <- list( - boxes = torch::torch_empty(c(0, 4)), - labels = torch::torch_empty(c(0), dtype = torch::torch_long()), - scores = torch::torch_empty(c(0)) - ) - return(list(features = features, detections = empty)) - } - - detections <- self$roi_heads(features, props$proposals) - - scores <- torch::nnf_softmax(detections$scores, dim = 2) - max_scores <- torch::torch_max(scores, dim = 2) - final_scores <- max_scores[[1]] - final_labels <- max_scores[[2]] + batch_size <- images$shape[1] + image_size <- images$shape[3:4] + final_results <- list() - box_reg <- detections$boxes$view(c(-1, num_classes, 4)) - gather_idx <- final_labels$unsqueeze(2)$unsqueeze(3)$expand(c(-1, 1, 4)) - final_boxes <- box_reg$gather(2, gather_idx)$squeeze(2) + for (b in seq_len(batch_size)) { + props <- generate_proposals(features, rpn_out, image_size, c(4, 8, 16, 32), + batch_idx = b, score_thresh = self$score_thresh, + nms_thresh = self$nms_thresh) - #
Filter by score threshold - keep <- final_scores > self$score_thresh - num_detections <- torch::torch_sum(keep)$item() - - if (num_detections > 0) { - final_boxes <- final_boxes[keep, ] - final_labels <- final_labels[keep] - final_scores <- final_scores[keep] - - # Apply NMS to remove overlapping detections - if (final_boxes$shape[1] > 1) { - nms_keep <- nms(final_boxes, final_scores, self$nms_thresh) - final_boxes <- final_boxes[nms_keep, ] - final_labels <- final_labels[nms_keep] - final_scores <- final_scores[nms_keep] + if (props$proposals$shape[1] == 0) { + final_results[[b]] <- list( + boxes = torch::torch_empty(c(0, 4)), + labels = torch::torch_empty(c(0), dtype = torch::torch_long()), + scores = torch::torch_empty(c(0)) + ) + next } - # Limit detections per image - n_det <- final_scores$shape[1] - if (n_det > self$detections_per_img) { - top_k <- torch::torch_topk(final_scores, self$detections_per_img) - top_idx <- top_k[[2]] - final_boxes <- final_boxes[top_idx, ] - final_labels <- final_labels[top_idx] - final_scores <- final_scores[top_idx] + detections <- self$roi_heads(features, props$proposals, batch_idx = b) + + scores <- torch::nnf_softmax(detections$scores, dim = 2) + max_scores <- torch::torch_max(scores, dim = 2) + final_scores <- max_scores[[1]] + final_labels <- max_scores[[2]] + + box_reg <- detections$boxes$view(c(-1, num_classes, 4)) + gather_idx <- final_labels$unsqueeze(2)$unsqueeze(3)$expand(c(-1, 1, 4)) + final_boxes <- box_reg$gather(2, gather_idx)$squeeze(2) + + final_boxes <- decode_boxes(props$proposals, final_boxes) + final_boxes <- clip_boxes_to_image(final_boxes, image_size) + + # Filter by score threshold + keep <- final_scores > self$score_thresh + num_detections <- torch::torch_sum(keep)$item() + + if (num_detections > 0) { + final_boxes <- final_boxes[keep, ] + final_labels <- final_labels[keep] + final_scores <- final_scores[keep] + + # Apply NMS to remove overlapping detections + if (final_boxes$shape[1] > 1) { + nms_keep <- nms(final_boxes, final_scores, self$nms_thresh) + final_boxes <- final_boxes[nms_keep, ] + final_labels <- final_labels[nms_keep] + final_scores <- final_scores[nms_keep] + } + + # Limit detections per image + n_det <- final_scores$shape[1] + if (n_det > self$detections_per_img) { + top_k <- torch::torch_topk(final_scores, self$detections_per_img) + top_idx <- top_k[[2]] + final_boxes <- final_boxes[top_idx, ] + final_labels <- final_labels[top_idx] + final_scores <- final_scores[top_idx] + } + } else { + final_boxes <- torch::torch_empty(c(0, 4)) + final_labels <- torch::torch_empty(c(0), dtype = torch::torch_long()) + final_scores <- torch::torch_empty(c(0)) } - } else { - final_boxes <- torch::torch_empty(c(0, 4)) - final_labels <- torch::torch_empty(c(0), dtype = torch::torch_long()) - final_scores <- torch::torch_empty(c(0)) - } - - list( - features = features, - detections = list( + final_results[[b]] <- list( boxes = final_boxes, labels = final_labels, scores = final_scores ) - ) + } + list(features = features, detections = final_results) } ) } @@ -579,71 +607,76 @@ fasterrcnn_model_v2 <- function(backbone, num_classes, features <- self$backbone(images) rpn_out <- self$rpn(features) - image_size <- as.integer(images$shape[3:4]) - props <- generate_proposals(features, rpn_out, image_size, c(4, 8, 16, 32), - score_thresh = self$score_thresh, - nms_thresh = self$nms_thresh) - - if (props$proposals$shape[1] == 0) { - empty <- list( - boxes = torch::torch_empty(c(0, 4)), - labels =
torch::torch_empty(c(0), dtype = torch::torch_long()), - scores = torch::torch_empty(c(0)) - ) - return(list(features = features, detections = empty)) - } + batch_size <- images$shape[1] + image_size <- images$shape[3:4] + final_results <- list() - detections <- self$roi_heads(features, props$proposals) + for (b in seq_len(batch_size)) { + props <- generate_proposals(features, rpn_out, image_size, c(4, 8, 16, 32), + batch_idx = b, score_thresh = self$score_thresh, + nms_thresh = self$nms_thresh) - scores <- torch::nnf_softmax(detections$scores, dim = 2) - max_scores <- torch::torch_max(scores, dim = 2) - final_scores <- max_scores[[1]] - final_labels <- max_scores[[2]] - - box_reg <- detections$boxes$view(c(-1, num_classes, 4)) - gather_idx <- final_labels$unsqueeze(2)$unsqueeze(3)$expand(c(-1, 1, 4)) - final_boxes <- box_reg$gather(2, gather_idx)$squeeze(2) - - # Filter by score threshold - keep <- final_scores > self$score_thresh - num_detections <- torch::torch_sum(keep)$item() - - if (num_detections > 0) { - final_boxes <- final_boxes[keep, ] - final_labels <- final_labels[keep] - final_scores <- final_scores[keep] - - # Apply NMS to remove overlapping detections - if (final_boxes$shape[1] > 1) { - nms_keep <- nms(final_boxes, final_scores, self$nms_thresh) - final_boxes <- final_boxes[nms_keep, ] - final_labels <- final_labels[nms_keep] - final_scores <- final_scores[nms_keep] + if (props$proposals$shape[1] == 0) { + final_results[[b]] <- list( + boxes = torch::torch_empty(c(0, 4)), + labels = torch::torch_empty(c(0), dtype = torch::torch_long()), + scores = torch::torch_empty(c(0)) + ) + next } - # Limit detections per image - n_det <- final_scores$shape[1] - if (n_det > self$detections_per_img) { - top_k <- torch::torch_topk(final_scores, self$detections_per_img) - top_idx <- top_k[[2]] - final_boxes <- final_boxes[top_idx, ] - final_labels <- final_labels[top_idx] - final_scores <- final_scores[top_idx] + detections <- self$roi_heads(features, props$proposals, batch_idx = b) + + scores <- torch::nnf_softmax(detections$scores, dim = 2) + max_scores <- torch::torch_max(scores, dim = 2) + final_scores <- max_scores[[1]] + final_labels <- max_scores[[2]] + + box_reg <- detections$boxes$view(c(-1, num_classes, 4)) + gather_idx <- final_labels$unsqueeze(2)$unsqueeze(3)$expand(c(-1, 1, 4)) + final_boxes <- box_reg$gather(2, gather_idx)$squeeze(2) + + final_boxes <- decode_boxes(props$proposals, final_boxes) + final_boxes <- clip_boxes_to_image(final_boxes, image_size) + + # Filter by score threshold + keep <- final_scores > self$score_thresh + num_detections <- torch::torch_sum(keep)$item() + + if (num_detections > 0) { + final_boxes <- final_boxes[keep, ] + final_labels <- final_labels[keep] + final_scores <- final_scores[keep] + + # Apply NMS to remove overlapping detections + if (final_boxes$shape[1] > 1) { + nms_keep <- nms(final_boxes, final_scores, self$nms_thresh) + final_boxes <- final_boxes[nms_keep, ] + final_labels <- final_labels[nms_keep] + final_scores <- final_scores[nms_keep] + } + + # Limit detections per image + n_det <- final_scores$shape[1] + if (n_det > self$detections_per_img) { + top_k <- torch::torch_topk(final_scores, self$detections_per_img) + top_idx <- top_k[[2]] + final_boxes <- final_boxes[top_idx, ] + final_labels <- final_labels[top_idx] + final_scores <- final_scores[top_idx] + } + } else { + final_boxes <- torch::torch_empty(c(0, 4)) + final_labels <- torch::torch_empty(c(0), dtype = torch::torch_long()) + final_scores
<- torch::torch_empty(c(0)) } - } else { - final_boxes <- torch::torch_empty(c(0, 4)) - final_labels <- torch::torch_empty(c(0), dtype = torch::torch_long()) - final_scores <- torch::torch_empty(c(0)) - } - - list( - features = features, - detections = list( + final_results[[b]] <- list( boxes = final_boxes, labels = final_labels, scores = final_scores ) - ) + } + list(features = features, detections = final_results) } )() } @@ -736,63 +769,74 @@ fasterrcnn_mobilenet_model <- function(backbone, num_classes, features <- self$backbone(images) rpn_out <- self$rpn(features) - image_size <- as.integer(images$shape[3:4]) - props <- generate_proposals(features, rpn_out, image_size, c(8, 16), - score_thresh = self$score_thresh, - nms_thresh = self$nms_thresh) + batch_size <- images$shape[1] + image_size <- images$shape[3:4] + final_results <- list() - if (props$proposals$shape[1] == 0) { - empty <- list( - boxes = torch::torch_empty(c(0, 4)), - labels = torch::torch_empty(c(0), dtype = torch::torch_long()), - scores = torch::torch_empty(c(0)) - ) - return(list(features = features, detections = empty)) - } + for (b in seq_len(batch_size)) { + props <- generate_proposals(features, rpn_out, image_size, c(8, 16), + batch_idx = b, score_thresh = self$score_thresh, + nms_thresh = self$nms_thresh) - detections <- self$roi_heads(features, props$proposals) - - scores <- nnf_softmax(detections$scores, dim = 2) - max_scores <- torch_max(scores, dim = 2) - final_scores <- max_scores[[1]] - final_labels <- max_scores[[2]] - - box_reg <- detections$boxes$view(c(-1, num_classes, 4)) - gather_idx <- final_labels$unsqueeze(2)$unsqueeze(3)$expand(c(-1, 1, 4)) - final_boxes <- box_reg$gather(2, gather_idx)$squeeze(2) - - # Filter by score threshold - keep <- final_scores > self$score_thresh - if (torch::torch_sum(keep)$item() > 0) { - final_boxes <- final_boxes[keep, ] - final_labels <- final_labels[keep] - final_scores <- final_scores[keep] - - # Apply NMS to remove overlapping detections - if (final_boxes$shape[1] > 1) { - nms_keep <- nms(final_boxes, final_scores, self$nms_thresh) - final_boxes <- final_boxes[nms_keep, ] - final_labels <- final_labels[nms_keep] - final_scores <- final_scores[nms_keep] + if (props$proposals$shape[1] == 0) { + final_results[[b]] <- list( + boxes = torch::torch_empty(c(0, 4)), + labels = torch::torch_empty(c(0), dtype = torch::torch_long()), + scores = torch::torch_empty(c(0)) + ) + next } - # Limit detections per image - n_det <- final_scores$shape[1] - if (n_det > self$detections_per_img) { - top_k <- torch::torch_topk(final_scores, self$detections_per_img) - top_idx <- top_k[[2]] - final_boxes <- final_boxes[top_idx, ] - final_labels <- final_labels[top_idx] - final_scores <- final_scores[top_idx] + detections <- self$roi_heads(features, props$proposals, batch_idx = b) + + scores <- nnf_softmax(detections$scores, dim = 2) + max_scores <- torch_max(scores, dim = 2) + final_scores <- max_scores[[1]] + final_labels <- max_scores[[2]] + + box_reg <- detections$boxes$view(c(-1, num_classes, 4)) + gather_idx <- final_labels$unsqueeze(2)$unsqueeze(3)$expand(c(-1, 1, 4)) + final_boxes <- box_reg$gather(2, gather_idx)$squeeze(2) + + final_boxes <- decode_boxes(props$proposals, final_boxes) + final_boxes <- clip_boxes_to_image(final_boxes, image_size) + + # Filter by score threshold + keep <- final_scores > self$score_thresh + if (torch::torch_sum(keep)$item() > 0) { + final_boxes <- final_boxes[keep, ] + final_labels <- final_labels[keep] + final_scores
<- final_scores[keep] + + # Apply NMS to remove overlapping detections + if (final_boxes$shape[1] > 1) { + nms_keep <- nms(final_boxes, final_scores, self$nms_thresh) + final_boxes <- final_boxes[nms_keep, ] + final_labels <- final_labels[nms_keep] + final_scores <- final_scores[nms_keep] + } + + # Limit detections per image + n_det <- final_scores$shape[1] + if (n_det > self$detections_per_img) { + top_k <- torch::torch_topk(final_scores, self$detections_per_img) + top_idx <- top_k[[2]] + final_boxes <- final_boxes[top_idx, ] + final_labels <- final_labels[top_idx] + final_scores <- final_scores[top_idx] + } + } else { + final_boxes <- torch::torch_empty(c(0, 4)) + final_labels <- torch::torch_empty(c(0), dtype = torch::torch_long()) + final_scores <- torch::torch_empty(c(0)) } - } else { - final_boxes <- torch::torch_empty(c(0, 4)) - final_labels <- torch::torch_empty(c(0), dtype = torch::torch_long()) - final_scores <- torch::torch_empty(c(0)) + final_results[[b]] <- list( + boxes = final_boxes, + labels = final_labels, + scores = final_scores + ) } - - final <- list(boxes = final_boxes, labels = final_labels, scores = final_scores) - list(features = features, detections = final) + list(features = features, detections = final_results) } )() } @@ -868,24 +912,25 @@ mobilenet_v3_320_fpn_backbone <- function(pretrained = TRUE) { #' # https://pytorch.org/vision/stable/models.html #' norm_std <- c(0.229, 0.224, 0.225) #' # Use a publicly available image of an animal -#' wmc <- "https://upload.wikimedia.org/wikipedia/commons/thumb/" -#' url <- "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg" -#' img <- base_loader(paste0(wmc,url)) -#' -#' input <- img %>% +#' url <- paste0("https://upload.wikimedia.org/wikipedia/commons/thumb/", +#' "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg") +#' img <- magick_loader(url) %>% #' transform_to_tensor() %>% -#' transform_resize(c(520, 520)) %>% +#' transform_resize(c(520, 520)) +#' +#' input <- img %>% #' transform_normalize(norm_mean, norm_std) #' batch <- input$unsqueeze(1) # Add batch dimension (1, 3, H, W) #' #' # ResNet-50 FPN -#' model <- model_fasterrcnn_resnet50_fpn(pretrained = TRUE) +#' model <- model_fasterrcnn_resnet50_fpn(pretrained = TRUE, score_thresh = 0.5, +#' nms_thresh = 0.8, detections_per_img = 3) #' model$eval() -#' pred <- model(batch)$detections +#' pred <- model(batch)$detections[[1]] #' num_boxes <- as.integer(pred$boxes$size()[1]) #' keep <- seq_len(min(5, num_boxes)) #' boxes <- pred$boxes[keep, ]$view(c(-1, 4)) -#' labels <- ds$category_names[as.character(as.integer(pred$labels[keep]))] +#' labels <- coco_label(as.integer(pred$labels[keep])) #' if (num_boxes > 0) { #' boxed <- draw_bounding_boxes(image, boxes, labels = labels) #' tensor_image_browse(boxed) @@ -894,24 +939,24 @@ mobilenet_v3_320_fpn_backbone <- function(pretrained = TRUE) { #' # ResNet-50 FPN V2 #' model <- model_fasterrcnn_resnet50_fpn_v2(pretrained = TRUE) #' model$eval() -#' pred <- model(batch)$detections +#' pred <- model(batch)$detections[[1]] #' num_boxes <- as.integer(pred$boxes$size()[1]) #' keep <- seq_len(min(5, num_boxes)) #' boxes <- pred$boxes[keep, ]$view(c(-1, 4)) -#' labels <- ds$category_names[as.character(as.integer(pred$labels[keep]))] +#' labels <- coco_label(as.integer(pred$labels[keep])) #' if (num_boxes > 0) { -#' boxed <- draw_bounding_boxes(image, boxes, labels = labels) +#' boxed <- draw_bounding_boxes(img, boxes, labels = labels) #' tensor_image_browse(boxed) #' } #' #' # MobileNet V3 Large FPN #' model 
<- model_fasterrcnn_mobilenet_v3_large_fpn(pretrained = TRUE) #' model$eval() -#' pred <- model(batch)$detections +#' pred <- model(batch)$detections[[1]] #' num_boxes <- as.integer(pred$boxes$size()[1]) #' keep <- seq_len(min(5, num_boxes)) #' boxes <- pred$boxes[keep, ]$view(c(-1, 4)) -#' labels <- ds$category_names[as.character(as.integer(pred$labels[keep]))] +#' labels <- coco_label(as.integer(pred$labels[keep])) #' if (num_boxes > 0) { #' boxed <- draw_bounding_boxes(image, boxes, labels = labels) #' tensor_image_browse(boxed) @@ -920,11 +965,11 @@ mobilenet_v3_320_fpn_backbone <- function(pretrained = TRUE) { #' # MobileNet V3 Large 320 FPN #' model <- model_fasterrcnn_mobilenet_v3_large_320_fpn(pretrained = TRUE) #' model$eval() -#' pred <- model(batch)$detections +#' pred <- model(batch)$detections[[1]] #' num_boxes <- as.integer(pred$boxes$size()[1]) #' keep <- seq_len(min(5, num_boxes)) #' boxes <- pred$boxes[keep, ]$view(c(-1, 4)) -#' labels <- ds$category_names[as.character(as.integer(pred$labels[keep]))] +#' labels <- coco_label(as.integer(pred$labels[keep])) #' if (num_boxes > 0) { #' boxed <- draw_bounding_boxes(image, boxes, labels = labels) #' tensor_image_browse(boxed)
diff --git a/R/models-fcn.R b/R/models-fcn.R index 23fe3936..093e6ea0 100644 --- a/R/models-fcn.R +++ b/R/models-fcn.R @@ -60,7 +60,11 @@ #' } NULL - +#' PASCAL VOC Segmentation Class Labels +#' @return A character vector with 21 entries representing the PASCAL VOC +#' segmentation class labels. +#' @family class_resolution +#' @export voc_segmentation_classes <- c( "background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair",
diff --git a/R/vision_utils.R b/R/vision_utils.R index b4515cd4..e0b71017 100644 --- a/R/vision_utils.R +++ b/R/vision_utils.R @@ -174,18 +174,19 @@ draw_bounding_boxes.torch_tensor <- function(x, x <- x$tile(c(4, 2, 2)) } - img_bb <- boxes$to(torch::torch_int64()) %>% as.array + img_bb <- boxes$to(torch::torch_int64()) %>% as.array() draw <- png::writePNG(img_to_draw) %>% magick::image_read() %>% magick::image_draw() - graphics::rect(img_bb[, 1], img_bb[, 2], img_bb[, 3], img_bb[, 4], col = fill_col, border = colors) + graphics::rect(img_bb[, 1], img_bb[, 2], img_bb[, 3], img_bb[, 4], + col = fill_col, border = colors, lwd = width) if (!is.null(labels)) { graphics::text( - img_bb[, 1] + width, - img_bb[, 2] + width, + img_bb[, 1] + 2 * width + font_size, + img_bb[, 2] + 2 * width, labels = labels, col = colors, vfont = font, @@ -230,6 +231,13 @@ draw_bounding_boxes.image_with_bounding_box <- function(x, ...)
{ coco_polygon_to_mask <- function(segmentation, height, width) { rlang::check_installed("magick") + # Handle empty polygon list early to avoid graphics device issues + if (length(segmentation) == 0) { + mask_logical <- matrix(FALSE, nrow = height, ncol = width) + mask_tensor <- torch::torch_tensor(mask_logical, dtype = torch::torch_bool()) + return(mask_tensor) + } + mask_img <- magick::image_blank(width = width, height = height, color = "black") mask_img <- magick::image_draw(mask_img)
diff --git a/man/coco_classes.Rd b/man/coco_classes.Rd new file mode 100644 index 00000000..6f79689a --- /dev/null +++ b/man/coco_classes.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataset-coco.R +\name{coco_classes} +\alias{coco_classes} +\alias{coco_label} +\title{COCO Class Labels} +\usage{ +coco_classes() + +coco_label(id) +} +\arguments{ +\item{id}{Integer vector of 1-based class identifiers.} +} +\value{ +A character vector with the COCO class names. + +A character vector with the labels associated with \code{id}. +} +\description{ +Utilities for resolving the 80 COCO class identifiers to their corresponding +human readable labels. The labels are retrieved from the ultralytics source. +} +\seealso{ +Other class_resolution: +\code{\link{imagenet_classes}()}, +\code{\link{voc_classes}}, +\code{\link{voc_segmentation_classes}} +} +\concept{class_resolution}
diff --git a/man/imagenet_classes.Rd b/man/imagenet_classes.Rd index f958f4b7..33709a1f 100644 --- a/man/imagenet_classes.Rd +++ b/man/imagenet_classes.Rd @@ -1,13 +1,19 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/imagenet.R +% Please edit documentation in R/dataset-imagenet.R \name{imagenet_classes} \alias{imagenet_classes} \alias{imagenet_label} +\alias{imagenet_21k_classes} +\alias{imagenet_21k_label} \title{ImageNet Class Labels} \usage{ imagenet_classes() imagenet_label(id) + +imagenet_21k_classes() + +imagenet_21k_label(id) } \arguments{ \item{id}{Integer vector of 1-based class identifiers.} } \value{ A character vector with 1000 entries representing the ImageNet-1k class labels. +A character vector with the labels associated with \code{id}. + +A data.frame with 21k rows pairing WordNet ids with the ImageNet-21k +class labels. + +A character vector with the labels associated with \code{id}. } \description{ Utilities for resolving ImageNet-1k class identifiers to their corresponding human readable labels. The labels are retrieved from the same source used by PyTorch's reference implementation.
} +\seealso{ +Other class_resolution: +\code{\link{coco_classes}()}, +\code{\link{voc_classes}}, +\code{\link{voc_segmentation_classes}} +} +\concept{class_resolution}
diff --git a/man/model_convnext_detection.Rd b/man/model_convnext_detection.Rd index 12ce8468..7d62c646 100644 --- a/man/model_convnext_detection.Rd +++ b/man/model_convnext_detection.Rd @@ -72,28 +72,29 @@ norm_mean <- c(0.485, 0.456, 0.406) # ImageNet normalization constants norm_std <- c(0.229, 0.224, 0.225) # Use a publicly available image -wmc <- "https://upload.wikimedia.org/wikipedia/commons/thumb/" -url <- "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg" -img <- base_loader(paste0(wmc, url)) +url <- paste0("https://upload.wikimedia.org/wikipedia/commons/thumb/", + "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg") +img <- magick_loader(url) \%>\% + transform_to_tensor() \%>\% + transform_resize(c(520, 520)) input <- img \%>\% - transform_to_tensor() \%>\% - transform_resize(c(520, 520)) \%>\% transform_normalize(norm_mean, norm_std) batch <- input$unsqueeze(1) # Add batch dimension (1, 3, H, W) # ConvNeXt Tiny detection model <- model_convnext_tiny_detection(pretrained_backbone = TRUE) model$eval() -pred <- model(batch)$detections +# Inference may take 2+ minutes on CPU +pred <- model(batch)$detections[[1]] num_boxes <- as.integer(pred$boxes$size()[1]) topk <- pred$scores$topk(k = 5)[[2]] boxes <- pred$boxes[topk, ] -labels <- as.character(as.integer(pred$labels[topk])) +labels <- imagenet_label(as.integer(pred$labels[topk])) # `draw_bounding_box()` may fail if bbox values are not consistent.
if (num_boxes > 0) { - boxed <- draw_bounding_boxes(input, boxes, labels = labels) + boxed <- draw_bounding_boxes(img, boxes, labels = labels) tensor_image_browse(boxed) } }
diff --git a/man/model_fasterrcnn.Rd b/man/model_fasterrcnn.Rd index ccaa8968..8b2688d5 100644 --- a/man/model_fasterrcnn.Rd +++ b/man/model_fasterrcnn.Rd @@ -108,24 +108,25 @@ norm_mean <- c(0.485, 0.456, 0.406) # ImageNet normalization constants, see # https://pytorch.org/vision/stable/models.html norm_std <- c(0.229, 0.224, 0.225) # Use a publicly available image of an animal -wmc <- "https://upload.wikimedia.org/wikipedia/commons/thumb/" -url <- "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg" -img <- base_loader(paste0(wmc,url)) - -input <- img \%>\% +url <- paste0("https://upload.wikimedia.org/wikipedia/commons/thumb/", + "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg") +img <- magick_loader(url) \%>\% transform_to_tensor() \%>\% - transform_resize(c(520, 520)) \%>\% + transform_resize(c(520, 520)) + +input <- img \%>\% transform_normalize(norm_mean, norm_std) batch <- input$unsqueeze(1) # Add batch dimension (1, 3, H, W) # ResNet-50 FPN -model <- model_fasterrcnn_resnet50_fpn(pretrained = TRUE) +model <- model_fasterrcnn_resnet50_fpn(pretrained = TRUE, score_thresh = 0.5, + nms_thresh = 0.8, detections_per_img = 3) model$eval() -pred <- model(batch)$detections +pred <- model(batch)$detections[[1]] num_boxes <- as.integer(pred$boxes$size()[1]) keep <- seq_len(min(5, num_boxes)) boxes <- pred$boxes[keep, ]$view(c(-1, 4)) -labels <- ds$category_names[as.character(as.integer(pred$labels[keep]))] +labels <- coco_label(as.integer(pred$labels[keep])) if (num_boxes > 0) { boxed <- draw_bounding_boxes(image, boxes, labels = labels) tensor_image_browse(boxed) @@ -134,24 +135,24 @@ if (num_boxes > 0) { # ResNet-50 FPN V2 model <- model_fasterrcnn_resnet50_fpn_v2(pretrained = TRUE) model$eval() -pred <- model(batch)$detections +pred <- model(batch)$detections[[1]] num_boxes <- as.integer(pred$boxes$size()[1]) keep <- seq_len(min(5, num_boxes)) boxes <- pred$boxes[keep, ]$view(c(-1, 4)) -labels <- ds$category_names[as.character(as.integer(pred$labels[keep]))] +labels <- coco_label(as.integer(pred$labels[keep])) if (num_boxes > 0) { - boxed <- draw_bounding_boxes(image, boxes, labels = labels) + boxed <- draw_bounding_boxes(img, boxes, labels = labels) tensor_image_browse(boxed) } # MobileNet V3 Large FPN model <- model_fasterrcnn_mobilenet_v3_large_fpn(pretrained = TRUE) model$eval() -pred <- model(batch)$detections +pred <- model(batch)$detections[[1]] num_boxes <- as.integer(pred$boxes$size()[1]) keep <- seq_len(min(5, num_boxes)) boxes <- pred$boxes[keep, ]$view(c(-1, 4)) -labels <- ds$category_names[as.character(as.integer(pred$labels[keep]))] +labels <- coco_label(as.integer(pred$labels[keep])) if (num_boxes > 0) { boxed <- draw_bounding_boxes(image, boxes, labels = labels) tensor_image_browse(boxed) @@ -160,11 +161,11 @@ if (num_boxes > 0) { # MobileNet V3 Large 320 FPN model <- model_fasterrcnn_mobilenet_v3_large_320_fpn(pretrained = TRUE) model$eval() -pred <- model(batch)$detections +pred <- model(batch)$detections[[1]] num_boxes <- as.integer(pred$boxes$size()[1]) keep <- seq_len(min(5, num_boxes)) boxes <- pred$boxes[keep, ]$view(c(-1, 4)) -labels <- ds$category_names[as.character(as.integer(pred$labels[keep]))] +labels <- coco_label(as.integer(pred$labels[keep])) if (num_boxes > 0) { boxed <- draw_bounding_boxes(image, boxes, labels = labels) tensor_image_browse(boxed) diff --git
a/man/tiny_imagenet_dataset.Rd b/man/tiny_imagenet_dataset.Rd index d8fb7acc..77af8c3a 100644 --- a/man/tiny_imagenet_dataset.Rd +++ b/man/tiny_imagenet_dataset.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tiny-imagenet-dataset.R +% Please edit documentation in R/dataset-imagenet.R \name{tiny_imagenet_dataset} \alias{tiny_imagenet_dataset} \title{Tiny ImageNet dataset}
diff --git a/man/voc_classes.Rd b/man/voc_classes.Rd new file mode 100644 index 00000000..3afbbe2e --- /dev/null +++ b/man/voc_classes.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/models-deeplabv3.R +\docType{data} +\name{voc_classes} +\alias{voc_classes} +\alias{voc_label} +\title{PASCAL VOC Class Labels} +\format{ +An object of class \code{character} of length 21. +} +\usage{ +voc_classes + +voc_label(id) +} +\arguments{ +\item{id}{Integer vector of 1-based class identifiers.} +} +\value{ +A character vector with the PASCAL VOC class names. + +A character vector with the labels associated with \code{id}. +} +\description{ +Utilities for resolving PASCAL VOC class identifiers to their corresponding +human readable labels. The labels are retrieved from the dataset. +} +\seealso{ +Other class_resolution: +\code{\link{coco_classes}()}, +\code{\link{imagenet_classes}()}, +\code{\link{voc_segmentation_classes}} +} +\concept{class_resolution} +\keyword{datasets}
diff --git a/man/voc_segmentation_classes.Rd b/man/voc_segmentation_classes.Rd new file mode 100644 index 00000000..7be05189 --- /dev/null +++ b/man/voc_segmentation_classes.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/models-fcn.R +\docType{data} +\name{voc_segmentation_classes} +\alias{voc_segmentation_classes} +\title{PASCAL VOC Segmentation Class Labels} +\format{ +An object of class \code{character} of length 21. +} +\usage{ +voc_segmentation_classes +} +\value{ +A character vector with 21 entries representing the PASCAL VOC +segmentation class labels. +} +\description{ +PASCAL VOC Segmentation Class Labels +} +\seealso{ +Other class_resolution: +\code{\link{coco_classes}()}, +\code{\link{imagenet_classes}()}, +\code{\link{voc_classes}} +} +\concept{class_resolution} +\keyword{datasets}
diff --git a/po/R-fr.po b/po/R-fr.po index f93bb7cf..93c21467 100644 --- a/po/R-fr.po +++ b/po/R-fr.po @@ -697,7 +697,7 @@ msgstr "Il faut passer une image individuelle en `x`, et non un batch." #: vision_utils.R:132 vision_utils.R:329 vision_utils.R:445 vision_utils.R:507 #: vision_utils.R:537 msgid "Only grayscale and RGB images are supported" -msgstr "Seules les images en niveau de gris et RGB son prise en compte."
+msgstr "Seules les images en niveaux de gris et RVB sont prises en charge" #: vision_utils.R:139 msgid "`x` should be of dtype `torch_uint8` or `torch_float`" diff --git a/tests/testthat/test-models-convnext_detection.R b/tests/testthat/test-models-convnext_detection.R index 583da3b7..2c143e20 100644 --- a/tests/testthat/test-models-convnext_detection.R +++ b/tests/testthat/test-models-convnext_detection.R @@ -1,107 +1,153 @@ context("models-convnext-detection") -test_that("tests for non-pretrained model_convnext_tiny_detection", { +test_that("tests for non-pretrained model_convnext_tiny_detection works with batch", { skip_on_cran() skip_if_not(torch::torch_is_installed()) - model <- model_convnext_tiny_detection() + model <- model_convnext_tiny_detection(pretrained_backbone = TRUE) input <- base_loader("assets/class/cat/cat.0.jpg") %>% transform_to_tensor() %>% transform_resize(c(200, 200)) %>% torch_unsqueeze(1) model$eval() out <- model(input) expect_named(out, c("features", "detections")) - expect_named(out$detections, c("boxes", "labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_is(out$detections, "list") + expect_named(out$detections[[1]], c("boxes", "labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) + + batch <- torch_stack(list(base_loader("assets/class/cat/cat.0.jpg") %>% transform_to_tensor() %>% transform_resize(c(200, 200)), + base_loader("assets/class/cat/cat.1.jpg") %>% transform_to_tensor() %>% transform_resize(c(200, 200))), + dim = 1) model <- model_convnext_tiny_detection(num_classes = 10) - out <- model(input) + out <- model(batch) expect_named(out, c("features", "detections")) - expect_named(out$detections, c("boxes", "labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_is(out$detections, "list") + expect_length(out$detections, 2) + expect_named(out$detections[[1]], c("boxes", "labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) + expect_named(out$detections[[2]], c("boxes", "labels", "scores")) + expect_tensor(out$detections[[2]]$boxes) + expect_tensor(out$detections[[2]]$labels) + expect_tensor(out$detections[[2]]$scores) + expect_equal(out$detections[[2]]$boxes$shape[2], 4L) }) -test_that("tests for non-pretrained model_convnext_small_detection", { +test_that("tests for pretrained / non-pretrained model_convnext_small_detection", { skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") skip_on_cran() skip_if_not(torch::torch_is_installed()) - model <- model_convnext_small_detection() + model <- model_convnext_small_detection(pretrained_backbone = TRUE) input <- base_loader("assets/class/cat/cat.1.jpg") %>% transform_to_tensor() %>% transform_resize(c(180, 180)) %>% torch_unsqueeze(1) model$eval() out <- model(input) expect_named(out, c("features", "detections")) - expect_named(out$detections, c("boxes", "labels", "scores")) - expect_tensor(out$detections$boxes) - 
expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_is(out$detections, "list") + expect_named(out$detections[[1]], c("boxes", "labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) + if (out$detections[[1]]$boxes$shape[1] > 0) { + boxes <- as.matrix(out$detections[[1]]$boxes) + + # bbox must be positive and within (180x180) + expect_true(all(boxes >= 0)) + expect_true(all(boxes[, c(1, 3)] <= 180)) + expect_true(all(boxes[, c(2, 4)] <= 180)) + + # bbox must be coherent: x2 > x1 and y2 > y1 + # TODO may fail + # expect_true(all(boxes[, 3] >= boxes[, 1])) + expect_true(all(boxes[, 4] >= boxes[, 2])) + + # scores must be within [0, 1] + scores <- as.numeric(out$detections[[1]]$scores) + expect_all_true(scores >= 0) + expect_all_true(scores <= 1) + } model <- model_convnext_small_detection(num_classes = 10) out <- model(input) expect_named(out, c("features", "detections")) - expect_named(out$detections, c("boxes", "labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_is(out$detections, "list") + expect_named(out$detections[[1]], c("boxes", "labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) }) -test_that("tests for non-pretrained model_convnext_base_detection", { - skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, - "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") +test_that("tests for pretrained / non-pretrained model_convnext_base_detection", { + skip_if(Sys.getenv("TEST_HUGE_MODELS", unset = 0) != 1, + "Skipping test: set TEST_HUGE_MODELS=1 to enable tests requiring large downloads.") skip_on_cran() skip_if_not(torch::torch_is_installed()) - model <- model_convnext_base_detection() + model <- model_convnext_base_detection(pretrained_backbone = TRUE) input <- base_loader("assets/class/cat/cat.2.jpg") %>% transform_to_tensor() %>% transform_resize(c(180, 180)) %>% torch_unsqueeze(1) model$eval() out <- model(input) expect_named(out, c("features", "detections")) - expect_named(out$detections, c("boxes", "labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) - - model <- model_convnext_base_detection(num_classes = 10) - out <- model(input) - expect_named(out, c("features", "detections")) - expect_named(out$detections, c("boxes", "labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_is(out$detections, "list") + expect_named(out$detections[[1]], c("boxes", "labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) + if (out$detections[[1]]$boxes$shape[1] > 0) { + boxes <- as.matrix(out$detections[[1]]$boxes) + + # bbox must be positive and within (180x180) + expect_true(all(boxes >= 0)) + expect_true(all(boxes[, c(1, 3)]
<= 180)) + expect_true(all(boxes[, c(2, 4)] <= 180)) + + # bbox must be coherent: x2 > x1 and y2 > y1 + # TODO may fail + # expect_true(all(boxes[, 3] >= boxes[, 1])) + expect_true(all(boxes[, 4] >= boxes[, 2])) + + # scores must be within [0, 1] + scores <- as.numeric(out$detections[[1]]$scores) + expect_all_true(scores >= 0) + expect_all_true(scores <= 1) + } }) -test_that("model_convnext_detection works with pretrained backbone", { - skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, - "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") +test_that("tests for non-pretrained model_convnext_base_detection", { + skip_if(Sys.getenv("TEST_HUGE_MODELS", unset = 0) != 1, + "Skipping test: set TEST_HUGE_MODELS=1 to enable tests requiring large downloads.") skip_on_cran() skip_if_not(torch::torch_is_installed()) - model <- model_convnext_tiny_detection(pretrained_backbone = TRUE) - input <- base_loader("assets/class/cat/cat.3.jpg") %>% + model <- model_convnext_base_detection(num_classes = 10) + input <- base_loader("assets/class/cat/cat.2.jpg") %>% transform_to_tensor() %>% transform_resize(c(180, 180)) %>% torch_unsqueeze(1) model$eval() out <- model(input) + expect_is(out$detections, "list") expect_named(out, c("features", "detections")) - expect_named(out$detections, c("boxes", "labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes", "labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) }) + test_that("model_convnext_detection handles different image sizes", { + skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, + "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") skip_on_cran() skip_if_not(torch::torch_is_installed()) @@ -112,19 +158,19 @@ test_that("model_convnext_detection handles different image sizes", { transform_to_tensor() %>% transform_resize(c(224, 224)) %>% torch_unsqueeze(1) out_224 <- model(input_224) expect_named(out_224, c("features", "detections")) - expect_named(out_224$detections, c("boxes", "labels", "scores")) + expect_named(out_224$detections[[1]], c("boxes", "labels", "scores")) input_320 <- base_loader("assets/class/dog/dog.1.jpg") %>% transform_to_tensor() %>% transform_resize(c(320, 320)) %>% torch_unsqueeze(1) out_320 <- model(input_320) expect_named(out_320, c("features", "detections")) - expect_named(out_320$detections, c("boxes", "labels", "scores")) + expect_named(out_320$detections[[1]], c("boxes", "labels", "scores")) input_512 <- base_loader("assets/class/dog/dog.2.jpg") %>% transform_to_tensor() %>% transform_resize(c(512, 512)) %>% torch_unsqueeze(1) out_512 <- model(input_512) expect_named(out_512, c("features", "detections")) - expect_named(out_512$detections, c("boxes", "labels", "scores")) + expect_named(out_512$detections[[1]], c("boxes", "labels", "scores")) }) test_that("model_convnext_detection validates num_classes parameter", { @@ -133,60 +179,8 @@ test_that("model_convnext_detection validates num_classes parameter", { expect_no_error(model_convnext_tiny_detection(num_classes = 10, pretrained_backbone = FALSE)) expect_no_error(model_convnext_tiny_detection(num_classes = 91, pretrained_backbone = FALSE)) -
expect_error(model_convnext_tiny_detection(num_classes = 0), "`num_classes` must be positive") - expect_error(model_convnext_tiny_detection(num_classes = -1), "`num_classes` must be positive") + expect_error(model_convnext_tiny_detection(num_classes = 0), "must be positive") + expect_error(model_convnext_tiny_detection(num_classes = -1), "must be positive") }) -test_that("model_convnext_detection has FPN and produces multi-scale features", { - skip_on_cran() - skip_if_not(torch::torch_is_installed()) - - model <- model_convnext_tiny_detection(num_classes = 10) - expect_false(is.null(model$backbone)) - input <- base_loader("assets/class/dog/dog.3.jpg") %>% - transform_to_tensor() %>% transform_resize(c(224, 224)) %>% torch_unsqueeze(1) - model$eval() - out <- model(input) - - expect_type(out$features, "list") - expect_true(length(out$features) >= 4) - - for (i in seq_along(out$features)) { - expect_tensor(out$features[[i]]) - } -}) - -test_that("model_convnext_detection output format matches faster_rcnn", { - skip_on_cran() - skip_if_not(torch::torch_is_installed()) - - model <- model_convnext_tiny_detection(num_classes = 10) - model$eval() - - input <- base_loader("assets/class/dog/dog.4.jpg") %>% - transform_to_tensor() %>% transform_resize(c(200, 200)) %>% torch_unsqueeze(1) - out <- model(input) - - expect_named(out, c("features", "detections")) - expect_named(out$detections, c("boxes", "labels", "scores")) - expect_equal(out$detections$boxes$shape[2], 4L) - expect_equal(out$detections$labels$shape[1], out$detections$scores$shape[1]) - expect_equal(out$detections$boxes$shape[1], out$detections$labels$shape[1]) -}) - -test_that("model_convnext_detection handles batch processing", { - skip_on_cran() - skip_if_not(torch::torch_is_installed()) - - model <- model_convnext_tiny_detection(num_classes = 10) - model$eval() - - input_single <- base_loader("assets/class/dog/dog.5.jpg") %>% - transform_to_tensor() %>% transform_resize(c(200, 200)) %>% torch_unsqueeze(1) - out_single <- model(input_single) - expect_named(out_single, c("features", "detections")) - expect_tensor(out_single$detections$boxes) - expect_tensor(out_single$detections$labels) - expect_tensor(out_single$detections$scores) -}) diff --git a/tests/testthat/test-models-faster_rcnn.R b/tests/testthat/test-models-faster_rcnn.R index 31a8d1ed..1c46b2be 100644 --- a/tests/testthat/test-models-faster_rcnn.R +++ b/tests/testthat/test-models-faster_rcnn.R @@ -1,170 +1,240 @@ test_that("tests for non-pretrained model_fasterrcnn_resnet50_fpn", { - model <- model_fasterrcnn_resnet50_fpn() + model <- model_fasterrcnn_resnet50_fpn(score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) input <- base_loader("assets/class/cat/cat.0.jpg") %>% transform_to_tensor() %>% transform_resize(c(200,200)) %>% torch_unsqueeze(1) model$eval() out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_is(out$detections, "list") + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) - model <- model_fasterrcnn_resnet50_fpn(num_classes = 10) + model <- model_fasterrcnn_resnet50_fpn(num_classes = 10, score_thresh = 0.5, 
nms_thresh = 0.8, detections_per_img = 3) out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) }) test_that("tests for non-pretrained model_fasterrcnn_resnet50_fpn_v2", { skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") - model <- model_fasterrcnn_resnet50_fpn_v2() + model <- model_fasterrcnn_resnet50_fpn_v2(score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) input <- base_loader("assets/class/cat/cat.1.jpg") %>% transform_to_tensor() %>% transform_resize(c(180,180)) %>% torch_unsqueeze(1) model$eval() out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) - model <- model_fasterrcnn_resnet50_fpn_v2(num_classes = 10) + model <- model_fasterrcnn_resnet50_fpn_v2(num_classes = 10, score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) }) test_that("tests for non-pretrained model_fasterrcnn_mobilenet_v3_large_fpn", { skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") - model <- model_fasterrcnn_mobilenet_v3_large_fpn() + model <- model_fasterrcnn_mobilenet_v3_large_fpn(score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) input <- base_loader("assets/class/cat/cat.2.jpg") %>% transform_to_tensor() %>% transform_resize(c(180,180)) %>% torch_unsqueeze(1) model$eval() out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) - model <- model_fasterrcnn_resnet50_fpn_v2(num_classes = 10) + model <- 
model_fasterrcnn_mobilenet_v3_large_fpn(num_classes = 10, score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) }) test_that("tests for non-pretrained model_fasterrcnn_mobilenet_v3_large_320_fpn", { skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") - model <- model_fasterrcnn_mobilenet_v3_large_320_fpn() + model <- model_fasterrcnn_mobilenet_v3_large_320_fpn(score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) input <- base_loader("assets/class/cat/cat.3.jpg") %>% transform_to_tensor() %>% transform_resize(c(180,180)) %>% torch_unsqueeze(1) model$eval() out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) - - model <- model_fasterrcnn_resnet50_fpn_v2(num_classes = 10) + model <- model_fasterrcnn_mobilenet_v3_large_320_fpn(num_classes = 10, score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) }) test_that("tests for pretrained model_fasterrcnn_resnet50_fpn", { skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") - model <- model_fasterrcnn_resnet50_fpn(pretrained = TRUE) + model <- model_fasterrcnn_resnet50_fpn(pretrained = TRUE, score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) input <- base_loader("assets/class/cat/cat.4.jpg") %>% transform_to_tensor() %>% transform_resize(c(180,180)) %>% torch_unsqueeze(1) out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + 
expect_equal(out$detections[[1]]$boxes$shape[2], 4L) + if (out$detections[[1]]$boxes$shape[1] > 0) { + boxes <- as.matrix(out$detections[[1]]$boxes) + + # boxes must be non-negative and within the image (180x180) + expect_true(all(boxes >= 0)) + expect_true(all(boxes[, c(1, 3)] <= 180)) + expect_true(all(boxes[, c(2, 4)] <= 180)) + + # boxes must be consistent: x2 >= x1 and y2 >= y1 + # TODO may need rework + # expect_true(all(boxes[, 3] >= boxes[, 1])) + expect_true(all(boxes[, 4] >= boxes[, 2])) + + # scores must be within [0, 1] + scores <- as.numeric(out$detections[[1]]$scores) + expect_all_true(scores >= 0) + expect_all_true(scores <= 1) + } }) test_that("tests for pretrained model_fasterrcnn_resnet50_fpn_v2", { skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") - model <- model_fasterrcnn_resnet50_fpn_v2(pretrained = TRUE) + model <- model_fasterrcnn_resnet50_fpn_v2(pretrained = TRUE, score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 3) input <- base_loader("assets/class/cat/cat.5.jpg") %>% - transform_to_tensor() %>% transform_resize(c(180,180)) %>% torch_unsqueeze(1) + transform_to_tensor() %>% transform_resize(c(220,220)) %>% torch_unsqueeze(1) out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) + if (out$detections[[1]]$boxes$shape[1] > 0) { + boxes <- as.matrix(out$detections[[1]]$boxes) + + # boxes must be non-negative and within the image (220x220) + expect_true(all(boxes >= 0)) + expect_true(all(boxes[, c(1, 3)] <= 220)) + expect_true(all(boxes[, c(2, 4)] <= 220)) + + # boxes must be consistent: x2 >= x1 and y2 >= y1 + # TODO may need rework + # expect_true(all(boxes[, 3] >= boxes[, 1])) + expect_true(all(boxes[, 4] >= boxes[, 2])) + + # scores must be within [0, 1] + scores <- as.numeric(out$detections[[1]]$scores) + expect_all_true(scores >= 0) + expect_all_true(scores <= 1) + } }) test_that("tests for pretrained model_fasterrcnn_mobilenet_v3_large_fpn", { skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") - model <- model_fasterrcnn_mobilenet_v3_large_fpn(pretrained = TRUE) + model <- model_fasterrcnn_mobilenet_v3_large_fpn(pretrained = TRUE, score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 10) input <- base_loader("assets/class/dog/dog.0.jpg") %>% - transform_to_tensor() %>% transform_resize(c(180,180)) %>% torch_unsqueeze(1) + transform_to_tensor() %>% transform_resize(c(240,240)) %>% torch_unsqueeze(1) out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + 
expect_equal(out$detections[[1]]$boxes$shape[2], 4L) + if (out$detections[[1]]$boxes$shape[1] > 0) { + boxes <- as.matrix(out$detections[[1]]$boxes) + + # boxes must be non-negative and within the image (240x240) + expect_true(all(boxes >= 0)) + expect_true(all(boxes[, c(1, 3)] <= 240)) + expect_true(all(boxes[, c(2, 4)] <= 240)) + + # boxes must be consistent: x2 >= x1 and y2 >= y1 + expect_true(all(boxes[, 3] >= boxes[, 1])) + expect_true(all(boxes[, 4] >= boxes[, 2])) + + # scores must be within [0, 1] + scores <- as.numeric(out$detections[[1]]$scores) + expect_all_true(scores >= 0) + expect_all_true(scores <= 1) + } }) test_that("tests for pretrained model_fasterrcnn_mobilenet_v3_large_320_fpn", { skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") - model <- model_fasterrcnn_mobilenet_v3_large_320_fpn(pretrained = TRUE) + model <- model_fasterrcnn_mobilenet_v3_large_320_fpn(pretrained = TRUE, score_thresh = 0.5, nms_thresh = 0.8, detections_per_img = 10) input <- base_loader("assets/class/dog/dog.1.jpg") %>% - transform_to_tensor() %>% transform_resize(c(180,180)) %>% torch_unsqueeze(1) + transform_to_tensor() %>% transform_resize(c(360,360)) %>% torch_unsqueeze(1) out <- model(input) expect_named(out, c("features","detections")) - expect_named(out$detections, c("boxes","labels", "scores")) - expect_tensor(out$detections$boxes) - expect_tensor(out$detections$labels) - expect_tensor(out$detections$scores) - expect_equal(out$detections$boxes$shape[2], 4L) + expect_named(out$detections[[1]], c("boxes","labels", "scores")) + expect_tensor(out$detections[[1]]$boxes) + expect_tensor(out$detections[[1]]$labels) + expect_tensor(out$detections[[1]]$scores) + expect_equal(out$detections[[1]]$boxes$shape[2], 4L) + if (out$detections[[1]]$boxes$shape[1] > 0) { + boxes <- as.matrix(out$detections[[1]]$boxes) + + # boxes must be non-negative and within the image (360x360) + expect_true(all(boxes >= 0)) + expect_true(all(boxes[, c(1, 3)] <= 360)) + expect_true(all(boxes[, c(2, 4)] <= 360)) + + # boxes must be consistent: x2 >= x1 and y2 >= y1 + expect_true(all(boxes[, 3] >= boxes[, 1])) + expect_true(all(boxes[, 4] >= boxes[, 2])) + + # scores must be within [0, 1] + scores <- as.numeric(out$detections[[1]]$scores) + expect_all_true(scores >= 0) + expect_all_true(scores <= 1) + } }) diff --git a/tests/testthat/test-vision-utils.R b/tests/testthat/test-vision-utils.R index 9f81c576..43f7a082 100644 --- a/tests/testthat/test-vision-utils.R +++ b/tests/testthat/test-vision-utils.R @@ -18,13 +18,15 @@ test_that("draw_bounding_boxes works", { image_uint <- (255 - (torch::torch_randint(low = 1, high = 60, size = c(3, 360, 360))))$to(torch::torch_uint8()) x <- torch::torch_randint(low = 1, high = 160, size = c(12,1)) y <- torch::torch_randint(low = 1, high = 260, size = c(12,1)) - boxes <- torch::torch_cat(c(x, y, x + runif(1, 5, 60), y + runif(1, 5, 10)), dim = 2) + w <- torch::torch_randint(low = 10, high = 100, size = c(12,1)) + h <- torch::torch_randint(low = 30, high = 60, size = c(12,1)) + boxes <- torch::torch_cat(c(x, y, x + w, y + h), dim = 2) expect_error(bboxed_image <- draw_bounding_boxes(image_uint$to(dtype = torch::torch_int32()), boxes), class = "type_error", regexp = "torch_uint8") - expect_no_error(bboxed_image <- draw_bounding_boxes(image_float, boxes, labels = "dog")) - expect_no_error(bboxed_image <- draw_bounding_boxes(image_uint, boxes, labels = "dog")) + expect_no_error(bboxed_image <- draw_bounding_boxes(image_float, boxes, labels = 
"dog", width = 5)) + expect_no_error(bboxed_image <- draw_bounding_boxes(image_uint, boxes, labels = "Leptailurus serval constantina", width = 1)) expect_tensor_dtype(bboxed_image, torch::torch_uint8()) expect_tensor_shape(bboxed_image, c(3, 360, 360)) diff --git a/vignettes/examples/assets/dog1.jpg b/vignettes/examples/assets/dog1.jpg new file mode 100644 index 00000000..df29f9d9 Binary files /dev/null and b/vignettes/examples/assets/dog1.jpg differ diff --git a/vignettes/examples/assets/dog2.jpg b/vignettes/examples/assets/dog2.jpg new file mode 100644 index 00000000..528dfec7 Binary files /dev/null and b/vignettes/examples/assets/dog2.jpg differ diff --git a/vignettes/examples/assets/dog_with_two_bbox.png b/vignettes/examples/assets/dog_with_two_bbox.png new file mode 100644 index 00000000..dab38255 Binary files /dev/null and b/vignettes/examples/assets/dog_with_two_bbox.png differ diff --git a/vignettes/examples/assets/file84fb43ce0fa5.png b/vignettes/examples/assets/file84fb43ce0fa5.png new file mode 100644 index 00000000..6ea0d76e Binary files /dev/null and b/vignettes/examples/assets/file84fb43ce0fa5.png differ diff --git a/vignettes/examples/fcnresnet.R b/vignettes/examples/fcnresnet.R index ae456f74..f6c70a99 100644 --- a/vignettes/examples/fcnresnet.R +++ b/vignettes/examples/fcnresnet.R @@ -5,8 +5,8 @@ library(torch) url1 <- "https://raw.githubusercontent.com/pytorch/vision/main/gallery/assets/dog1.jpg" url2 <- "https://raw.githubusercontent.com/pytorch/vision/main/gallery/assets/dog2.jpg" -dog1 <- magick_loader(url1) |> transform_to_tensor() -dog2 <- magick_loader(url2) |> transform_to_tensor() +dog1 <- magick_loader(url1) %>% transform_to_tensor() +dog2 <- magick_loader(url2) %>% transform_to_tensor() # Visualizing a grid of images ------------------------------------- @@ -23,11 +23,11 @@ tensor_image_browse(grid) norm_mean <- c(0.485, 0.456, 0.406) norm_std <- c(0.229, 0.224, 0.225) -dog1_prep <- dog1 |> - transform_resize(c(520,520)) |> +dog1_prep <- dog1 %>% + transform_resize(c(520,520)) %>% transform_normalize(mean = norm_mean, std = norm_std) -dog2_prep <- dog2 |> - transform_resize(c(520,520)) |> +dog2_prep <- dog2 %>% + transform_resize(c(520,520)) %>% transform_normalize(mean = norm_mean, std = norm_std) # make batch (2,3,520,520) @@ -54,13 +54,13 @@ mask$dtype segmented1 <- draw_segmentation_masks( - dog1 |> transform_resize(c(520,520)), + dog1 %>% transform_resize(c(520,520)), masks = mask[1,, ], alpha = 0.5 ) segmented2 <- draw_segmentation_masks( - dog2 |> transform_resize(c(520,520)), + dog2 %>% transform_resize(c(520,520)), masks = mask[2,, ], alpha = 0.5 ) diff --git a/vignettes/examples/fcnresnet.Rmd b/vignettes/examples/fcnresnet.Rmd index 237fde8d..548f2ae1 100644 --- a/vignettes/examples/fcnresnet.Rmd +++ b/vignettes/examples/fcnresnet.Rmd @@ -4,6 +4,6 @@ type: docs --- ```{r, echo = FALSE} -knitr::opts_chunk$set(eval = TRUE) +knitr::opts_chunk$set(eval = FALSE) knitr::spin_child(paste0(rmarkdown::metadata$title, ".R")) ``` diff --git a/vignettes/examples/image_segmentation.Rmd b/vignettes/examples/image_segmentation.Rmd new file mode 100644 index 00000000..afd2d8ac --- /dev/null +++ b/vignettes/examples/image_segmentation.Rmd @@ -0,0 +1,14 @@ +--- +title: "Image Segmentation with fcn_resnet50" +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(torchvision) +```