diff --git a/apps/computer-vision/app/image_segmentation/index.tsx b/apps/computer-vision/app/image_segmentation/index.tsx index 87293b01b..61a98ddea 100644 --- a/apps/computer-vision/app/image_segmentation/index.tsx +++ b/apps/computer-vision/app/image_segmentation/index.tsx @@ -12,30 +12,13 @@ import { Skia, AlphaType, ColorType, + SkImage, } from '@shopify/react-native-skia'; import { View, StyleSheet, Image } from 'react-native'; import React, { useContext, useEffect, useState } from 'react'; import { GeneratingContext } from '../../context'; import ScreenWrapper from '../../ScreenWrapper'; -const width = 224; -const height = 224; - -let pixels = new Uint8Array(width * height * 4); -pixels.fill(255); - -let data = Skia.Data.fromBytes(pixels); -let img = Skia.Image.MakeImage( - { - width: width, - height: height, - alphaType: AlphaType.Opaque, - colorType: ColorType.RGBA_8888, - }, - data, - width * 4 -); - const numberToColor: number[][] = [ [255, 87, 51], // 0 Red [51, 255, 87], // 1 Green @@ -67,48 +50,58 @@ export default function ImageSegmentationScreen() { setGlobalGenerating(model.isGenerating); }, [model.isGenerating, setGlobalGenerating]); const [imageUri, setImageUri] = useState(''); + const [imageSize, setImageSize] = useState({ width: 0, height: 0 }); + const [segImage, setSegImage] = useState(null); + const [canvasSize, setCanvasSize] = useState({ width: 0, height: 0 }); const handleCameraPress = async (isCamera: boolean) => { const image = await getImage(isCamera); - const uri = image?.uri; - setImageUri(uri as string); + if (!image?.uri) return; + setImageUri(image.uri); + setImageSize({ + width: image.width ?? 0, + height: image.height ?? 
0, + }); + setSegImage(null); }; - const [resultPresent, setResultPresent] = useState(false); - const runForward = async () => { - if (imageUri) { - try { - const output = await model.forward(imageUri); - pixels = new Uint8Array(width * height * 4); + if (!imageUri || imageSize.width === 0 || imageSize.height === 0) return; + try { + const { width, height } = imageSize; + const output = await model.forward(imageUri, [DeeplabLabel.ARGMAX]); + const argmax = output[DeeplabLabel.ARGMAX] || []; + const uniqueValues = new Set(); + for (let i = 0; i < argmax.length; i++) { + uniqueValues.add(argmax[i]); + } + const pixels = new Uint8Array(width * height * 4); - for (let x = 0; x < width; x++) { - for (let y = 0; y < height; y++) { - for (let i = 0; i < 3; i++) { - pixels[(x * height + y) * 4 + i] = - numberToColor[ - (output[DeeplabLabel.ARGMAX] || [])[x * height + y] - ][i]; - } - pixels[(x * height + y) * 4 + 3] = 255; - } + for (let row = 0; row < height; row++) { + for (let col = 0; col < width; col++) { + const idx = row * width + col; + const color = numberToColor[argmax[idx]] || [0, 0, 0]; + pixels[idx * 4] = color[0]; + pixels[idx * 4 + 1] = color[1]; + pixels[idx * 4 + 2] = color[2]; + pixels[idx * 4 + 3] = 255; } - - data = Skia.Data.fromBytes(pixels); - img = Skia.Image.MakeImage( - { - width: width, - height: height, - alphaType: AlphaType.Opaque, - colorType: ColorType.RGBA_8888, - }, - data, - width * 4 - ); - setResultPresent(true); - } catch (e) { - console.error(e); } + + const data = Skia.Data.fromBytes(pixels); + const img = Skia.Image.MakeImage( + { + width, + height, + alphaType: AlphaType.Opaque, + colorType: ColorType.RGBA_8888, + }, + data, + width * 4 + ); + setSegImage(img); + } catch (e) { + console.error(e); } }; @@ -135,16 +128,24 @@ export default function ImageSegmentationScreen() { } /> - {resultPresent && ( - + {segImage && ( + + setCanvasSize({ + width: e.nativeEvent.layout.width, + height: e.nativeEvent.layout.height, + }) + } + > @@ 
-181,7 +182,7 @@ const styles = StyleSheet.create({ padding: 4, }, canvas: { - width: width, - height: height, + width: '100%', + height: '100%', }, }); diff --git a/docs/docs/03-hooks/02-computer-vision/useImageSegmentation.md b/docs/docs/03-hooks/02-computer-vision/useImageSegmentation.md index 681d1928a..edcb6de05 100644 --- a/docs/docs/03-hooks/02-computer-vision/useImageSegmentation.md +++ b/docs/docs/03-hooks/02-computer-vision/useImageSegmentation.md @@ -55,7 +55,7 @@ To run the model, you can use the [`forward`](../../06-api-reference/interfaces/ - The image can be a remote URL, a local file URI, or a base64-encoded image. - The [`classesOfInterest`](../../06-api-reference/interfaces/ImageSegmentationType.md#classesofinterest) list contains classes for which to output the full results. By default the list is empty, and only the most probable classes are returned (essentially an arg max for each pixel). Look at [`DeeplabLabel`](../../06-api-reference/enumerations/DeeplabLabel.md) enum for possible classes. -- The [`resize`](../../06-api-reference/interfaces/ImageSegmentationType.md#resize) flag says whether the output will be rescaled back to the size of the image you put in. The default is `false`. The model runs inference on a scaled (probably smaller) version of your image (224x224 for `DEEPLAB_V3_RESNET50`). If you choose to resize, the output will be `number[]` of size `width * height` of your original image. +- The [`resizeToInput`](../../06-api-reference/interfaces/ImageSegmentationType.md#resizetoinput) flag specifies whether the output will be rescaled back to the size of the input image. The default is `true`. The model runs inference on a scaled (probably smaller) version of your image (224x224 for `DEEPLAB_V3_RESNET50`). If you choose to resize, the output will be `number[]` of size `width * height` of your original image. :::warning Setting `resize` to true will make `forward` slower. 
diff --git a/docs/docs/04-typescript-api/02-computer-vision/ImageSegmentationModule.md b/docs/docs/04-typescript-api/02-computer-vision/ImageSegmentationModule.md index 631331625..373da8b9d 100644 --- a/docs/docs/04-typescript-api/02-computer-vision/ImageSegmentationModule.md +++ b/docs/docs/04-typescript-api/02-computer-vision/ImageSegmentationModule.md @@ -52,7 +52,7 @@ To run the model, you can use the [`forward`](../../06-api-reference/classes/Ima - The image can be a remote URL, a local file URI, or a base64-encoded image. - The [`classesOfInterest`](../../06-api-reference/classes/ImageSegmentationModule.md#classesofinterest) list contains classes for which to output the full results. By default the list is empty, and only the most probable classes are returned (essentially an arg max for each pixel). Look at [`DeeplabLabel`](../../06-api-reference/enumerations/DeeplabLabel.md) enum for possible classes. -- The [`resize`](../../06-api-reference/classes/ImageSegmentationModule.md#resize) flag says whether the output will be rescaled back to the size of the image you put in. The default is `false`. The model runs inference on a scaled (probably smaller) version of your image (224x224 for the `DEEPLAB_V3_RESNET50`). If you choose to resize, the output will be `number[]` of size `width * height` of your original image. +- The [`resizeToInput`](../../06-api-reference/classes/ImageSegmentationModule.md#resizetoinput) flag specifies whether the output will be rescaled back to the size of the input image. The default is `true`. The model runs inference on a scaled (probably smaller) version of your image (224x224 for the `DEEPLAB_V3_RESNET50`). If you choose to resize, the output will be `number[]` of size `width * height` of your original image. :::warning Setting `resize` to true will make `forward` slower. 
diff --git a/docs/docs/06-api-reference/classes/ImageSegmentationModule.md b/docs/docs/06-api-reference/classes/ImageSegmentationModule.md index 87368f186..35eeff8d5 100644 --- a/docs/docs/06-api-reference/classes/ImageSegmentationModule.md +++ b/docs/docs/06-api-reference/classes/ImageSegmentationModule.md @@ -1,6 +1,6 @@ # Class: ImageSegmentationModule -Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:13](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L13) +Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:13](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L13) Module for image segmentation tasks. @@ -28,7 +28,7 @@ Module for image segmentation tasks. 
> **nativeModule**: `any` = `null` -Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/BaseModule.ts#L8) Native module instance @@ -42,7 +42,7 @@ Native module instance > **delete**(): `void` -Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/BaseModule.ts#L41) Unloads the model from memory. @@ -58,9 +58,9 @@ Unloads the model from memory. 
### forward() -> **forward**(`imageSource`, `classesOfInterest?`, `resize?`): `Promise`\<`Partial`\<`Record`\<[`DeeplabLabel`](../enumerations/DeeplabLabel.md), `number`[]\>\>\> +> **forward**(`imageSource`, `classesOfInterest?`, `resizeToInput?`): `Promise`\<`Partial`\<`Record`\<[`DeeplabLabel`](../enumerations/DeeplabLabel.md), `number`[]\>\>\> -Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:46](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L46) +Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:46](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L46) Executes the model's forward pass @@ -78,11 +78,11 @@ a fetchable resource or a Base64-encoded string. an optional list of DeeplabLabel used to indicate additional arrays of probabilities to output (see section "Running the model"). The default is an empty list. -##### resize? +##### resizeToInput? `boolean` -an optional boolean to indicate whether the output should be resized to the original image dimensions, or left in the size of the model (see section "Running the model"). The default is `false`. +an optional boolean to indicate whether the output should be resized to the original input image dimensions. If `false`, returns the model output without any resizing (see section "Running the model"). Defaults to `true`. 
#### Returns @@ -96,7 +96,7 @@ A dictionary where keys are `DeeplabLabel` and values are arrays of probabilitie > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/BaseModule.ts#L23) Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -125,7 +125,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/BaseModule.ts#L34) Gets the input shape for a given method and index. @@ -159,7 +159,7 @@ The input shape as an array of numbers. 
> **load**(`model`, `onDownloadProgressCallback`): `Promise`\<`void`\> -Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:21](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L21) +Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:21](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L21) Loads the model, where `modelSource` is a string that specifies the location of the model binary. To track the download progress, supply a callback function `onDownloadProgressCallback`. diff --git a/docs/docs/06-api-reference/functions/useImageSegmentation.md b/docs/docs/06-api-reference/functions/useImageSegmentation.md index 954853756..6753775cb 100644 --- a/docs/docs/06-api-reference/functions/useImageSegmentation.md +++ b/docs/docs/06-api-reference/functions/useImageSegmentation.md @@ -2,7 +2,7 @@ > **useImageSegmentation**(`ImageSegmentationProps`): [`ImageSegmentationType`](../interfaces/ImageSegmentationType.md) -Defined in: [packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts:15](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts#L15) +Defined in: [packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts:15](https://github.com/software-mansion/react-native-executorch/blob/9e79b9bf2a34159a71071fbfdaed3ddd9393702f/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts#L15) React hook for managing an Image Segmentation model instance. 
diff --git a/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts index 5d78decaf..cc26d68b2 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts @@ -40,13 +40,13 @@ export class ImageSegmentationModule extends BaseModule { * * @param imageSource - a fetchable resource or a Base64-encoded string. * @param classesOfInterest - an optional list of DeeplabLabel used to indicate additional arrays of probabilities to output (see section "Running the model"). The default is an empty list. - * @param resize - an optional boolean to indicate whether the output should be resized to the original image dimensions, or left in the size of the model (see section "Running the model"). The default is `false`. + * @param resizeToInput - an optional boolean to indicate whether the output should be resized to the original input image dimensions. If `false`, returns the model output without any resizing (see section "Running the model"). Defaults to `true`. * @returns A dictionary where keys are `DeeplabLabel` and values are arrays of probabilities for each pixel belonging to the corresponding class. */ async forward( imageSource: string, classesOfInterest?: DeeplabLabel[], - resize?: boolean + resizeToInput?: boolean ): Promise>> { if (this.nativeModule == null) { throw new RnExecutorchError( @@ -58,7 +58,7 @@ export class ImageSegmentationModule extends BaseModule { const stringDict = await this.nativeModule.generate( imageSource, (classesOfInterest || []).map((label) => DeeplabLabel[label]), - resize || false + resizeToInput ?? 
true ); let enumDict: { [key in DeeplabLabel]?: number[] } = {}; diff --git a/packages/react-native-executorch/src/types/imageSegmentation.ts b/packages/react-native-executorch/src/types/imageSegmentation.ts index 8a59b3a61..02d9eec10 100644 --- a/packages/react-native-executorch/src/types/imageSegmentation.ts +++ b/packages/react-native-executorch/src/types/imageSegmentation.ts @@ -76,13 +76,13 @@ export interface ImageSegmentationType { * Executes the model's forward pass to perform semantic segmentation on the provided image. * @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be processed. * @param classesOfInterest - An optional array of `DeeplabLabel` enums. If provided, the model will only return segmentation masks for these specific classes. - * @param resize - An optional boolean indicating whether the output segmentation masks should be resized to match the original image dimensions. Defaults to standard model behavior if undefined. + * @param resizeToInput - an optional boolean to indicate whether the output should be resized to the original input image dimensions. If `false`, returns the model output without any resizing (see section "Running the model"). Defaults to `true`. * @returns A Promise that resolves to an object mapping each detected `DeeplabLabel` to its corresponding segmentation mask (represented as a flattened array of numbers). * @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. */ forward: ( imageSource: string, classesOfInterest?: DeeplabLabel[], - resize?: boolean + resizeToInput?: boolean ) => Promise>>; }