117 changes: 59 additions & 58 deletions apps/computer-vision/app/image_segmentation/index.tsx
@@ -12,30 +12,13 @@ import {
Skia,
AlphaType,
ColorType,
SkImage,
} from '@shopify/react-native-skia';
import { View, StyleSheet, Image } from 'react-native';
import React, { useContext, useEffect, useState } from 'react';
import { GeneratingContext } from '../../context';
import ScreenWrapper from '../../ScreenWrapper';

const width = 224;
const height = 224;

let pixels = new Uint8Array(width * height * 4);
pixels.fill(255);

let data = Skia.Data.fromBytes(pixels);
let img = Skia.Image.MakeImage(
{
width: width,
height: height,
alphaType: AlphaType.Opaque,
colorType: ColorType.RGBA_8888,
},
data,
width * 4
);

const numberToColor: number[][] = [
[255, 87, 51], // 0 Red
[51, 255, 87], // 1 Green
@@ -67,48 +50,58 @@ export default function ImageSegmentationScreen() {
setGlobalGenerating(model.isGenerating);
}, [model.isGenerating, setGlobalGenerating]);
const [imageUri, setImageUri] = useState('');
const [imageSize, setImageSize] = useState({ width: 0, height: 0 });
const [segImage, setSegImage] = useState<SkImage | null>(null);
const [canvasSize, setCanvasSize] = useState({ width: 0, height: 0 });

const handleCameraPress = async (isCamera: boolean) => {
const image = await getImage(isCamera);
const uri = image?.uri;
setImageUri(uri as string);
if (!image?.uri) return;
setImageUri(image.uri);
setImageSize({
width: image.width ?? 0,
height: image.height ?? 0,
});
setSegImage(null);
};

const [resultPresent, setResultPresent] = useState(false);

const runForward = async () => {
if (imageUri) {
try {
const output = await model.forward(imageUri);
pixels = new Uint8Array(width * height * 4);
if (!imageUri || imageSize.width === 0 || imageSize.height === 0) return;
try {
const { width, height } = imageSize;
const output = await model.forward(imageUri, [DeeplabLabel.ARGMAX]);
const argmax = output[DeeplabLabel.ARGMAX] || [];
const uniqueValues = new Set<number>();
for (let i = 0; i < argmax.length; i++) {
uniqueValues.add(argmax[i]);
}
const pixels = new Uint8Array(width * height * 4);

for (let x = 0; x < width; x++) {
for (let y = 0; y < height; y++) {
for (let i = 0; i < 3; i++) {
pixels[(x * height + y) * 4 + i] =
numberToColor[
(output[DeeplabLabel.ARGMAX] || [])[x * height + y]
][i];
}
pixels[(x * height + y) * 4 + 3] = 255;
}
for (let row = 0; row < height; row++) {
for (let col = 0; col < width; col++) {
const idx = row * width + col;
const color = numberToColor[argmax[idx]] || [0, 0, 0];
pixels[idx * 4] = color[0];
pixels[idx * 4 + 1] = color[1];
pixels[idx * 4 + 2] = color[2];
pixels[idx * 4 + 3] = 255;
}

data = Skia.Data.fromBytes(pixels);
img = Skia.Image.MakeImage(
{
width: width,
height: height,
alphaType: AlphaType.Opaque,
colorType: ColorType.RGBA_8888,
},
data,
width * 4
);
setResultPresent(true);
} catch (e) {
console.error(e);
}

const data = Skia.Data.fromBytes(pixels);
const img = Skia.Image.MakeImage(
{
width,
height,
alphaType: AlphaType.Opaque,
colorType: ColorType.RGBA_8888,
},
data,
width * 4
);
setSegImage(img);
} catch (e) {
console.error(e);
}
};

@@ -135,16 +128,24 @@ export default function ImageSegmentationScreen()
}
/>
</View>
{resultPresent && (
<View style={styles.canvasContainer}>
{segImage && (
<View
style={styles.canvasContainer}
onLayout={(e) =>
setCanvasSize({
width: e.nativeEvent.layout.width,
height: e.nativeEvent.layout.height,
})
}
>
<Canvas style={styles.canvas}>
<SkiaImage
image={img}
image={segImage}
fit="contain"
x={0}
y={0}
width={width}
height={height}
width={canvasSize.width}
height={canvasSize.height}
/>
</Canvas>
</View>
@@ -181,7 +182,7 @@ const styles = StyleSheet.create({
padding: 4,
},
canvas: {
width: width,
height: height,
width: '100%',
height: '100%',
},
});
@@ -55,7 +55,7 @@ To run the model, you can use the [`forward`](../../06-api-reference/interfaces/

- The image can be a remote URL, a local file URI, or a base64-encoded image.
- The [`classesOfInterest`](../../06-api-reference/interfaces/ImageSegmentationType.md#classesofinterest) list contains classes for which to output the full results. By default the list is empty, and only the most probable classes are returned (essentially an arg max for each pixel). Look at [`DeeplabLabel`](../../06-api-reference/enumerations/DeeplabLabel.md) enum for possible classes.
- The [`resize`](../../06-api-reference/interfaces/ImageSegmentationType.md#resize) flag says whether the output will be rescaled back to the size of the image you put in. The default is `false`. The model runs inference on a scaled (probably smaller) version of your image (224x224 for `DEEPLAB_V3_RESNET50`). If you choose to resize, the output will be `number[]` of size `width * height` of your original image.
- The [`resizeToInput`](../../06-api-reference/interfaces/ImageSegmentationType.md#resizeToInput) flag specifies whether the output will be rescaled back to the size of the input image. The default is `true`. The model runs inference on a scaled (probably smaller) version of your image (224x224 for `DEEPLAB_V3_RESNET50`). If you choose to resize, the output will be `number[]` of size `width * height` of your original image.

:::warning
Setting `resize` to true will make `forward` slower.
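For orientation, here is a minimal sketch of a `forward` call with these options. The `model` object is assumed to come from `useImageSegmentation` and to be fully loaded, and the `ImageSegmentationType` import path is an assumption; only the `forward(imageSource, classesOfInterest?, resizeToInput?)` signature is taken from this page.

```typescript
import { DeeplabLabel } from 'react-native-executorch';
import type { ImageSegmentationType } from 'react-native-executorch';

// Sketch only: `model` is the object returned by useImageSegmentation, after loading finishes.
async function getArgmaxMask(model: ImageSegmentationType, imageUri: string) {
  const output = await model.forward(
    imageUri,               // remote URL, local file URI, or base64-encoded image
    [DeeplabLabel.ARGMAX],  // classesOfInterest: only the per-pixel argmax is requested here
    true                    // resizeToInput: rescale the mask to the input image size (the default)
  );
  // Flattened, row-major mask of length width * height of the original image.
  return output[DeeplabLabel.ARGMAX] ?? [];
}
```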
@@ -52,7 +52,7 @@ To run the model, you can use the [`forward`](../../06-api-reference/classes/Ima

- The image can be a remote URL, a local file URI, or a base64-encoded image.
- The [`classesOfInterest`](../../06-api-reference/classes/ImageSegmentationModule.md#classesofinterest) list contains classes for which to output the full results. By default the list is empty, and only the most probable classes are returned (essentially an arg max for each pixel). Look at [`DeeplabLabel`](../../06-api-reference/enumerations/DeeplabLabel.md) enum for possible classes.
- The [`resize`](../../06-api-reference/classes/ImageSegmentationModule.md#resize) flag says whether the output will be rescaled back to the size of the image you put in. The default is `false`. The model runs inference on a scaled (probably smaller) version of your image (224x224 for the `DEEPLAB_V3_RESNET50`). If you choose to resize, the output will be `number[]` of size `width * height` of your original image.
- The [`resizeToInput`](../../06-api-reference/classes/ImageSegmentationModule.md#resizeToInput) flag specifies whether the output will be rescaled back to the size of the input image. The default is `true`. The model runs inference on a scaled (probably smaller) version of your image (224x224 for the `DEEPLAB_V3_RESNET50`). If you choose to resize, the output will be `number[]` of size `width * height` of your original image.

:::warning
Setting `resize` to true will make `forward` slower.
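As a rough sketch under assumptions: the instantiation style, the package-root export, the model source string, and the progress-callback scale are not confirmed by this page, while the `load` and `forward` signatures follow the class reference below.

```typescript
import { ImageSegmentationModule, DeeplabLabel } from 'react-native-executorch';

// Sketch only — `modelSource` is a placeholder location of a .pte binary.
async function segmentOnce(imageUri: string, modelSource: string) {
  const module = new ImageSegmentationModule();
  await module.load(modelSource, (progress: number) => {
    console.log('download progress:', progress); // progress value scale is an assumption
  });

  // resizeToInput defaults to true, so the mask matches the original image dimensions.
  const output = await module.forward(imageUri, [DeeplabLabel.ARGMAX]);
  return output[DeeplabLabel.ARGMAX] ?? [];
}
```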
20 changes: 10 additions & 10 deletions docs/docs/06-api-reference/classes/ImageSegmentationModule.md
@@ -1,6 +1,6 @@
# Class: ImageSegmentationModule

Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:13](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L13)
Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:13](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L13)

Module for image segmentation tasks.

@@ -28,7 +28,7 @@ Module for image segmentation tasks.

> **nativeModule**: `any` = `null`
Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/BaseModule.ts#L8)
Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/BaseModule.ts#L8)

Native module instance

@@ -42,7 +42,7 @@ Native module instance

> **delete**(): `void`
Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/BaseModule.ts#L41)
Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/BaseModule.ts#L41)

Unloads the model from memory.

@@ -58,9 +58,9 @@ Unloads the model from memory.

### forward()

> **forward**(`imageSource`, `classesOfInterest?`, `resize?`): `Promise`\<`Partial`\<`Record`\<[`DeeplabLabel`](../enumerations/DeeplabLabel.md), `number`[]\>\>\>
> **forward**(`imageSource`, `classesOfInterest?`, `resizeToInput?`): `Promise`\<`Partial`\<`Record`\<[`DeeplabLabel`](../enumerations/DeeplabLabel.md), `number`[]\>\>\>
Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:46](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L46)
Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:46](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L46)

Executes the model's forward pass

@@ -78,11 +78,11 @@ a fetchable resource or a Base64-encoded string.

an optional list of DeeplabLabel used to indicate additional arrays of probabilities to output (see section "Running the model"). The default is an empty list.

##### resize?
##### resizeToInput?

`boolean`

an optional boolean to indicate whether the output should be resized to the original image dimensions, or left in the size of the model (see section "Running the model"). The default is `false`.
an optional boolean to indicate whether the output should be resized to the original input image dimensions. If `false`, returns the model output without any resizing (see section "Running the model"). Defaults to `true`.

#### Returns

@@ -96,7 +96,7 @@ A dictionary where keys are `DeeplabLabel` and values are arrays of probabilitie
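A short sketch of indexing into that dictionary; the row-major layout mirrors the example app above and is otherwise an assumption.

```typescript
import { DeeplabLabel } from 'react-native-executorch';

// Read the class id predicted for a single pixel out of a forward() result.
// `width` is the mask width (the original image width when resizeToInput is true).
function labelAt(
  output: Partial<Record<DeeplabLabel, number[]>>,
  row: number,
  col: number,
  width: number
): number | undefined {
  return output[DeeplabLabel.ARGMAX]?.[row * width + col];
}
```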

> `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\>
Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/BaseModule.ts#L23)
Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/BaseModule.ts#L23)

Runs the model's forward method with the given input tensors.
It returns the output tensors that mimic the structure of output from ExecuTorch.
@@ -125,7 +125,7 @@ Array of output tensors.

> **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\>
Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/BaseModule.ts#L34)
Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/BaseModule.ts#L34)

Gets the input shape for a given method and index.

@@ -159,7 +159,7 @@ The input shape as an array of numbers.

> **load**(`model`, `onDownloadProgressCallback`): `Promise`\<`void`\>
Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:21](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L21)
Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:21](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L21)

Loads the model, where `modelSource` is a string that specifies the location of the model binary.
To track the download progress, supply a callback function `onDownloadProgressCallback`.
@@ -2,7 +2,7 @@

> **useImageSegmentation**(`ImageSegmentationProps`): [`ImageSegmentationType`](../interfaces/ImageSegmentationType.md)
Defined in: [packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts:15](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts#L15)
Defined in: [packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts:15](https://github.com/software-mansion/react-native-executorch/blob/9e79b9bf2a34159a71071fbfdaed3ddd9393702f/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts#L15)

React hook for managing an Image Segmentation model instance.

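A minimal usage sketch for context — the `ImageSegmentationProps` shape and the model source shown here are assumptions, not taken from this page.

```tsx
import { useImageSegmentation } from 'react-native-executorch';

function SegmentationExample() {
  // Assumed props shape and hypothetical model source; see ImageSegmentationProps for the real fields.
  const model = useImageSegmentation({
    model: 'https://example.com/deeplab_v3_resnet50.pte',
  });

  // model.forward(...) and model.isGenerating are then available, as in the example app above.
  return null;
}
```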
@@ -40,13 +40,13 @@ export class ImageSegmentationModule extends BaseModule {
*
* @param imageSource - a fetchable resource or a Base64-encoded string.
* @param classesOfInterest - an optional list of DeeplabLabel used to indicate additional arrays of probabilities to output (see section "Running the model"). The default is an empty list.
* @param resize - an optional boolean to indicate whether the output should be resized to the original image dimensions, or left in the size of the model (see section "Running the model"). The default is `false`.
* @param resizeToInput - an optional boolean to indicate whether the output should be resized to the original input image dimensions. If `false`, returns the model output without any resizing (see section "Running the model"). Defaults to `true`.
* @returns A dictionary where keys are `DeeplabLabel` and values are arrays of probabilities for each pixel belonging to the corresponding class.
*/
async forward(
imageSource: string,
classesOfInterest?: DeeplabLabel[],
resize?: boolean
resizeToInput?: boolean
): Promise<Partial<Record<DeeplabLabel, number[]>>> {
if (this.nativeModule == null) {
throw new RnExecutorchError(
@@ -58,7 +58,7 @@ export class ImageSegmentationModule extends BaseModule {
const stringDict = await this.nativeModule.generate(
imageSource,
(classesOfInterest || []).map((label) => DeeplabLabel[label]),
resize || false
resizeToInput ?? true
);

let enumDict: { [key in DeeplabLabel]?: number[] } = {};
@@ -76,13 +76,13 @@ export interface ImageSegmentationType {
* Executes the model's forward pass to perform semantic segmentation on the provided image.
* @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be processed.
* @param classesOfInterest - An optional array of `DeeplabLabel` enums. If provided, the model will only return segmentation masks for these specific classes.
* @param resize - An optional boolean indicating whether the output segmentation masks should be resized to match the original image dimensions. Defaults to standard model behavior if undefined.
* @param resizeToInput - an optional boolean to indicate whether the output should be resized to the original input image dimensions. If `false`, returns the model output without any resizing (see section "Running the model"). Defaults to `true`.
* @returns A Promise that resolves to an object mapping each detected `DeeplabLabel` to its corresponding segmentation mask (represented as a flattened array of numbers).
* @throws {RnExecutorchError} If the model is not loaded or is currently processing another image.
*/
forward: (
imageSource: string,
classesOfInterest?: DeeplabLabel[],
resize?: boolean
resizeToInput?: boolean
) => Promise<Partial<Record<DeeplabLabel, number[]>>>;
}