diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt
index b17dd3f8f95..a846b87c198 100644
--- a/.ci/docker/ci_commit_pins/pytorch.txt
+++ b/.ci/docker/ci_commit_pins/pytorch.txt
@@ -1 +1 @@
-295f2ed4d103017f7e19a7b8263ece606cd629db
+7ae0ce6360b6e4f944906502d20da24c04debee5
diff --git a/.ci/scripts/gather_test_models.py b/.ci/scripts/gather_test_models.py
index 3f22d7699de..802aee4b53c 100755
--- a/.ci/scripts/gather_test_models.py
+++ b/.ci/scripts/gather_test_models.py
@@ -14,7 +14,7 @@ from typing import Any
 
 from examples.models import MODEL_NAME_TO_MODEL
-from examples.xnnpack import MODEL_NAME_TO_OPTIONS
+from examples.xnnpack import MODEL_NAME_TO_OPTIONS, QuantType
 
 DEFAULT_RUNNERS = {
     "linux": "linux.2xlarge",
@@ -154,7 +154,7 @@ def export_models_for_ci() -> dict[str, dict]:
         if backend == "xnnpack":
             if name not in MODEL_NAME_TO_OPTIONS:
                 continue
-            if MODEL_NAME_TO_OPTIONS[name].quantization:
+            if MODEL_NAME_TO_OPTIONS[name].quantization != QuantType.NONE:
                 backend += "-quantization"
 
             if MODEL_NAME_TO_OPTIONS[name].delegation:
diff --git a/.ci/scripts/unittest-linux.sh b/.ci/scripts/unittest-linux.sh
index f8ff9df773e..a05211d8e0e 100755
--- a/.ci/scripts/unittest-linux.sh
+++ b/.ci/scripts/unittest-linux.sh
@@ -21,8 +21,7 @@ if [[ "$BUILD_TOOL" == "cmake" ]]; then
   source .ci/scripts/setup-vulkan-linux-deps.sh
 
   PYTHON_EXECUTABLE=python \
-  EXECUTORCH_BUILD_PYBIND=ON \
-  CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
+  CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
   .ci/scripts/setup-linux.sh "$@"
 
   # Install llama3_2_vision dependencies.
diff --git a/.ci/scripts/unittest-macos.sh b/.ci/scripts/unittest-macos.sh
index d5ca97404aa..12c9d3f1508 100755
--- a/.ci/scripts/unittest-macos.sh
+++ b/.ci/scripts/unittest-macos.sh
@@ -21,8 +21,7 @@ trap 'rm -rfv ${TMP_DIR}' EXIT
 
 # Setup MacOS dependencies as there is no Docker support on MacOS atm
 PYTHON_EXECUTABLE=python \
-EXECUTORCH_BUILD_PYBIND=ON \
-CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
+CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
 ${CONDA_RUN} --no-capture-output \
 .ci/scripts/setup-macos.sh "$@"
 
diff --git a/.ci/scripts/wheel/envvar_base.sh b/.ci/scripts/wheel/envvar_base.sh
index 6379dee6b5a..15f590f0f68 100755
--- a/.ci/scripts/wheel/envvar_base.sh
+++ b/.ci/scripts/wheel/envvar_base.sh
@@ -8,13 +8,10 @@
 # should typically only contain shell variable assignments. Be sure to export
 # any variables so that subprocesses will see them.
 
-# Enable pybindings so that users can execute ExecuTorch programs from python.
-export EXECUTORCH_BUILD_PYBIND=1
-
 # Ensure that CMAKE_ARGS is defined before referencing it. Defaults to empty
 # if not defined.
 export CMAKE_ARGS="${CMAKE_ARGS:-}"
 
 # Link the XNNPACK backend into the pybindings runtime so that users can execute
 # ExecuTorch programs that delegate to it.
-CMAKE_ARGS="${CMAKE_ARGS} -DEXECUTORCH_BUILD_XNNPACK=ON"
+CMAKE_ARGS="${CMAKE_ARGS} -DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON"
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 9a2221b3aac..c3eafc02c39 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -365,8 +365,7 @@ jobs:
         # build module for executorch.extension.pybindings.portable_lib
         BUILD_TOOL="cmake"
         PYTHON_EXECUTABLE=python \
-        EXECUTORCH_BUILD_XNNPACK=ON \
-        EXECUTORCH_BUILD_PYBIND=ON \
+        CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON" \
         bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
 
         # see if we can import the module successfully
@@ -504,7 +503,7 @@ jobs:
 
         # Setup MacOS dependencies as there is no Docker support on MacOS atm
         PYTHON_EXECUTABLE=python \
-        EXECUTORCH_BUILD_PYBIND=ON \
+        CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON" \
         EXECUTORCH_BUILD_ARM_BAREMETAL=ON \
         .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
 
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index 829d8fd88f4..6c4d7f8a58e 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -65,22 +65,29 @@ jobs:
       matrix:
         model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe]
         backend: [portable, xnnpack-quantization-delegation]
+        runner: [linux.arm64.2xlarge]
        include:
          - model: lstm
            backend: portable
+           runner: linux.arm64.2xlarge
          - model: mul
            backend: portable
+           runner: linux.arm64.2xlarge
          - model: softmax
            backend: portable
+           runner: linux.arm64.2xlarge
          - model: phi_4_mini
            backend: portable
+           runner: linux.arm64.m7g.4xlarge
          - model: qwen2_5
            backend: portable
+           runner: linux.arm64.2xlarge
          - model: llama3_2_vision_encoder
            backend: portable
+           runner: linux.arm64.2xlarge
       fail-fast: false
     with:
-      runner: linux.arm64.2xlarge
+      runner: ${{ matrix.runner }}
       docker-image: executorch-ubuntu-22.04-gcc11-aarch64
       submodules: 'true'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
@@ -261,7 +268,7 @@ jobs:
 
         # build module for executorch.extension.pybindings.portable_lib
         BUILD_TOOL=${{ matrix.build-tool }}
-        EXECUTORCH_BUILD_PYBIND=ON PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
+        CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON" PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
 
         # see if we can import the module successfully
         ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
@@ -536,9 +543,8 @@ jobs:
           git clone https://github.com/huggingface/optimum-executorch
           cd optimum-executorch
           # There is no release yet, for CI stability, always test from the same commit on main
-          git checkout 6a7e83f3eee2976fa809335bfb78a45b1ea1cb25
-          pip install .
-          pip install accelerate sentencepiece
+          git checkout 577a2b19670e4c643a5c6ecb09bf47b9a699e7c6
+          pip install .[tests]
          pip list
          echo "::endgroup::"
 
diff --git a/Package.swift b/Package.swift
index 94acfc4cd7b..1322b918c07 100644
--- a/Package.swift
+++ b/Package.swift
@@ -82,5 +82,24 @@ let package = Package(
          (value["libraries"] as? [String] ?? []).map { .linkedLibrary($0) }
        ),
      ]
-  }
+  } + [
+    .testTarget(
+      name: "tests",
+      dependencies: [
+        .target(name: "executorch_debug"),
+        .target(name: "kernels_portable"),
+      ],
+      path: "extension/apple/ExecuTorch/__tests__",
+      resources: [
+        .copy("resources/add.pte")
+      ],
+      linkerSettings: [
+        .linkedLibrary("c++"),
+        .unsafeFlags([
+          "-Xlinker", "-force_load",
+          "-Xlinker", "cmake-out/kernels_portable.xcframework/macos-arm64/libkernels_portable_macos.a",
+        ])
+      ]
+    )
+  ]
 )
diff --git a/backends/apple/coreml/compiler/coreml_preprocess.py b/backends/apple/coreml/compiler/coreml_preprocess.py
index c7828888ee5..e9afd819d94 100644
--- a/backends/apple/coreml/compiler/coreml_preprocess.py
+++ b/backends/apple/coreml/compiler/coreml_preprocess.py
@@ -16,8 +16,8 @@
 import coremltools as ct
 import coremltools.optimize as cto
-import executorchcoreml
 
+from executorch.backends.apple.coreml import executorchcoreml
 from executorch.exir.backend.backend_details import (
     BackendDetails,
     ExportedProgram,
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
index 3e11999e939..3848f7c9b3c 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
@@ -208,7 +208,7 @@ void set_outputs(std::vector& outputs,
                  const inmemoryfs::InMemoryFileSystem *inmemory_fs,
                  NSError * __autoreleasing *error) {
     NSError *local_error = nil;
-    if (![fm createDirectoryAtURL:dst_url withIntermediateDirectories:NO attributes:@{} error:error]) {
+    if (![fm createDirectoryAtURL:dst_url withIntermediateDirectories:YES attributes:@{} error:error]) {
         ETCoreMLLogUnderlyingErrorAndSetNSError(error,
                                                 ETCoreMLErrorModelSaveFailed,
                                                 local_error,
diff --git a/backends/apple/coreml/runtime/inmemoryfs/setup.py b/backends/apple/coreml/runtime/inmemoryfs/setup.py
deleted file mode 100644
index c93022ed341..00000000000
--- a/backends/apple/coreml/runtime/inmemoryfs/setup.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import os
-
-import pathlib
-import sys
-
-REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent.parent.parent.parent.parent
-PYBIND11_DIR_PATH = REPO_ROOT / "third-party" / "pybind11"
-sys.path.append(str(PYBIND11_DIR_PATH.absolute()))
-
-from pybind11.setup_helpers import build_ext, Pybind11Extension
-from setuptools import setup
-
-__version__ = "0.0.1"
-
-cxx_std = int(os.environ.get("CMAKE_CXX_STANDARD", "17"))
-
-ext_modules = [
-    Pybind11Extension(
-        "executorchcoreml",
-        [
-            "../util/json_util.cpp",
-            "inmemory_filesystem.cpp",
-            "inmemory_filesystem_py.cpp",
-            "inmemory_filesystem_utils.cpp",
-            "memory_buffer.cpp",
-            "memory_stream.cpp",
-            "reversed_memory_stream.cpp",
-        ],
-        define_macros=[("VERSION_INFO", __version__)],
-        cxx_std=cxx_std,
-        extra_compile_args=["-mmacosx-version-min=10.15", "-g"],
-        include_dirs=[
-            "../../third-party/nlohmann_json/single_include",
-            ".",
-            "../util",
-        ],
-    ),
-]
-
-setup(
-    name="executorchcoreml",
-    version=__version__,
-    description="CoreML extension for executorch",
-    long_description="",
-    author="Apple Inc.",
-    ext_modules=ext_modules,
-    extras_require={"test": "pytest"},
-    cmdclass={"build_ext": build_ext},
-    include_package_data=True,
-    zip_safe=False,
-    python_requires=">=3.9",
-)
diff --git a/backends/apple/coreml/scripts/install_inmemoryfs.sh b/backends/apple/coreml/scripts/install_inmemoryfs.sh
deleted file mode 100644
index 1fb9dd1c4d5..00000000000
--- a/backends/apple/coreml/scripts/install_inmemoryfs.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env bash
-#
-# Copyright © 2023 Apple Inc. All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-SCRIPT_DIR_PATH="$(
-    cd -- "$(dirname "$0")" >/dev/null 2>&1
-    pwd -P
-)"
-
-EXECUTORCH_ROOT_PATH=$(realpath "$SCRIPT_DIR_PATH/../../../../")
-COREML_DIR_PATH="$EXECUTORCH_ROOT_PATH/backends/apple/coreml"
-
-red=`tput setaf 1`
-green=`tput setaf 2`
-
-echo "${green}ExecuTorch: Installing inmemoryfs extension."
-pip install "$COREML_DIR_PATH/runtime/inmemoryfs"
-STATUS=$?
-if [ $STATUS -ne 0 ]; then
-    echo "${red}ExecuTorch: Failed to install inmemoryfs extension."
-    exit 1
-fi
\ No newline at end of file
diff --git a/backends/apple/coreml/scripts/install_requirements.sh b/backends/apple/coreml/scripts/install_requirements.sh
index 140ba09c702..5b3f4e3f31a 100755
--- a/backends/apple/coreml/scripts/install_requirements.sh
+++ b/backends/apple/coreml/scripts/install_requirements.sh
@@ -10,6 +10,10 @@ SCRIPT_DIR_PATH="$(
     pwd -P
 )"
 
+# TODO(jathu): remove the need to fetch coremltools to build deps for coreml_executor_runner.
+# Keep this version in sync with: pyproject.toml
+COREMLTOOLS_VERSION="8.1"
+
 red=`tput setaf 1`
 green=`tput setaf 2`
 
@@ -24,7 +28,7 @@ rm -rf "$COREML_DIR_PATH/third-party"
 mkdir "$COREML_DIR_PATH/third-party"
 
 echo "${green}ExecuTorch: Cloning coremltools."
-git clone --depth 1 --branch 8.1 "https://github.com/apple/coremltools.git" $COREMLTOOLS_DIR_PATH
+git clone --depth 1 --branch "${COREMLTOOLS_VERSION}" "https://github.com/apple/coremltools.git" $COREMLTOOLS_DIR_PATH
 cd $COREMLTOOLS_DIR_PATH
 
 STATUS=$?
@@ -43,16 +47,7 @@ fi
 
 mkdir "$COREMLTOOLS_DIR_PATH/build"
 cmake -S "$COREMLTOOLS_DIR_PATH" -B "$COREMLTOOLS_DIR_PATH/build"
-cmake --build "$COREMLTOOLS_DIR_PATH/build" --parallel
-
-echo "${green}ExecuTorch: Installing coremltools."
-pip install "$COREMLTOOLS_DIR_PATH"
-
-STATUS=$?
-if [ $STATUS -ne 0 ]; then
-  echo "${red}ExecuTorch: Failed to install coremltools."
-  exit 1
-fi
+cmake --build "$COREMLTOOLS_DIR_PATH/build" --parallel --target mlmodel
 
 echo "${green}ExecuTorch: Cloning nlohmann."
 git clone https://github.com/nlohmann/json.git "$COREML_DIR_PATH/third-party/nlohmann_json"
@@ -62,8 +57,6 @@ if [ $STATUS -ne 0 ]; then
   exit 1
 fi
 
-sh "$COREML_DIR_PATH/scripts/install_inmemoryfs.sh"
-
 echo "${green}ExecuTorch: Copying protobuf files."
 mkdir -p "$COREML_DIR_PATH/runtime/sdk/format/"
 cp -rf "$PROTOBUF_FILES_DIR_PATH" "$COREML_DIR_PATH/runtime/sdk/format/"
diff --git a/backends/apple/coreml/setup.md b/backends/apple/coreml/setup.md
index 6b7ffa4ded8..c6daae0d989 100644
--- a/backends/apple/coreml/setup.md
+++ b/backends/apple/coreml/setup.md
@@ -6,16 +6,8 @@ This is a tutorial for setting up the Core ML backend.
 
 1. Follow the instructions described in [Setting Up ExecuTorch](/docs/source/getting-started-setup.md) to set up ExecuTorch environment.
 
-2. Run `install_requirements.sh` to install dependencies required by the **Core ML** backend.
-```
-cd executorch
-
-./backends/apple/coreml/scripts/install_requirements.sh
-
-```
-
-3. Run the example script to validate that the **Core ML** backend is set up correctly.
+2. Run the example script to validate that the **Core ML** backend is set up correctly.
 
 ```
 cd executorch
@@ -26,7 +18,7 @@ python3 -m examples.apple.coreml.scripts.export --model_name add
 
 ```
 
-4. You can now integrate the **Core ML** backend in code.
+3. You can now integrate the **Core ML** backend in code.
 
 ```python
 # Delegate to Core ML backend
diff --git a/backends/arm/operator_support/convolution_support.py b/backends/arm/operator_support/convolution_support.py
index b07ae82f98f..9e13babe23a 100644
--- a/backends/arm/operator_support/convolution_support.py
+++ b/backends/arm/operator_support/convolution_support.py
@@ -55,7 +55,7 @@ def _is_node_supported_u55(self, node: fx.Node):
         C_in = shape_in[1]
         C_out = shape_out[1]
 
-        if (C_in == group) and (C_out % C_in) == 0:
+        if (C_in == group) and (C_out % C_in) == 0 and len(shape_in) <= 4:
             # Depthwise convolution
             for dim in shape_in[1:]:
                 if not 1 <= dim <= 65536:
@@ -74,6 +74,7 @@ def _is_node_supported_u55(self, node: fx.Node):
 
         kernel_w = kernel[2]
         kernel_h = kernel[3] if len(kernel) > 3 else 1
+        kernel_z = kernel[4] if len(kernel) > 4 else 1
 
         # Kernel condition misses constraint on sum of absolute weights
         if not 1 <= kernel_h <= 64 or not 1 <= kernel_w * kernel_h <= 4096:
             self.reporter.report_reject(
@@ -81,6 +82,11 @@ def _is_node_supported_u55(self, node: fx.Node):
                 f"Convolution needs to have kernel_y<=64, kernel_x*kernel_y<=4096, got kernel ({kernel_w}, {kernel_h})",
             )
             return False
+        if kernel_z != 1:
+            self.reporter.report_reject(
+                node, f"Convolution3d needs to have kernel_z==1, got {kernel_z}."
+            )
+            return False
 
         if not self._stride_condition(node):
             self.reporter.report_reject(
@@ -107,6 +113,14 @@ def _stride_condition(self, node: fx.Node) -> bool:
         if len(strides) == 1:
             strides = [strides[0]] * 2
 
+        if len(strides) > 2:
+            stride_z = strides[2]
+            if stride_z > 1:
+                self.reporter.report_reject(
+                    node, f"Convolution3d only supports stride_z<=1, got {stride_z}."
+                )
+                return False
+
         for stride, dilation in zip(strides, dilations):
             stride_condition = 1 <= stride <= 3
             dilation_condition = (not has_padding) and (dilation == 1)
diff --git a/backends/arm/test/models/test_llama.py b/backends/arm/test/models/test_llama.py
index 973f62d2724..2656c12417d 100644
--- a/backends/arm/test/models/test_llama.py
+++ b/backends/arm/test/models/test_llama.py
@@ -52,7 +52,7 @@ def prepare_model(self):
                 params_file, str
             ), "invalid input for --llama_inputs"
         else:
-            logging.warning(
+            logger.warning(
                 "Skipping Llama test because of lack of input. To run use --llama_inputs <.pt> <.json>"
             )
             return None, None, None
 
@@ -61,6 +61,8 @@ def prepare_model(self):
             params_file
         ), "Invalid file paths"
 
+        logger.info("Running test_llama.py")
+
         # TODO: Enable key value cache
         args = [
             "--disable_dynamic_shape",
@@ -112,9 +114,11 @@ def test_llama_tosa_MI(self):
             )
             .export()
             .to_edge_transform_and_lower()
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 14})
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 26})
             .to_executorch()
             .run_method_and_compare_outputs(
-                inputs=llama_inputs, atol=1.8, rtol=0.01  # TODO: decrease tolerance
+                inputs=llama_inputs,
+                atol=4.3,
+                rtol=1.1,  # TODO: MLETORCH-825 decrease tolerance
             )
         )
diff --git a/backends/arm/test/ops/test_conv2d.py b/backends/arm/test/ops/test_conv2d.py
index 8083b2ecf71..844eed97638 100644
--- a/backends/arm/test/ops/test_conv2d.py
+++ b/backends/arm/test/ops/test_conv2d.py
@@ -8,10 +8,10 @@
 import torch
 
 from executorch.backends.arm.test import common
-from executorch.backends.arm.test.tester.arm_tester import ArmTester
 from executorch.backends.arm.test.tester.test_pipeline import (
     EthosU55PipelineBI,
     EthosU85PipelineBI,
+    OpNotSupportedPipeline,
     TosaPipelineBI,
     TosaPipelineMI,
 )
@@ -34,9 +34,9 @@ def __init__(
         in_channels: Union[List, int, None] = None,
         out_channels: Union[List, int, None] = None,
         kernel_size: Union[List, Tuple, None] = None,
-        stride: Union[List, Tuple, None] = None,
-        padding: Union[List, Tuple, None] = None,
-        dilation: Union[List, Tuple, None] = None,
+        stride: Union[List, Tuple, int, None] = None,
+        padding: Union[List, Tuple, int, None] = None,
+        dilation: Union[List, Tuple, int, None] = None,
         groups: Union[List, int, None] = None,
         bias: Union[List, bool, None] = None,
         padding_mode: Union[List, str, None] = None,
@@ -446,17 +446,9 @@ def test_convolution_2d_u85_BI_on_fvp(test_module):
 def test_reject_convolution_2d_u55_BI(
     module: Conv2d,
 ):
-    (
-        ArmTester(
-            module,
-            example_inputs=module.get_inputs(),
-            compile_spec=common.get_u55_compile_spec(),
-        )
-        .quantize()
-        .export()
-        .check_count({"torch.ops.aten.conv2d.default": 1})
-        .check(["torch.ops.quantized_decomposed"])
-        .to_edge_transform_and_lower()
-        .check(["executorch_exir_dialects_edge__ops_aten_convolution_default"])
-        .check_count({"torch.ops.higher_order.executorch_call_delegate": 0})
-    )
+    OpNotSupportedPipeline(
+        module,
+        module.get_inputs(),
+        "TOSA-0.80+BI+u55",
+        {"executorch_exir_dialects_edge__ops_aten_convolution_default": 1},
+    ).run()
diff --git a/backends/arm/test/ops/test_conv3d.py b/backends/arm/test/ops/test_conv3d.py
new file mode 100644
index 00000000000..22f7e9e7f54
--- /dev/null
+++ b/backends/arm/test/ops/test_conv3d.py
@@ -0,0 +1,399 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from typing import List, Tuple, Union
+
+import pytest
+import torch
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+    EthosU55PipelineBI,
+    EthosU85PipelineBI,
+    OpNotSupportedPipeline,
+    TosaPipelineBI,
+    TosaPipelineMI,
+)
+
+aten_op = "torch.ops.aten.conv3d.default"
+exir_op = "executorch_exir_dialects_edge__ops_aten_convolution_default"
+
+
+class Conv3d(torch.nn.Module):
+    """
+    Creates one or many chained 3D-convolutions. For multiple convolutions, the
+    respective parameters are provided as lists.
+ """ + + def __init__( + self, + height=8, + width=8, + depth=8, + nbr_conv=1, # Number of chained convs + in_channels: Union[List, int, None] = None, + out_channels: Union[List, int, None] = None, + kernel_size: Union[List, Tuple, None] = None, + stride: Union[List, Tuple, int, None] = None, + padding: Union[List, Tuple, int, None] = None, + dilation: Union[List, Tuple, int, None] = None, + groups: Union[List, int, None] = None, + bias: Union[List, bool, None] = None, + padding_mode: Union[List, str, None] = None, + batches=1, + dtype=torch.float, + ): + super().__init__() + self.nbr_convs = nbr_conv + + # Handle default values + in_channels = [2] * nbr_conv if in_channels is None else in_channels + out_channels = [1 * nbr_conv] if out_channels is None else out_channels + kernel_size = [(3, 3, 1)] * nbr_conv if kernel_size is None else kernel_size + stride = [(2, 2, 1)] * nbr_conv if stride is None else stride + padding = [(1, 1, 1)] * nbr_conv if padding is None else padding + dilation = [(1, 1, 1)] * nbr_conv if dilation is None else dilation + groups = [1] * nbr_conv if groups is None else groups + bias = [True] * nbr_conv if bias is None else bias + padding_mode = ["zeros"] * nbr_conv if padding_mode is None else padding_mode + + # This allows the input parameters to be either a single value or a list + # as type hint implies + if not isinstance(in_channels, List): + in_channels = [in_channels] + if not isinstance(out_channels, List): + out_channels = [out_channels] + if not isinstance(kernel_size, List): + kernel_size = [kernel_size] + if not isinstance(stride, List): + stride = [stride] + if not isinstance(padding, List): + padding = [padding] + if not isinstance(dilation, List): + dilation = [dilation] + if not isinstance(groups, List): + groups = [groups] + if not isinstance(bias, List): + bias = [bias] + if not isinstance(padding_mode, List): + padding_mode = [padding_mode] + + self.batches = batches + self.in_channels = in_channels + self.height = height + self.width = width + self.depth = depth + self.dtype = dtype + + # Build chain of convs + for i in range(self.nbr_convs): + setattr( + self, + f"conv_{i}", + torch.nn.Conv3d( + in_channels=in_channels[i], + out_channels=out_channels[i], + kernel_size=kernel_size[i], + stride=stride[i], + padding=padding[i], + dilation=dilation[i], + groups=groups[i], + bias=bias[i], + padding_mode=padding_mode[i], + ).to(dtype), + ) + + def get_inputs(self): + return ( + torch.randn( + self.batches, self.in_channels[0], self.height, self.width, self.depth + ).to(self.dtype), + ) + + def forward(self, x): + for i in range(self.nbr_convs): + conv = getattr(self, f"conv_{i}") + x = conv(x) + return x + + +conv3d_2x2_3x2x40x40_nobias = Conv3d( + in_channels=2, + out_channels=3, + kernel_size=(2, 2, 2), + stride=1, + bias=False, + padding=0, + width=40, + height=40, + batches=3, +) + +conv3d_3x3_1x3x256x256_st1 = Conv3d( + in_channels=3, + out_channels=10, + kernel_size=(3, 3, 3), + stride=1, + padding=0, + width=256, + height=256, + batches=1, +) + +conv3d_3x3_1x3x12x12_st2_pd1 = Conv3d( + in_channels=3, + out_channels=4, + kernel_size=(3, 3, 3), + stride=2, + padding=1, + width=12, + height=12, + batches=1, +) + +conv3d_1x1_1x2x128x128_st1 = Conv3d( + in_channels=2, + out_channels=1, + kernel_size=(1, 1, 1), + stride=1, + padding=0, + width=128, + height=128, + batches=1, +) + +conv3d_2x2_1x1x14x13_st2 = Conv3d( + in_channels=1, + out_channels=1, + kernel_size=(2, 2, 2), + stride=2, + padding=0, + width=14, + height=13, + batches=1, +) + 
+conv3d_5x5_3x2x128x128_st1 = Conv3d(
+    in_channels=2,
+    out_channels=3,
+    kernel_size=(5, 5, 5),
+    stride=1,
+    padding=0,
+    width=128,
+    height=128,
+    batches=3,
+)
+
+conv3d_3x3_1x3x224x224_st2_pd1 = Conv3d(
+    in_channels=3,
+    out_channels=16,
+    kernel_size=(3, 3, 3),
+    stride=2,
+    padding=1,
+    width=224,
+    height=224,
+    batches=1,
+)
+
+conv3d_5x5_1x3x14x15_st3_pd1 = Conv3d(
+    in_channels=3,
+    out_channels=16,
+    kernel_size=(5, 5, 5),
+    stride=3,
+    padding=1,
+    width=14,
+    height=15,
+    batches=1,
+)
+
+conv3d_7x7_1x3x16x16_st2_pd1_dl2 = Conv3d(
+    in_channels=3,
+    out_channels=3,
+    kernel_size=(7, 7, 7),
+    stride=2,
+    padding=1,
+    dilation=2,
+    width=16,
+    height=16,
+    batches=1,
+)
+
+conv3d_7x7_1x3x15x15_st1_pd0_dl1 = Conv3d(
+    in_channels=3,
+    out_channels=3,
+    kernel_size=(7, 7, 7),
+    stride=1,
+    padding=0,
+    dilation=1,
+    width=15,
+    height=15,
+    batches=1,
+)
+
+conv3d_5x5_1x3x14x14_st5_pd0_dl1 = Conv3d(
+    in_channels=3,
+    out_channels=3,
+    kernel_size=(5, 5, 5),
+    stride=5,
+    padding=0,
+    dilation=1,
+    width=14,
+    height=14,
+    batches=1,
+)
+
+conv3d_5x5_1x3x9x9_st5_pd0_dl1 = Conv3d(
+    in_channels=3,
+    out_channels=3,
+    kernel_size=(5, 5, 5),
+    stride=5,
+    padding=0,
+    dilation=1,
+    width=9,
+    height=9,
+    batches=1,
+)
+
+conv3d_3x3_1x3x8x9_st3_pd0_dl1 = Conv3d(
+    in_channels=3,
+    out_channels=3,
+    kernel_size=(3, 3, 3),
+    stride=3,
+    padding=0,
+    dilation=1,
+    width=9,
+    height=8,
+    batches=1,
+)
+
+conv3d_3x3_1x3x9x8_st3_pd0_dl1 = Conv3d(
+    in_channels=3,
+    out_channels=3,
+    kernel_size=(3, 3, 3),
+    stride=3,
+    padding=0,
+    dilation=1,
+    width=8,
+    height=9,
+    batches=1,
+)
+
+conv3d_3x4_1x3x7x7_st3_pd0_dl1 = Conv3d(
+    in_channels=3,
+    out_channels=3,
+    kernel_size=(3, 4, 3),
+    stride=3,
+    padding=0,
+    dilation=1,
+    width=7,
+    height=7,
+    batches=1,
+)
+
+conv3d_4x3_1x3x7x7_st3_pd0_dl1 = Conv3d(
+    in_channels=3,
+    out_channels=3,
+    kernel_size=(4, 3, 3),
+    stride=3,
+    padding=0,
+    dilation=1,
+    width=7,
+    height=7,
+    batches=1,
+)
+
+test_modules = {
+    "2x2_3x2x40x40_nobias": conv3d_2x2_3x2x40x40_nobias,
+    "3x3_1x3x256x256_st1": conv3d_3x3_1x3x256x256_st1,
+    "3x3_1x3x12x12_st2_pd1": conv3d_3x3_1x3x12x12_st2_pd1,
+    "1x1_1x2x128x128_st1": conv3d_1x1_1x2x128x128_st1,
+    "2x2_1x1x14x13_st2_needs_adjust_pass": conv3d_2x2_1x1x14x13_st2,
+    "5x5_1x3x14x15_st3_pd1_needs_adjust_pass": conv3d_5x5_1x3x14x15_st3_pd1,
+    "7x7_1x3x16x16_st2_pd1_dl2_needs_adjust_pass": conv3d_7x7_1x3x16x16_st2_pd1_dl2,
+    "7x7_1x3x15x15_st1_pd0_dl1_needs_adjust_pass": conv3d_7x7_1x3x15x15_st1_pd0_dl1,
+    "5x5_1x3x14x14_st5_pd0_dl1_needs_adjust_pass": conv3d_5x5_1x3x14x14_st5_pd0_dl1,
+    "5x5_1x3x9x9_st5_pd0_dl1_needs_adjust_pass": conv3d_5x5_1x3x9x9_st5_pd0_dl1,
+    "3x3_1x3x9x8_st3_pd0_dl1_needs_adjust_pass": conv3d_3x3_1x3x9x8_st3_pd0_dl1,
+    "3x3_1x3x8x9_st3_pd0_dl1_needs_adjust_pass": conv3d_3x3_1x3x8x9_st3_pd0_dl1,
+    "3x4_1x3x7x7_st3_pd0_dl1_needs_adjust_pass": conv3d_3x4_1x3x7x7_st3_pd0_dl1,
+    "4x3_1x3x7x7_st3_pd0_dl1_needs_adjust_pass": conv3d_4x3_1x3x7x7_st3_pd0_dl1,
+    "5x5_3x2x128x128_st1": conv3d_5x5_3x2x128x128_st1,
+    "3x3_1x3x224x224_st2_pd1": conv3d_3x3_1x3x224x224_st2_pd1,
+}
+
+input_t = Tuple[torch.Tensor]
+
+
+@common.parametrize("test_module", test_modules)
+@pytest.mark.skip  # Not implemented, skip until it is.
+def test_convolution_3d_tosa_MI(test_module):
+    pipeline = TosaPipelineMI[input_t](
+        test_module, test_module.get_inputs(), aten_op, exir_op
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+@pytest.mark.skip  # Not implemented, skip until it is.
+def test_convolution_3d_tosa_BI(test_module):
+    pipeline = TosaPipelineBI[input_t](
+        test_module, test_module.get_inputs(), aten_op, exir_op
+    )
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1)
+    pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+@pytest.mark.skip  # Not implemented, skip until it is.
+def test_convolution_3d_u55_BI(test_module):
+    pipeline = EthosU55PipelineBI[input_t](
+        test_module, test_module.get_inputs(), aten_op, exir_op, run_on_fvp=True
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+@pytest.mark.skip  # Not implemented, skip until it is.
+def test_convolution_3d_u85_BI(test_module):
+    pipeline = EthosU85PipelineBI[input_t](
+        test_module, test_module.get_inputs(), aten_op, exir_op, run_on_fvp=True
+    )
+    pipeline.run()
+
+
+reject_suite = {
+    "large_stride": Conv3d(
+        in_channels=1,
+        out_channels=1,
+        kernel_size=(2, 2, 1),
+        stride=(2, 4, 2),
+        padding=1,
+        width=10,
+        height=14,
+        batches=1,
+    ),
+    "large_kernel_z": Conv3d(
+        in_channels=1,
+        out_channels=1,
+        kernel_size=(2, 2, 2),
+        stride=1,
+        padding=0,
+        width=80,
+        height=80,
+        batches=1,
+    ),
+}
+
+
+@common.parametrize("module", reject_suite)
+def test_reject_convolution_3d_u55_BI(
+    module: Conv3d,
+):
+    OpNotSupportedPipeline(
+        module,
+        module.get_inputs(),
+        "TOSA-0.80+BI+u55",
+        {"executorch_exir_dialects_edge__ops_aten_convolution_default": 1},
+    ).run()
diff --git a/backends/arm/test/ops/test_layer_norm.py b/backends/arm/test/ops/test_layer_norm.py
index 4e91554e05a..7ed181711a1 100644
--- a/backends/arm/test/ops/test_layer_norm.py
+++ b/backends/arm/test/ops/test_layer_norm.py
@@ -77,47 +77,31 @@ def test_native_layer_norm_tosa_BI(test_data):
         test_data[0],
         "torch.ops.aten.sub.Tensor",  # Just check for sub op included in the layernorm decomposition
     )
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1)
     pipeline.run()
 
 
 @common.parametrize("test_data", test_data_suite)
+@common.XfailIfNoCorstone300
 def test_native_layer_norm_u55_BI(test_data):
     pipeline = EthosU55PipelineBI[input_t](
         test_data[1],
         test_data[0],
         "torch.ops.aten.sub.Tensor",  # Just check for sub op included in the layernorm decomposition
+        run_on_fvp=True,
     )
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1)
     pipeline.run()
 
 
 @common.parametrize("test_data", test_data_suite)
+@common.XfailIfNoCorstone320
 def test_native_layer_norm_u85_BI(test_data):
-    pipeline = EthosU85PipelineBI[input_t](
-        test_data[1],
-        test_data[0],
-        "torch.ops.aten.sub.Tensor",  # Just check for sub op included in the layernorm decomposition
-    )
-    pipeline.run()
-
-
-@common.parametrize("test_data", test_data_suite)
-@common.SkipIfNoCorstone300
-def test_native_layer_norm_u55_BI_on_fvp(test_data):
-    pipeline = EthosU55PipelineBI[input_t](
-        test_data[1],
-        test_data[0],
-        "torch.ops.aten.sub.Tensor",  # Just check for sub op included in the layernorm decomposition
-    )
-    pipeline.run()
-
-
-@common.parametrize("test_data", test_data_suite)
-@common.SkipIfNoCorstone320
-def test_native_layer_norm_u85_BI_on_fvp(test_data):
     pipeline = EthosU85PipelineBI[input_t](
         test_data[1],
         test_data[0],
         "torch.ops.aten.sub.Tensor",  # Just check for sub op included in the layernorm decomposition
         run_on_fvp=True,
     )
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1)
     pipeline.run()
diff --git a/backends/arm/test/ops/test_logsoftmax.py b/backends/arm/test/ops/test_logsoftmax.py
index 3e4cc1c0faa..7068ee77e01 100644
--- a/backends/arm/test/ops/test_logsoftmax.py
+++ b/backends/arm/test/ops/test_logsoftmax.py
@@ -47,6 +47,7 @@ def test_log_softmax_tosa_MI(test_data):
     pipeline.add_stage_after(
         "to_edge_transform_and_lower", pipeline.tester.check_not, [exir_op]
     )
+
     pipeline.run()
 
 
@@ -55,22 +56,7 @@ def test_log_softmax_tosa_BI(test_data):
     data, dim = test_data
     pipeline = TosaPipelineBI[input_t1](LogSoftmax(dim), data, [])
     pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
-    pipeline.run()
-
-
-@common.parametrize("test_data", LogSoftmax.test_data)
-def test_log_softmax_u55_BI(test_data):
-    data, dim = test_data
-    pipeline = EthosU55PipelineBI[input_t1](LogSoftmax(dim), data, [], run_on_fvp=False)
-    pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
-    pipeline.run()
-
-
-@common.parametrize("test_data", LogSoftmax.test_data)
-def test_log_softmax_u85_BI(test_data):
-    data, dim = test_data
-    pipeline = EthosU85PipelineBI[input_t1](LogSoftmax(dim), data, [], run_on_fvp=False)
-    pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1)
     pipeline.run()
 
 
@@ -81,11 +67,12 @@ def test_log_softmax_u85_BI(test_data):
         "randn_mult_batches": "MLETORCH-433: Multiple batches not supported on FVP"
     },
 )
-@common.SkipIfNoCorstone300()
-def test_log_softmax_u55_BI_on_fvp(test_data):
+@common.XfailIfNoCorstone300()
+def test_log_softmax_u55_BI(test_data):
     data, dim = test_data
     pipeline = EthosU55PipelineBI[input_t1](LogSoftmax(dim), data, [], run_on_fvp=True)
     pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1)
     pipeline.run()
 
 
@@ -96,9 +83,10 @@ def test_log_softmax_u55_BI_on_fvp(test_data):
         "randn_mult_batches": "MLETORCH-433: Multiple batches not supported on FVP"
     },
 )
-@common.SkipIfNoCorstone320
-def test_log_softmax_u85_BI_on_fvp(test_data):
+@common.XfailIfNoCorstone320
+def test_log_softmax_u85_BI(test_data):
     data, dim = test_data
     pipeline = EthosU85PipelineBI[input_t1](LogSoftmax(dim), data, [], run_on_fvp=True)
     pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1)
     pipeline.run()
diff --git a/backends/arm/test/ops/test_mean_dim.py b/backends/arm/test/ops/test_mean_dim.py
index 9d67030cc4f..2351b0f9e9c 100644
--- a/backends/arm/test/ops/test_mean_dim.py
+++ b/backends/arm/test/ops/test_mean_dim.py
@@ -139,48 +139,36 @@ def test_mean_tosa_MI(test_data):
 
 @common.parametrize("test_data", MeanDim.test_data_suite)
 def test_mean_tosa_BI(test_data):
-    TosaPipelineBI[input_t](
-        MeanDim(test_data[1], test_data[2]),
-        (test_data[0],),
-        "torch.ops.aten.sum.dim_IntList",  # Just check for sum op included in the mean decomposition
-    ).run()
-
-
-@common.parametrize("test_data", MeanDim.test_data_suite)
-def test_mean_u55(test_data):
-    EthosU55PipelineBI[input_t](
-        MeanDim(test_data[1], test_data[2]),
-        (test_data[0],),
-        "torch.ops.aten.sum.dim_IntList",  # Just check for sum op included in the mean decomposition
-    ).run()
-
-
-@common.parametrize("test_data", MeanDim.test_data_suite)
-def test_mean_u85(test_data):
-    EthosU85PipelineBI[input_t](
+    pipeline = TosaPipelineBI[input_t](
         MeanDim(test_data[1], test_data[2]),
         (test_data[0],),
         "torch.ops.aten.sum.dim_IntList",  # Just check for sum op included in the mean decomposition
-    ).run()
+    )
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1)
+    pipeline.run()
 
 
 @common.parametrize("test_data", MeanDim.test_data_suite)
-@common.SkipIfNoCorstone300
-def test_mean_u55_on_fvp(test_data):
-    EthosU55PipelineBI[input_t](
+@common.XfailIfNoCorstone300
+def test_mean_u55_BI(test_data):
+    pipeline = EthosU55PipelineBI[input_t](
         MeanDim(test_data[1], test_data[2]),
         (test_data[0],),
         "torch.ops.aten.sum.dim_IntList",  # Just check for sum op included in the mean decomposition
         run_on_fvp=True,
-    ).run()
+    )
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1)
+    pipeline.run()
 
 
 @common.parametrize("test_data", MeanDim.test_data_suite)
-@common.SkipIfNoCorstone320
-def test_mean_u85_on_fvp(test_data):
-    EthosU85PipelineBI[input_t](
+@common.XfailIfNoCorstone320
+def test_mean_u85_BI(test_data):
+    pipeline = EthosU85PipelineBI[input_t](
         MeanDim(test_data[1], test_data[2]),
         (test_data[0],),
         "torch.ops.aten.sum.dim_IntList",  # Just check for sum op included in the mean decomposition
         run_on_fvp=True,
-    ).run()
+    )
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1)
+    pipeline.run()
diff --git a/backends/arm/test/ops/test_scalars.py b/backends/arm/test/ops/test_scalars.py
index 17dcd6f1d27..97af070120b 100644
--- a/backends/arm/test/ops/test_scalars.py
+++ b/backends/arm/test/ops/test_scalars.py
@@ -220,7 +220,9 @@ def _test_passes_tosa_BI_pipeline(module: torch.nn.Module, test_data: tuple):
 }
 
 
-@common.parametrize("tensor_scalar_tests", tensor_scalar_tests, passes_xfails)
+@common.parametrize(
+    "tensor_scalar_tests", tensor_scalar_tests, passes_xfails, strict=False
+)
 def test_passes_BI(tensor_scalar_tests: list):
     op, x, y = tensor_scalar_tests
     _test_passes_tosa_BI_pipeline(op, (x, y))
diff --git a/backends/arm/test/ops/test_softmax.py b/backends/arm/test/ops/test_softmax.py
index d51f20040b0..dcee5d038f2 100644
--- a/backends/arm/test/ops/test_softmax.py
+++ b/backends/arm/test/ops/test_softmax.py
@@ -57,22 +57,7 @@ def test_softmax_tosa_BI(test_data):
     data, dim = test_data
     pipeline = TosaPipelineBI[input_t1](Softmax(dim), data, [])
     pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
-    pipeline.run()
-
-
-@common.parametrize("test_data", Softmax.test_data)
-def test_softmax_u55_BI(test_data):
-    data, dim = test_data
-    pipeline = EthosU55PipelineBI[input_t1](Softmax(dim), data, [], run_on_fvp=False)
-    pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
-    pipeline.run()
-
-
-@common.parametrize("test_data", Softmax.test_data)
-def test_softmax_u85_BI(test_data):
-    data, dim = test_data
-    pipeline = EthosU85PipelineBI[input_t1](Softmax(dim), data, [], run_on_fvp=False)
-    pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1)
     pipeline.run()
 
 
@@ -83,11 +68,12 @@ def test_softmax_u85_BI(test_data):
         "randn_mult_batches": "MLETORCH-433: Multiple batches not supported on FVP"
     },
 )
-@common.SkipIfNoCorstone300
-def test_softmax_u55_BI_on_fvp(test_data):
+@common.XfailIfNoCorstone300
+def test_softmax_u55_BI(test_data):
     data, dim = test_data
     pipeline = EthosU55PipelineBI[input_t1](Softmax(dim), data, [], run_on_fvp=True)
     pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1)
     pipeline.run()
 
 
@@ -98,9 +84,10 @@ def test_softmax_u55_BI_on_fvp(test_data):
         "randn_mult_batches": "MLETORCH-433: Multiple batches not supported on FVP"
     },
 )
-@common.SkipIfNoCorstone320
-def test_softmax_u85_BI_on_fvp(test_data):
+@common.XfailIfNoCorstone320
+def test_softmax_u85_BI(test_data):
     data, dim = test_data
     pipeline = EthosU85PipelineBI[input_t1](Softmax(dim), data, [], run_on_fvp=True)
     pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1)
     pipeline.run()
diff --git a/backends/qualcomm/_passes/__init__.py b/backends/qualcomm/_passes/__init__.py
index fb65e6b5f75..fb1f985edb9 100644
--- a/backends/qualcomm/_passes/__init__.py
+++ b/backends/qualcomm/_passes/__init__.py
@@ -1,10 +1,18 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
 from .annotate_decomposed import AnnotateDecomposed
 from .annotate_quant_attrs import AnnotateQuantAttrs
 from .constant_i64_to_i32 import ConstantI64toI32
 from .convert_bmm_to_matmul import ConvertBmmToMatmul
+from .convert_conv1d_to_conv2d import ConvertConv1dToConv2d
 from .convert_to_linear import ConvertToLinear
 from .decompose_any import DecomposeAny
 from .decompose_einsum import DecomposeEinsum
+from .decompose_expm1 import DecomposeExpM1
 from .decompose_linalg_vector_norm import DecomposeLinalgVectorNorm
 from .decompose_silu import DecomposeSilu
 from .expand_broadcast_tensor_shape import ExpandBroadcastTensorShape
@@ -19,8 +27,9 @@ from .recompose_rms_norm import RecomposeRmsNorm
 from .reduce_dynamic_range import ReduceDynamicRange
 from .remove_redundancy import RemoveRedundancy
+from .replace_arange_args import ReplaceArangeArgs
 from .replace_index_put_input import ReplaceIndexPutInput
-from .replace_inf_buffer import ReplaceInfBuffer
+from .replace_inf_values import ReplaceInfValues
 from .tensor_i64_to_i32 import TensorI64toI32
 
 
@@ -29,10 +38,12 @@
     AnnotateQuantAttrs,
     ConstantI64toI32,
     ConvertBmmToMatmul,
+    ConvertConv1dToConv2d,
     RecomposePReLU,
     ConvertToLinear,
     DecomposeAny,
     DecomposeEinsum,
+    DecomposeExpM1,
     DecomposeLinalgVectorNorm,
     DecomposeSilu,
     ExpandBroadcastTensorShape,
@@ -46,7 +57,8 @@
     RecomposeRmsNorm,
     ReduceDynamicRange,
     RemoveRedundancy,
+    ReplaceArangeArgs,
     ReplaceIndexPutInput,
-    ReplaceInfBuffer,
+    ReplaceInfValues,
     TensorI64toI32,
 ]
diff --git a/backends/qualcomm/_passes/annotate_decomposed.py b/backends/qualcomm/_passes/annotate_decomposed.py
index a8a757ce9bf..918b705e5e9 100644
--- a/backends/qualcomm/_passes/annotate_decomposed.py
+++ b/backends/qualcomm/_passes/annotate_decomposed.py
@@ -17,6 +17,8 @@ class AnnotateDecomposed(ExportPass):
     generated after quantization process.
     """
 
+    decomp_ops = [torch.ops.aten.stack.default, torch.ops.aten.unbind.int]
+
     def __init__(self, edge_program: torch.export.ExportedProgram):
         super(AnnotateDecomposed, self).__init__()
         self.edge_program = edge_program
@@ -32,7 +34,7 @@ def _annotate_unbind(self, graph_module: torch.fx.GraphModule):
                     n.meta[QCOM_QUANT_ATTRS] = quant_attrs.copy()
 
     def _annotate_stack(self, graph_module: torch.fx.GraphModule):
-        partitions = get_source_partitions(graph_module.graph, [torch.stack])
+        partitions = get_source_partitions(graph_module.graph, [torch.stack, "stack"])
         for _, src_partitions in partitions.items():
             for src_partition in src_partitions:
                 output = src_partition.output_nodes[0]
diff --git a/backends/qualcomm/_passes/convert_conv1d_to_conv2d.py b/backends/qualcomm/_passes/convert_conv1d_to_conv2d.py
new file mode 100644
index 00000000000..947b631dbbf
--- /dev/null
+++ b/backends/qualcomm/_passes/convert_conv1d_to_conv2d.py
@@ -0,0 +1,99 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+import torch.nn as nn
+from executorch.backends.qualcomm.builders.utils import get_parameter, set_parameter
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass, PassResult
+
+from .utils import copy_meta
+
+
+class ConvertConv1dToConv2d(ExportPass):
+    """
+    Conv1d is not supported by QNN.
+    Change it to input -> unsqueeze -> conv2d -> squeeze -> output
+    """
+
+    def __init__(self, edge_program: torch.export.ExportedProgram):
+        super(ConvertConv1dToConv2d, self).__init__()
+        self.edge_program = edge_program
+
+    def call(self, graph_module: torch.fx.GraphModule):
+        graph = graph_module.graph
+        conv_op = exir_ops.edge.aten.convolution.default
+        for node in graph.nodes:
+            if node.target == conv_op and node.meta["val"].dim() == 3:
+
+                input_node = node.args[0]
+                with graph_module.graph.inserting_after(input_node):
+                    unsqueeze_op = exir_ops.edge.aten.unsqueeze_copy.default
+                    unsqueeze_node = graph.create_node(
+                        "call_function",
+                        unsqueeze_op,
+                        (
+                            input_node,
+                            2,
+                        ),
+                    )
+                    unsqueeze_node.meta = copy_meta(
+                        input_node.meta, lambda m: {**m, "val": m["val"].unsqueeze(2)}
+                    )
+                with graph_module.graph.inserting_after(unsqueeze_node):
+
+                    filter_node = node.args[1]
+                    filter_node.meta["val"] = (
+                        filter_node.meta["val"].unsqueeze(2).contiguous()
+                    )
+                    filter_tensor = get_parameter(filter_node, self.edge_program)
+                    # Ensure tensor is nn.Parameter type, so program does not fail during edge_program._validate()
+                    filter_tensor = nn.Parameter(filter_tensor.unsqueeze(2))
+                    set_parameter(filter_tensor, filter_node, self.edge_program)
+
+                    bias_node = node.args[2]
+                    stride = [1] + node.args[3]
+                    padding = [0] + node.args[4]
+                    dilation = [1] + node.args[5]
+                    transpose = node.args[6]
+                    output_padding = [0] + node.args[7]
+                    groups = node.args[8]
+
+                    conv2d_node = graph.create_node(
+                        "call_function",
+                        conv_op,
+                        (
+                            unsqueeze_node,
+                            filter_node,
+                            bias_node,
+                            stride,
+                            padding,
+                            dilation,
+                            transpose,
+                            output_padding,
+                            groups,
+                        ),
+                    )
+                    conv2d_node.meta = copy_meta(
+                        node.meta, lambda m: {**m, "val": m["val"].unsqueeze(2)}
+                    )
+
+                with graph_module.graph.inserting_after(conv2d_node):
+                    squeeze_op = exir_ops.edge.aten.squeeze_copy.dims
+                    squeeze_node = graph.create_node(
+                        "call_function",
+                        squeeze_op,
+                        (
+                            conv2d_node,
+                            [2],
+                        ),
+                    )
+                    squeeze_node.meta = copy_meta(node.meta)
+                for user in node.users.copy():
+                    user.replace_input_with(node, squeeze_node)
+        graph.eliminate_dead_code()
+        graph_module.recompile()
+        return PassResult(graph_module, True)
diff --git a/backends/qualcomm/_passes/decompose_expm1.py b/backends/qualcomm/_passes/decompose_expm1.py
new file mode 100644
index 00000000000..8fe6ebdec5b
--- /dev/null
+++ b/backends/qualcomm/_passes/decompose_expm1.py
@@ -0,0 +1,46 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.exir.pass_base import ExportPass, PassResult
+
+from .utils import copy_meta
+
+
+class DecomposeExpM1(ExportPass):
+    """
+    Decompose expm1 into exponential and minus 1.
+ """ + + def __init__(self, quantization_capture=False) -> None: + super().__init__() + + def call(self, graph_module: torch.fx.GraphModule) -> PassResult: + graph = graph_module.graph + for node in graph.nodes: + if node.target == torch.ops.aten.special_expm1.default: + input_node = node.args[0] + with graph_module.graph.inserting_after(input_node): + exp_op = torch.ops.aten.exp.default + exp_node = graph.create_node("call_function", exp_op, (input_node,)) + exp_node.meta = copy_meta(node.meta) + with graph_module.graph.inserting_after(exp_node): + sub_op = torch.ops.aten.sub.Tensor + sub_node = graph.create_node( + "call_function", + sub_op, + ( + exp_node, + 1, + ), + ) + sub_node.meta = copy_meta(node.meta) + for user in node.users.copy(): + user.replace_input_with(node, sub_node) + + graph.eliminate_dead_code() + graph_module.recompile() + return PassResult(graph_module, True) diff --git a/backends/qualcomm/_passes/decompose_silu.py b/backends/qualcomm/_passes/decompose_silu.py index 96c48920419..c3ac45a8d9d 100644 --- a/backends/qualcomm/_passes/decompose_silu.py +++ b/backends/qualcomm/_passes/decompose_silu.py @@ -3,22 +3,17 @@ # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from typing import Dict import torch from executorch.exir.pass_base import ExportPass, PassResult +from .utils import copy_meta + class DecomposeSilu(ExportPass): def __init__(self): super(DecomposeSilu, self).__init__() - def _copy_meta(self, meta: Dict): - copied = {} - for k, v in meta.items(): - copied[k] = v - return copied - def call(self, graph_module: torch.fx.GraphModule): graph = graph_module.graph for node in graph.nodes: @@ -34,14 +29,14 @@ def call(self, graph_module: torch.fx.GraphModule): torch.ops.aten.sigmoid.default, (silu_node_input,), ) - sigmoid_node.meta = self._copy_meta(silu_node.meta) + sigmoid_node.meta = copy_meta(silu_node.meta) with graph_module.graph.inserting_after(sigmoid_node): mul_node = graph.create_node( "call_function", torch.ops.aten.mul.Tensor, (silu_node_input, sigmoid_node), ) - mul_node.meta = self._copy_meta(silu_node.meta) + mul_node.meta = copy_meta(silu_node.meta) for user in silu_node.users.copy(): user.replace_input_with(silu_node, mul_node) diff --git a/backends/qualcomm/_passes/layout_transform.py b/backends/qualcomm/_passes/layout_transform.py index 31bb936f3c4..64fdcb2bb88 100644 --- a/backends/qualcomm/_passes/layout_transform.py +++ b/backends/qualcomm/_passes/layout_transform.py @@ -49,12 +49,15 @@ class LayoutTransform(ExportPass): exir_ops.edge.aten.add.Tensor, exir_ops.edge.aten.bitwise_or.Tensor, exir_ops.edge.aten.bmm.default, + exir_ops.edge.aten.bitwise_and.Tensor, exir_ops.edge.aten.cat.default, exir_ops.edge.aten.ceil.default, exir_ops.edge.aten.clamp.default, exir_ops.edge.aten.constant_pad_nd.default, exir_ops.edge.aten.div.Tensor, + exir_ops.edge.aten.elu.default, exir_ops.edge.aten.eq.Tensor, + exir_ops.edge.aten.exp.default, exir_ops.edge.aten.full.default, exir_ops.edge.aten.full_like.default, exir_ops.edge.aten.ge.Tensor, @@ -87,10 +90,13 @@ class LayoutTransform(ExportPass): exir_ops.edge.aten.sqrt.default, exir_ops.edge.aten.sub.Tensor, exir_ops.edge.aten.sum.dim_IntList, + exir_ops.edge.aten.stack.default, exir_ops.edge.aten.topk.default, exir_ops.edge.aten._to_copy.default, + exir_ops.edge.aten.unbind.int, exir_ops.edge.aten.where.self, _operator.getitem, + torch.ops.aten.scalar_tensor.default, } layout_type = { diff --git 
index 749d30f3564..cef28988520 100644
--- a/backends/qualcomm/_passes/lift_constant_scalar_operands.py
+++ b/backends/qualcomm/_passes/lift_constant_scalar_operands.py
@@ -28,24 +28,27 @@ class TensorConstant:
 class TensorOpInfo:
     target: torch._ops.OpOverload
     use_schema_args: bool
+    use_self_dtype: bool
 
 
 SCALAR_OPS = {
-    aten.eq.Scalar: TensorOpInfo(aten.eq.Tensor, False),
-    aten.ge.Scalar: TensorOpInfo(aten.ge.Tensor, False),
-    aten.gt.Scalar: TensorOpInfo(aten.gt.Tensor, False),
-    aten.le.Scalar: TensorOpInfo(aten.le.Tensor, False),
-    aten.lt.Scalar: TensorOpInfo(aten.lt.Tensor, False),
-    aten.ne.Scalar: TensorOpInfo(aten.ne.Tensor, False),
-    aten.add.Scalar: TensorOpInfo(aten.add.Tensor, False),
-    aten.add_.Scalar: TensorOpInfo(aten.add_.Tensor, False),
-    aten.div.Scalar: TensorOpInfo(aten.div.Tensor, False),
-    aten.mul.Scalar: TensorOpInfo(aten.mul.Tensor, False),
-    aten.rsub.Scalar: TensorOpInfo(aten.rsub.Tensor, False),
-    aten.sub.Scalar: TensorOpInfo(aten.sub.Tensor, False),
-    aten.pow.Tensor_Scalar: TensorOpInfo(aten.pow.Tensor_Tensor, False),
+    aten.eq.Scalar: TensorOpInfo(aten.eq.Tensor, False, False),
+    aten.ge.Scalar: TensorOpInfo(aten.ge.Tensor, False, False),
+    aten.gt.Scalar: TensorOpInfo(aten.gt.Tensor, False, False),
+    aten.le.Scalar: TensorOpInfo(aten.le.Tensor, False, False),
+    aten.lt.Scalar: TensorOpInfo(aten.lt.Tensor, False, False),
+    aten.ne.Scalar: TensorOpInfo(aten.ne.Tensor, False, False),
+    aten.add.Scalar: TensorOpInfo(aten.add.Tensor, False, False),
+    aten.add_.Scalar: TensorOpInfo(aten.add_.Tensor, False, False),
+    aten.div.Scalar: TensorOpInfo(aten.div.Tensor, False, False),
+    aten.mul.Scalar: TensorOpInfo(aten.mul.Tensor, False, False),
+    aten.rsub.Scalar: TensorOpInfo(aten.rsub.Tensor, False, False),
+    aten.sub.Scalar: TensorOpInfo(aten.sub.Tensor, False, False),
+    aten.pow.Tensor_Scalar: TensorOpInfo(aten.pow.Tensor_Tensor, False, False),
     # The scalar number arg[1] is missing when using default. Result in a corner case to deal
-    aten.leaky_relu.default: TensorOpInfo(aten.prelu.default, True),
+    aten.leaky_relu.default: TensorOpInfo(aten.prelu.default, True, False),
+    aten.where.ScalarOther: TensorOpInfo(aten.where.self, False, True),
+    aten.where.Scalar: TensorOpInfo(aten.where.self, False, True),
 }
 
 
@@ -63,11 +66,14 @@ def __init__(self):
     def _build_tensor_constant(
         self, gm: torch.fx.GraphModule, node: fx.Node, const_val
     ) -> TensorConstant:
+        # For dtype, in some cases, we cannot use node.args[0] as the scalar dtype.
+        # Ex: where op args[0] can be bool; however, we probably want args[1] and args[2] to have the same dtype as node.meta["val"] instead of bool.
         tensor = torch.tensor(
             [const_val],
             dtype=(
                 node.args[0].meta["val"].dtype
                 if not is_float_tensor(node)
+                and not SCALAR_OPS.get(node.target).use_self_dtype
                 else node.meta["val"].dtype
             ),
             device=node.meta["val"].device,
diff --git a/backends/qualcomm/_passes/replace_arange_args.py b/backends/qualcomm/_passes/replace_arange_args.py
new file mode 100644
index 00000000000..19ebc60227f
--- /dev/null
+++ b/backends/qualcomm/_passes/replace_arange_args.py
@@ -0,0 +1,48 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.exir.pass_base import ExportPass, PassResult
+
+from .utils import copy_meta
+
+
+class ReplaceArangeArgs(ExportPass):
+    """
+    During annotation, kwargs for arange will be removed due to restrictions by quantizer.
+    This causes arange to have no dtype, which means FP nodes might be inferred as INT nodes during calibration.
+    This can cause calibration to fail since QDQ can only be applied on FP nodes but not INT nodes.
+    To hint the dtype, we provide step size as 1.0 instead of 1, which makes the node a FP node.
+    """
+
+    def __init__(self, quantization_capture=False) -> None:
+        super().__init__()
+        self.quantization_capture = quantization_capture
+
+    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
+        graph = graph_module.graph
+        for node in graph.nodes:
+            if node.target == torch.ops.aten.arange.default:
+                if torch.is_floating_point(node.meta["val"]) and len(node.args) == 1:
+                    with graph_module.graph.inserting_after(node):
+                        step_arange_op = torch.ops.aten.arange.start_step
+                        step_arange_node = graph.create_node(
+                            "call_function",
+                            step_arange_op,
+                            (
+                                0,
+                                node.args[0],
+                                1.0,
+                            ),
+                        )
+                        step_arange_node.meta = copy_meta(node.meta)
+
+                        for user in node.users.copy():
+                            user.replace_input_with(node, step_arange_node)
+
+        graph.eliminate_dead_code()
+        graph_module.recompile()
+        return PassResult(graph_module, True)
diff --git a/backends/qualcomm/_passes/replace_inf_buffer.py b/backends/qualcomm/_passes/replace_inf_values.py
similarity index 58%
rename from backends/qualcomm/_passes/replace_inf_buffer.py
rename to backends/qualcomm/_passes/replace_inf_values.py
index 776bc9beeba..5f7fb9bd768 100644
--- a/backends/qualcomm/_passes/replace_inf_buffer.py
+++ b/backends/qualcomm/_passes/replace_inf_values.py
@@ -7,20 +7,30 @@
 from executorch.exir.pass_base import ExportPass, PassResult
 
 
-class ReplaceInfBuffer(ExportPass):
+class ReplaceInfValues(ExportPass):
     """
     Due to limitation in Qnn, we need to change inf or -inf to arbitrary value in quantization.
""" def __init__(self): - super(ReplaceInfBuffer, self).__init__() + super(ReplaceInfValues, self).__init__() def call(self, graph_module: torch.fx.GraphModule): for buf_name, tensor in graph_module.named_buffers(): if tensor.is_floating_point(): + # 255 here is mainly for attention_mask in Llama for reasonable quant scale tensor[tensor == float("inf")] = 255 tensor[tensor == float("-inf")] = -255 setattr(graph_module, buf_name, tensor) + for node in graph_module.graph.nodes: + arg_list = list(node.args) + for index, arg in enumerate(arg_list): + if arg == float("-inf"): + arg_list[index] = torch.finfo(torch.float32).min + elif arg == float("inf"): + arg_list[index] = torch.finfo(torch.float32).max + node.args = tuple(arg_list) + graph_module.recompile() return PassResult(graph_module, True) diff --git a/backends/qualcomm/_passes/tensor_i64_to_i32.py b/backends/qualcomm/_passes/tensor_i64_to_i32.py index b590e30884c..baddd747f99 100644 --- a/backends/qualcomm/_passes/tensor_i64_to_i32.py +++ b/backends/qualcomm/_passes/tensor_i64_to_i32.py @@ -24,6 +24,9 @@ class TensorI64toI32(ExportPass): cast_ops = { torch.ops.aten.argmin.default, + torch.ops.aten.arange.start_step, + torch.ops.aten.full.default, + torch.ops.aten.scalar_tensor.default, } def __init__(self, edge_program): @@ -61,7 +64,13 @@ def _cast_to_int32(self, core_ep: ExirExportedProgram): cast_node.args = args for user in users: - user.replace_input_with(n, cast_node) + # _assert_tensor_metadata is used to check dtype, which will cause lowering to fail since we are changing int64 to int32 + # We also skip if the next op is already a cast op, which prevents redundant casting. + if user.target not in { + torch.ops.aten._assert_tensor_metadata.default, + torch.ops.aten._to_copy.default, + }: + user.replace_input_with(n, cast_node) core_ep.exported_program._graph_signature = _get_updated_graph_signature( core_ep.exported_program._graph_signature, diff --git a/backends/qualcomm/_passes/utils.py b/backends/qualcomm/_passes/utils.py index 23dfb569a8f..0c838e9a676 100755 --- a/backends/qualcomm/_passes/utils.py +++ b/backends/qualcomm/_passes/utils.py @@ -4,6 +4,8 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
+from typing import Dict
+
 import torch
 from executorch.backends.qualcomm.builders.utils import get_parameter
 from executorch.backends.qualcomm.utils.constants import QCOM_DTYPE, QCOM_ENCODING
@@ -24,6 +26,15 @@
 }
 
 
+def copy_meta(meta: Dict, callback=None):
+    copied = {}
+    for k, v in meta.items():
+        copied[k] = v
+    if callback:
+        copied = callback(copied)
+    return copied
+
+
 def get_quant_attrs(
     edge_program: torch.export.ExportedProgram, quant_node: torch.fx.Node
 ):
@@ -66,6 +77,7 @@ def get_passes_dependency_for_capture_program():
         AnnotateQuantAttrs,
         ConstantI64toI32,
         ConvertBmmToMatmul,
+        ConvertConv1dToConv2d,
         ConvertToLinear,
         DecomposeAny,
         DecomposeLinalgVectorNorm,
@@ -91,6 +103,7 @@ def get_passes_dependency_for_capture_program():
         ],
         ConstantI64toI32: [RemoveRedundancy],
         ConvertBmmToMatmul: [ConvertToLinear],
+        ConvertConv1dToConv2d: [FoldQDQ],
         ConvertToLinear: [RecomposePixelUnshuffle],
         DecomposeAny: [RemoveRedundancy],
         DecomposeLinalgVectorNorm: [RemoveRedundancy],
@@ -98,6 +111,7 @@ def get_passes_dependency_for_capture_program():
         FoldQDQ: [AnnotateQuantAttrs, AnnotateDecomposed],
         LayoutTransform: [
             AnnotateQuantAttrs,
+            ConvertConv1dToConv2d,
             ExpandBroadcastTensorShape,
         ],
         RecomposePixelUnshuffle: [RemoveRedundancy],
diff --git a/backends/qualcomm/builders/__init__.py b/backends/qualcomm/builders/__init__.py
index c5352a7fbee..cc85333f26b 100644
--- a/backends/qualcomm/builders/__init__.py
+++ b/backends/qualcomm/builders/__init__.py
@@ -9,6 +9,7 @@
     op_abs,
     op_adaptive_avg_pool2d,
     op_add,
+    op_and,
    op_arange,
    op_argmin,
    op_avg_pool2d,
@@ -22,8 +23,10 @@
    op_depth_to_space,
    op_dequantize,
    op_div,
+    op_elu,
    op_embedding,
    op_eq,
+    op_exp,
    op_expand,
    op_full,
    op_full_like,
@@ -62,6 +65,7 @@
    op_reshape,
    op_rms_norm,
    op_rsqrt,
+    op_scalar_tensor,
    op_select_copy,
    op_sigmoid,
    op_sin,
@@ -72,12 +76,14 @@
    op_split_with_sizes,
    op_sqrt,
    op_squeeze,
+    op_stack,
    op_sub,
    op_sum_int_list,
    op_tanh,
    op_to,
    op_topk,
    op_transpose,
+    op_unbind,
    op_unsqueeze,
    op_upsample_bilinear2d,
    op_upsample_nearest2d,
@@ -89,6 +95,7 @@
    op_abs,
    op_adaptive_avg_pool2d,
    op_add,
+    op_and,
    op_arange,
    op_argmin,
    op_avg_pool2d,
@@ -102,8 +109,10 @@
    op_depth_to_space,
    op_dequantize,
    op_div,
+    op_elu,
    op_embedding,
    op_eq,
+    op_exp,
    op_expand,
    op_full,
    op_full_like,
@@ -142,6 +151,7 @@
    op_reshape,
    op_rms_norm,
    op_rsqrt,
+    op_scalar_tensor,
    op_select_copy,
    op_sigmoid,
    op_sin,
@@ -152,12 +162,14 @@
    op_split_with_sizes,
    op_squeeze,
    op_sqrt,
+    op_stack,
    op_sub,
    op_sum_int_list,
    op_tanh,
    op_topk,
    op_to,
    op_transpose,
+    op_unbind,
    op_unsqueeze,
    op_upsample_bilinear2d,
    op_upsample_nearest2d,
diff --git a/backends/qualcomm/builders/op_and.py b/backends/qualcomm/builders/op_and.py
new file mode 100644
index 00000000000..44e6f2893f5
--- /dev/null
+++ b/backends/qualcomm/builders/op_and.py
@@ -0,0 +1,59 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Dict + +import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper + +import torch + +from .node_visitor import NodeVisitor, register_node_visitor +from .qnn_constants import OpElementWiseAnd, QNN_OP_PACKAGE_NAME_QTI_AISW + + +@register_node_visitor +class OpAnd(NodeVisitor): + target = ["aten.bitwise_and.Tensor"] + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper], + ) -> PyQnnWrapper.PyQnnOpWrapper: + out_tensor = self.get_tensor(node, node) + output_tensor_wrapper = self.define_tensor( + node, + node, + out_tensor, + PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE, + nodes_to_wrappers, + ) + and_output_tensors = [output_tensor_wrapper] + + and_input_tensors = [] + for index in range(2): + input_node = node.args[index] + input_tensor = self.get_tensor(input_node, node) + tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE + + input_tensor_wrapper = self.define_tensor( + input_node, + node, + input_tensor, + tensor_type, + nodes_to_wrappers, + ) + and_input_tensors.append(input_tensor_wrapper) + and_op = PyQnnWrapper.PyQnnOpWrapper( + node.name, + QNN_OP_PACKAGE_NAME_QTI_AISW, + OpElementWiseAnd.op_name, + ) + and_op.AddInputTensors(and_input_tensors) + and_op.AddOutputTensors(and_output_tensors) + return and_op diff --git a/backends/qualcomm/builders/op_conv2d.py b/backends/qualcomm/builders/op_conv2d.py index a6051636d3e..c019a835223 100644 --- a/backends/qualcomm/builders/op_conv2d.py +++ b/backends/qualcomm/builders/op_conv2d.py @@ -4,7 +4,6 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import warnings from typing import cast, Dict, List import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper @@ -17,8 +16,6 @@ from .qnn_constants import ( OpConv2d, OpDepthWiseConv2d, - OpExpandDims, - OpReshape, OpTransposeConv2d, QNN_OP_PACKAGE_NAME_QTI_AISW, ) @@ -102,176 +99,16 @@ def _add_conv_op_parameter( return conv_op - def _define_conv1d( - self, - node: torch.fx.Node, - nodes_to_wrappers: Dict[str, PyQnnWrapper.TensorWrapper], - ) -> PyQnnWrapper.PyQnnOpWrapper: - """ - Conv1D is a special case for convolutional operation. QNN does not support Conv1D, therefore, - we need to cast from input -> Conv1d -> output to input -> unsqueeze -> Conv2d -> squeeze -> output. 
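The OpAnd visitor earlier in this hunk forwards both operands to QNN's ElementWiseAnd with no extra parameters; on boolean tensors the op it targets is a plain elementwise AND, as a quick standalone check shows:

import torch

x = torch.tensor([True, False, True, False])
y = torch.tensor([True, True, False, False])
# aten.bitwise_and.Tensor on bool inputs is elementwise logical AND,
# which is what the two input wrappers feed into ElementWiseAnd.
assert torch.bitwise_and(x, y).tolist() == [True, False, False, False]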
- """ - transpose_conv = cast(bool, node.args[6]) - if transpose_conv: - print("ConvTranspose1d is not yet supported") - return - - op_wrapper_list = [] # op_wrapper to return - unsqueeze_input_node = node.args[0] - input_quant_encoding, input_quant_configs = self.get_quant_encoding_conf( - unsqueeze_input_node, node - ) - - unsqueeze_input_tensor = self.get_tensor(unsqueeze_input_node, node) - unsqueeze_input_tensor_wrapper = self.define_tensor( - unsqueeze_input_node, - node, - unsqueeze_input_tensor, - PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE, - nodes_to_wrappers, - ) - unsqueeze_output_tensor = unsqueeze_input_tensor.unsqueeze(1).contiguous() - dtype = self.get_data_type(unsqueeze_output_tensor, input_quant_configs) - unsqueeze_output_tensor_wrapper = self.define_custom_tensor_wrapper( - node_name=node.name + "_unsqueeze", - tensor_type=PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE, - dtype=dtype, - quant_encoding=input_quant_encoding, - quant_configs=input_quant_configs, - dims=unsqueeze_output_tensor.size(), - tensor=unsqueeze_output_tensor, - is_fake_tensor=True, - nodes_to_wrappers=nodes_to_wrappers, - ) - unsqueeze_op = PyQnnWrapper.PyQnnOpWrapper( - node.name + "_unsqueeze", - QNN_OP_PACKAGE_NAME_QTI_AISW, - OpExpandDims.op_name, - ) - unsqueeze_op.AddInputTensors([unsqueeze_input_tensor_wrapper]) - unsqueeze_op.AddOutputTensors([unsqueeze_output_tensor_wrapper]) - unsqueeze_op.AddScalarParam( - OpExpandDims.param_axis, - PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_32, - {QCOM_DATA: np.uint32(1)}, - ) - op_wrapper_list.append(unsqueeze_op) - - filter_node = node.args[1] - filter_tensor = ( - get_parameter(filter_node, self.edge_program).unsqueeze(2).contiguous() - ) - filter_axis_order = (2, 3, 1, 0) - filter_tensor = filter_tensor.permute(dims=filter_axis_order).contiguous() - filter_tensor_wrapper = self.define_tensor( - filter_node, - node, - filter_tensor, - PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC, - nodes_to_wrappers, - ) - conv_input_tensors = [unsqueeze_output_tensor_wrapper, filter_tensor_wrapper] - if node.args[2] is not None: - bias_node = node.args[2] - bias_tensor = get_parameter(bias_node, self.edge_program) - bias_tensor_wrapper = self.define_tensor( - bias_node, - node, - bias_tensor, - PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC, - nodes_to_wrappers, - ) - conv_input_tensors.append(bias_tensor_wrapper) - - stride = [1] + cast(List[int], node.args[3]) - padding = [0] + cast(List[int], node.args[4]) - dilation = [1] + cast(List[int], node.args[5]) - groups = cast(int, node.args[8]) - - # args[6] = transposed - if cast(bool, node.args[6]): - warnings.warn( - "[QNN Delegate Op Builder]: Currently, No support for transposed convolution.", - stacklevel=1, - ) - return - - # args[7] = output padding - if not all(out_pad == 0 for out_pad in cast(List[int], node.args[7])): - warnings.warn( - "[QNN Delegate Op Builder]: QNN does not support output padding.", - stacklevel=1, - ) - return - - stride_shape = [len(stride)] - padding_shape = [2, 2] - dilation_shape = [len(dilation)] - - conv_op = PyQnnWrapper.PyQnnOpWrapper( - node.name + "_squeeze", - QNN_OP_PACKAGE_NAME_QTI_AISW, - OpConv2d.op_name, - ) - conv_output_tensor = self.get_tensor(node, node) - conv_output_tensor = conv_output_tensor.unsqueeze(1).contiguous() - dtype = self.get_data_type(conv_output_tensor, input_quant_configs) - conv_output_tensor_wrapper = self.define_custom_tensor_wrapper( - node_name=node.name + "_squeeze", - 
tensor_type=PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE, - dtype=dtype, - quant_encoding=input_quant_encoding, - quant_configs=input_quant_configs, - dims=conv_output_tensor.size(), - tensor=conv_output_tensor, - is_fake_tensor=True, - nodes_to_wrappers=nodes_to_wrappers, - ) - conv_op = self._add_conv_op_parameter( - OpConv2d, - conv_op, - conv_input_tensors, - [conv_output_tensor_wrapper], - stride, - stride_shape, - padding, - padding_shape, - dilation, - dilation_shape, - groups=groups, - ) - op_wrapper_list.append(conv_op) - - squeeze_op = PyQnnWrapper.PyQnnOpWrapper( - node.name, - QNN_OP_PACKAGE_NAME_QTI_AISW, - OpReshape.op_name, - ) - squeeze_output_tensor = self.get_tensor(node, node) - squeeze_output_tensor_wrapper = self.define_tensor( - node, - node, - squeeze_output_tensor, - PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE, - nodes_to_wrappers, - node_name=node.name, - ) - squeeze_op.AddInputTensors([conv_output_tensor_wrapper]) - squeeze_op.AddOutputTensors([squeeze_output_tensor_wrapper]) - op_wrapper_list.append(squeeze_op) - - return op_wrapper_list - def define_node( self, node: torch.fx.Node, nodes_to_wrappers: Dict[str, PyQnnWrapper.TensorWrapper], ) -> PyQnnWrapper.PyQnnOpWrapper: - if get_parameter(node.args[1], self.edge_program).dim() == 3: - return self._define_conv1d(node, nodes_to_wrappers) - input_node = node.args[0] input_tensor = self.get_tensor(input_node, node) + assert ( + input_tensor.dim() == 4 + ), "All Conv should be converted to Conv2D in ConvertConv1dToConv2d" input_tensor_wrapper = self.define_tensor( input_node, node, diff --git a/backends/qualcomm/builders/op_elu.py b/backends/qualcomm/builders/op_elu.py new file mode 100644 index 00000000000..f9cc089c7bb --- /dev/null +++ b/backends/qualcomm/builders/op_elu.py @@ -0,0 +1,68 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
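The deleted _define_conv1d emulated Conv1d as unsqueeze -> Conv2d -> squeeze; that rewrite now happens ahead of time in the ConvertConv1dToConv2d pass, which is why the builder can simply assert a 4-D input. The underlying equivalence is easy to verify in plain PyTorch (standalone sketch, NCHW layout):

import torch

x = torch.randn(1, 3, 16)                       # (N, C, W)
conv1d = torch.nn.Conv1d(3, 8, kernel_size=3)

# Same weights viewed as a (1 x kW) Conv2d kernel over a dummy height.
conv2d = torch.nn.Conv2d(3, 8, kernel_size=(1, 3))
with torch.no_grad():
    conv2d.weight.copy_(conv1d.weight.unsqueeze(2))  # (O, I, 1, kW)
    conv2d.bias.copy_(conv1d.bias)

# input -> unsqueeze(H=1) -> Conv2d -> squeeze(H) -> output
y = conv2d(x.unsqueeze(2)).squeeze(2)
assert torch.allclose(conv1d(x), y, atol=1e-5)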
+from typing import Dict + +import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper + +import numpy as np +import torch +from executorch.backends.qualcomm.utils.constants import QCOM_DATA + +from .node_visitor import NodeVisitor, register_node_visitor +from .qnn_constants import OpElu, QNN_OP_PACKAGE_NAME_QTI_AISW + + +@register_node_visitor +class Elu(NodeVisitor): + target = ["aten.elu.default"] + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper], + ) -> PyQnnWrapper.PyQnnOpWrapper: + # tensor input + input_node = node.args[0] + input_tensor = self.get_tensor(input_node, node) + + input_tensor_wrapper = self.define_tensor( + input_node, + node, + input_tensor, + PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE, + nodes_to_wrappers, + ) + elu_input_tensors = [input_tensor_wrapper] + + out_tensor = self.get_tensor(node, node) + output_tensor_wrapper = self.define_tensor( + node, + node, + out_tensor, + PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE, + nodes_to_wrappers, + ) + elu_output_tensors = [output_tensor_wrapper] + + elu_op = PyQnnWrapper.PyQnnOpWrapper( + node.name, + QNN_OP_PACKAGE_NAME_QTI_AISW, + OpElu.op_name, + ) + elu_op.AddInputTensors(elu_input_tensors) + elu_op.AddOutputTensors(elu_output_tensors) + + if len(node.args) == 2: + elu_op.AddScalarParam( + OpElu.param_alpha, + PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_FLOAT_32, + {QCOM_DATA: np.float32(node.args[1])}, + ) + + return elu_op diff --git a/backends/qualcomm/builders/op_exp.py b/backends/qualcomm/builders/op_exp.py new file mode 100644 index 00000000000..8c4794c9725 --- /dev/null +++ b/backends/qualcomm/builders/op_exp.py @@ -0,0 +1,59 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree.
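For reference, QNN's Elu and aten.elu share the single alpha parameter handled above (alpha is omitted when absent, falling back to the backend default); the definition is easy to spot-check in PyTorch:

import torch
import torch.nn.functional as F

x, alpha = torch.randn(2, 5, 1, 3), 0.5
# elu(x) = x                 for x > 0
#        = alpha * (e^x - 1) otherwise
manual = torch.where(x > 0, x, alpha * (torch.exp(x) - 1))
assert torch.allclose(F.elu(x, alpha=alpha), manual, atol=1e-6)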
+from typing import Dict + +import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper + +import torch + +from .node_visitor import NodeVisitor, register_node_visitor +from .qnn_constants import OpElementWiseExp, QNN_OP_PACKAGE_NAME_QTI_AISW + + +@register_node_visitor +class Exp(NodeVisitor): + target = ["aten.exp.default"] + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper], + ) -> PyQnnWrapper.PyQnnOpWrapper: + # tensor input + input_node = node.args[0] + input_tensor = self.get_tensor(input_node, node) + + input_tensor_wrapper = self.define_tensor( + input_node, + node, + input_tensor, + PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE, + nodes_to_wrappers, + ) + exp_input_tensors = [input_tensor_wrapper] + + out_tensor = self.get_tensor(node, node) + output_tensor_wrapper = self.define_tensor( + node, + node, + out_tensor, + PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE, + nodes_to_wrappers, + ) + exp_output_tensors = [output_tensor_wrapper] + + exp_op = PyQnnWrapper.PyQnnOpWrapper( + node.name, + QNN_OP_PACKAGE_NAME_QTI_AISW, + OpElementWiseExp.op_name, + ) + exp_op.AddInputTensors(exp_input_tensors) + exp_op.AddOutputTensors(exp_output_tensors) + + return exp_op diff --git a/backends/qualcomm/builders/op_pad.py b/backends/qualcomm/builders/op_pad.py index 10948859be9..5ec34065f8b 100644 --- a/backends/qualcomm/builders/op_pad.py +++ b/backends/qualcomm/builders/op_pad.py @@ -53,14 +53,14 @@ def define_node( pad_amount = np.reshape(cast(List[int], node.args[1]), (-1, 2))[::-1].astype( np.uint32 ) - # fullfill the pad amount for each idex of tensor + # fulfill the pad amount for each index of the tensor if zero_amounts := pad_amount_shape[0] - pad_amount.shape[0]: pad_amount = np.concatenate( (np.array([(0, 0)] * zero_amounts), pad_amount) ).astype(np.uint32) if QCOM_AXIS_ORDER in node.meta: - pad_amount = np.transpose(pad_amount, node.meta[QCOM_AXIS_ORDER]) + pad_amount = pad_amount[list(node.meta[QCOM_AXIS_ORDER])] pad_amount_val = node.args[2] pad_op = PyQnnWrapper.PyQnnOpWrapper( diff --git a/backends/qualcomm/builders/op_scalar_tensor.py b/backends/qualcomm/builders/op_scalar_tensor.py new file mode 100644 index 00000000000..d236f6674df --- /dev/null +++ b/backends/qualcomm/builders/op_scalar_tensor.py @@ -0,0 +1,50 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +from typing import Dict + +import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper + +import torch + +from .node_visitor import NodeVisitor, register_node_visitor + + +@register_node_visitor +class ScalarTensor(NodeVisitor): + target = ["scalar_tensor.default"] + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper], + ) -> PyQnnWrapper.PyQnnOpWrapper: + val = node.args[0] + out_tensor = torch.tensor([val], dtype=node.meta["val"].dtype) + + # The following clamping will only occur in FP mode. Clamping for quantized mode will happen in the pass ReplaceInfValues.
+ # negative infinity + if torch.isinf(out_tensor)[0] and (out_tensor < 0): + out_tensor = torch.tensor( + [torch.finfo(torch.float32).min], dtype=node.meta["val"].dtype + ) + # positive infinity + elif torch.isinf(out_tensor)[0] and (out_tensor > 0): + out_tensor = torch.tensor( + [torch.finfo(torch.float32).max], dtype=node.meta["val"].dtype + ) + # since we can derive the constant value of the current op at AoT stage, + # we only build a static tensor here for consumers of the current node + # to reference the data correctly + self.define_tensor( + node, + node, + out_tensor, + PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC, + nodes_to_wrappers, + ) diff --git a/backends/qualcomm/builders/op_sqrt.py b/backends/qualcomm/builders/op_sqrt.py index dc6691460ca..030e6c3e10a 100644 --- a/backends/qualcomm/builders/op_sqrt.py +++ b/backends/qualcomm/builders/op_sqrt.py @@ -10,7 +10,7 @@ import torch from .node_visitor import NodeVisitor, register_node_visitor -from .qnn_constants import OpSqrt, QNN_OP_PACKAGE_NAME_QTI_AISW +from .qnn_constants import OpElementWiseSqrt, QNN_OP_PACKAGE_NAME_QTI_AISW @register_node_visitor @@ -51,7 +51,7 @@ def define_node( sqrt_op = PyQnnWrapper.PyQnnOpWrapper( node.name, QNN_OP_PACKAGE_NAME_QTI_AISW, - OpSqrt.op_name, + OpElementWiseSqrt.op_name, ) sqrt_op.AddInputTensors(sqrt_input_tensors) sqrt_op.AddOutputTensors(sqrt_output_tensors) diff --git a/backends/qualcomm/builders/op_stack.py b/backends/qualcomm/builders/op_stack.py new file mode 100644 index 00000000000..616d0ee0ccc --- /dev/null +++ b/backends/qualcomm/builders/op_stack.py @@ -0,0 +1,71 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree.
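Condensing the scalar_tensor logic above: the builder folds the op into a static QNN tensor at AoT time, and in FP mode any +/-inf value is clamped to the finite float32 extremes (fold_scalar is an illustrative helper, not backend code):

import torch

def fold_scalar(val, dtype):
    t = torch.tensor([val], dtype=dtype)
    if torch.isinf(t)[0]:
        # +/-inf cannot live in a static tensor fed to the quant pipeline
        bound = torch.finfo(torch.float32)
        t = torch.tensor([bound.min if t[0] < 0 else bound.max], dtype=dtype)
    return t

assert fold_scalar(float("-inf"), torch.float32).item() == torch.finfo(torch.float32).min
assert fold_scalar(2.5, torch.float32).item() == 2.5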
+from typing import cast, Dict + +import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper + +import numpy as np +import torch +from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER, QCOM_DATA + +from .node_visitor import NodeVisitor, register_node_visitor +from .qnn_constants import OpPack, QNN_OP_PACKAGE_NAME_QTI_AISW + + +@register_node_visitor +class Stack(NodeVisitor): + target = ["aten.stack.default"] + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper], + ) -> PyQnnWrapper.PyQnnOpWrapper: + input_node_list = node.args[0] + stack_input_tensors = [] + for input_node in input_node_list: + input_tensor = self.get_tensor(input_node, node) + stack_inp_tensor_wrapper = self.define_tensor( + input_node, + node, + input_tensor, + PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE, + nodes_to_wrappers, + ) + stack_input_tensors.append(stack_inp_tensor_wrapper) + output_tensor = self.get_tensor(node, node) + output_tensor_wrapper = self.define_tensor( + node, + node, + output_tensor, + PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE, + nodes_to_wrappers, + ) + stack_output_tensors = [output_tensor_wrapper] + + dim = 0 if len(node.args) == 1 else cast(int, node.args[1]) + if dim < 0: + dim = dim % len(input_tensor.shape) + if QCOM_AXIS_ORDER in node.meta: + dim = node.meta[QCOM_AXIS_ORDER].index(dim) + stack_op = PyQnnWrapper.PyQnnOpWrapper( + node.name, + QNN_OP_PACKAGE_NAME_QTI_AISW, + OpPack.op_name, + ) + stack_op.AddInputTensors(stack_input_tensors) + stack_op.AddOutputTensors(stack_output_tensors) + + stack_op.AddScalarParam( + OpPack.param_axis, + PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_32, + {QCOM_DATA: np.uint32(dim)}, + ) + + return stack_op diff --git a/backends/qualcomm/builders/op_unbind.py b/backends/qualcomm/builders/op_unbind.py new file mode 100644 index 00000000000..8ca62e2a07b --- /dev/null +++ b/backends/qualcomm/builders/op_unbind.py @@ -0,0 +1,73 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
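Two details of the stack builder above recur in several visitors: negative dims are normalized modulo the input rank, and when a tensor was permuted into QNN's preferred layout the axis must be looked up through the recorded axis order. A standalone sketch (the NCHW-to-NHWC order is just an example value for QCOM_AXIS_ORDER):

rank = 4
dim = -1 % rank             # negative dim -> 3
assert dim == 3

# axis_order[i] records which original axis now sits at position i,
# so the new position of `dim` is its index in that tuple.
axis_order = (0, 2, 3, 1)   # NCHW tensor stored as NHWC
assert axis_order.index(dim) == 2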
+from typing import cast, Dict + +import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper + +import numpy as np +import torch +from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER, QCOM_DATA + +from .node_visitor import NodeVisitor, register_node_visitor +from .qnn_constants import OpUnpack, QNN_OP_PACKAGE_NAME_QTI_AISW + + +@register_node_visitor +class Unbind(NodeVisitor): + target = ["aten.unbind.int"] + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper], + ) -> PyQnnWrapper.PyQnnOpWrapper: + input_node = node.args[0] + input_tensor = self.get_tensor(input_node, node) + input_tensor_wrapper = self.define_tensor( + input_node, + node, + input_tensor, + PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC, + nodes_to_wrappers, + ) + unbind_input_tensors = [input_tensor_wrapper] + + unbind_output_tensors = [] + for i in range(len(node.meta["val"])): + output_tensor = self.get_tensor(node, node, i) + output_tensor_wrapper = self.define_tensor( + node, + node, + output_tensor, + PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE, + nodes_to_wrappers, + wrapper_idx=i, + ) + unbind_output_tensors.append(output_tensor_wrapper) + + dim = 0 if len(node.args) == 1 else cast(int, node.args[1]) + if dim < 0: + dim = dim % len(input_tensor.shape) + if QCOM_AXIS_ORDER in node.meta: + dim = node.meta[QCOM_AXIS_ORDER].index(dim) + unbind_op = PyQnnWrapper.PyQnnOpWrapper( + node.name, + QNN_OP_PACKAGE_NAME_QTI_AISW, + OpUnpack.op_name, + ) + unbind_op.AddInputTensors(unbind_input_tensors) + unbind_op.AddOutputTensors(unbind_output_tensors) + + unbind_op.AddScalarParam( + OpUnpack.param_axis, + PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_32, + {QCOM_DATA: np.uint32(dim)}, + ) + + return unbind_op diff --git a/backends/qualcomm/builders/qnn_constants.py b/backends/qualcomm/builders/qnn_constants.py index 1d55d56de0f..9613c755c7c 100644 --- a/backends/qualcomm/builders/qnn_constants.py +++ b/backends/qualcomm/builders/qnn_constants.py @@ -85,6 +85,11 @@ class OpElementWiseAdd: op_name: str = "ElementWiseAdd" +@dataclass(init=False, frozen=True) +class OpElementWiseAnd: + op_name: str = "ElementWiseAnd" + + @dataclass(init=False, frozen=True) class OpElementWiseCeil: op_name = "ElementWiseCeil" @@ -100,6 +105,11 @@ class OpElementWiseDivide: op_name: str = "ElementWiseDivide" +@dataclass(init=False, frozen=True) +class OpElementWiseExp: + op_name: str = "ElementWiseExp" + + @dataclass(init=False, frozen=True) class OpElementWiseEqual: op_name: str = "ElementWiseEqual" @@ -193,11 +203,22 @@ class OpElementWiseSelect: op_name = "ElementWiseSelect" +@dataclass(init=False, frozen=True) +class OpElementWiseSqrt: + op_name = "ElementWiseSquareRoot" + + @dataclass(init=False, frozen=True) class OpElementWiseSubtract: op_name = "ElementWiseSubtract" +@dataclass(init=False, frozen=True) +class OpElu: + op_name: str = "Elu" + param_alpha: str = "alpha" + + @dataclass(init=False, frozen=True) class OpExpandDims: op_name: str = "ExpandDims" @@ -423,11 +444,6 @@ class OpSplit: param_split_index: str = "split_index" -@dataclass(init=False, frozen=True) -class OpSqrt: - op_name: str = "ElementWiseSquareRoot" - - @dataclass(init=False, frozen=True) class OpSqueeze: op_name: str = "Squeeze" @@ -474,3 +490,9 @@ class OpTransposeConv2d: param_pad_amount: str = "pad_amount" param_group: str = "group" param_output_padding: str = "output_padding" + + 
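Unlike most visitors, the unbind builder above registers one output wrapper per result (via wrapper_idx), one for each tensor in node.meta["val"]; this mirrors how aten.unbind.int fans out:

import torch

x = torch.arange(6).reshape(2, 3)
a, b = torch.unbind(x, dim=0)     # one tensor per slice along dim 0
assert a.tolist() == [0, 1, 2] and b.tolist() == [3, 4, 5]
# The unbound dim disappears from each output, so QNN's UnPack emits
# len(node.meta["val"]) tensors of rank input_rank - 1.
assert a.shape == torch.Size([3])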
+@dataclass(init=False, frozen=True) +class OpUnpack: + op_name: str = "UnPack" + param_axis: str = "axis" diff --git a/backends/qualcomm/partition/common_defs.py b/backends/qualcomm/partition/common_defs.py index 8254bb64db0..b427c59ce07 100644 --- a/backends/qualcomm/partition/common_defs.py +++ b/backends/qualcomm/partition/common_defs.py @@ -5,6 +5,8 @@ # LICENSE file in the root directory of this source tree. import _operator +import torch + from executorch.exir.dialects._ops import ops as exir_ops not_supported_operator = [ @@ -20,6 +22,7 @@ exir_ops.edge.aten.arange.start_step, exir_ops.edge.aten.full.default, exir_ops.edge.aten.full_like.default, + torch.ops.aten.scalar_tensor.default, ] allow_list_operator = [ diff --git a/backends/qualcomm/quantizer/annotators.py b/backends/qualcomm/quantizer/annotators.py index c1e1aa25b08..93af5e86c97 100644 --- a/backends/qualcomm/quantizer/annotators.py +++ b/backends/qualcomm/quantizer/annotators.py @@ -378,6 +378,20 @@ def annotate_sin(node: Node, quantization_config: QuantizationConfig) -> None: annotate_single_in_single_out(node, quantization_config) +@register_annotator([torch.ops.aten.scalar_tensor.default]) +def annotate_scalar_tensor(node: Node, quantization_config: QuantizationConfig) -> None: + if _is_annotated([node]): + return + if _is_float_tensor(node): + # workaround: nodes with kwargs could not be correctly annotated + node.kwargs = {} + node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation( + input_qspec_map={}, + output_qspec=quantization_config.output_activation, + _annotated=True, + ) + + @register_annotator([torch.ops.aten.tanh.default]) def annotate_tanh(node: Node, quantization_config: QuantizationConfig) -> None: annotate_single_in_single_out(node, quantization_config) @@ -680,6 +694,11 @@ def annotate_sigmoid(node: Node, quantization_config: QuantizationConfig) -> Non ) +@register_annotator([torch.ops.aten.__and__.Tensor]) +def annotate_and(node: Node, quantization_config: QuantizationConfig) -> None: + annotate_binary(node, quantization_config) + + @register_annotator([torch.ops.aten.bitwise_or.Tensor, torch.ops.aten.__or__.Tensor]) def annotate_bitwise_or(node: Node, quantization_config: QuantizationConfig) -> None: annotate_binary(node, quantization_config) @@ -717,6 +736,11 @@ def annotate_transpose(node: Node, quantization_config: QuantizationConfig) -> N annotate_single_in_single_out(node, quantization_config) +@register_annotator([torch.ops.aten.elu.default]) +def annotate_elu(node: Node, quantization_config: QuantizationConfig) -> None: + annotate_single_in_single_out(node, quantization_config) + + @register_annotator([torch.ops.aten.embedding.default]) def annotate_embedding(node: Node, quantization_config: QuantizationConfig) -> None: weight = node.args[0] @@ -763,6 +787,11 @@ def annotate_index_put(node: Node, quantization_config: QuantizationConfig) -> N ) +@register_annotator([torch.ops.aten.exp.default]) +def annotate_exp(node: Node, quantization_config: QuantizationConfig) -> None: + annotate_single_in_single_out(node, quantization_config) + + @register_annotator([torch.ops.aten.expand.default, torch.ops.aten.expand_as.default]) def annotate_expand(node: Node, quantization_config: QuantizationConfig) -> None: annotate_in_out_obs_sharing_op(node, quantization_config) @@ -812,18 +841,28 @@ def annotate_flatten(node: Node, quantization_config: QuantizationConfig) -> Non @register_annotator([torch.ops.aten.stack.default]) def annotate_stack(node: Node, quantization_config: QuantizationConfig) -> None:
+ input_nodes = node.args[0] + if _is_annotated([node]) or not _is_float_tensor(node): + return + + assert isinstance(input_nodes, Sequence) + + first_input_node = input_nodes[0] input_qspec_map = {} - for input_act in node.args[0]: - assert isinstance(input_act, Node) - input_qspec_map[input_act] = quantization_config.input_activation + assert isinstance(first_input_node, Node) + input_qspec_map[first_input_node] = quantization_config.input_activation + share_qparams_with_input_act0_qspec = SharedQuantizationSpec( + (first_input_node, node) + ) - node_tensor = node.meta.get("val") - if torch.is_tensor(node_tensor) and node_tensor.dtype == torch.int64: - continue + for input_node in input_nodes[1:]: + if input_node not in input_qspec_map: + assert isinstance(input_node, Node) + input_qspec_map[input_node] = share_qparams_with_input_act0_qspec node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation( input_qspec_map=input_qspec_map, - output_qspec=quantization_config.output_activation, + output_qspec=share_qparams_with_input_act0_qspec, _annotated=True, ) @@ -894,6 +933,7 @@ def annotate_bmm(node: Node, quantization_config: QuantizationConfig) -> None: torch.ops.aten.conv2d.default, torch.ops.aten.conv1d.default, torch.ops.aten.conv_transpose2d.input, + torch.ops.aten.conv_transpose1d.default, ] ) def annotate_conv2d(node: Node, quantization_config: QuantizationConfig) -> None: @@ -1059,7 +1099,7 @@ def annotate_layer_norm(node: Node, quantization_config: QuantizationConfig) -> @register_annotator([torch.ops.aten.cat.default, torch.ops.aten.concat.default]) def annotate_cat(node: Node, quantization_config: QuantizationConfig) -> None: input_nodes = node.args[0] - if _is_annotated([node]): + if _is_annotated([node]) or not _is_float_tensor(node): return assert isinstance(input_nodes, Sequence) @@ -1087,23 +1127,28 @@ def annotate_cat(node: Node, quantization_config: QuantizationConfig) -> None: @register_annotator([torch.ops.aten.unbind.int]) def annotate_unbind(node: Node, quantization_config: QuantizationConfig) -> None: - if _is_annotated([node]): + # unbind.int can be either float or int; only quantize when the input is float.
+ if _is_annotated([node]) or not _is_float_tensor(node.args[0]): return input_qspec_map = {} input_act = node.args[0] assert isinstance(input_act, Node) + share_qparams_with_out_node0_qspec = SharedQuantizationSpec((node.args[0], node)) input_qspec_map[input_act] = quantization_config.input_activation - node_tensor = node.meta.get("val") - if torch.is_tensor(node_tensor) and node_tensor.dtype == torch.int64: - return - node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation( input_qspec_map=input_qspec_map, + output_qspec=share_qparams_with_out_node0_qspec, _annotated=True, ) + for user in node.users: + user.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation( + output_qspec=share_qparams_with_out_node0_qspec, + _annotated=True, + ) + @register_annotator([torch.ops.aten.split.Tensor, torch.ops.aten.chunk.default]) def annotate_chunk(node: Node, quantization_config: QuantizationConfig) -> None: @@ -1129,22 +1174,33 @@ def annotate_chunk(node: Node, quantization_config: QuantizationConfig) -> None: @register_annotator([torch.ops.aten.where.self]) def annotate_where(node: Node, quantization_config: QuantizationConfig) -> None: - true_input_act = node.args[1] - false_input_act = node.args[2] if _is_annotated([node]): return - _annotate_input_qspec_map( - node, - true_input_act, - quantization_config.input_activation, - ) + input_qspec_map = {} + for input_node in node.args: + assert isinstance(input_node, Node) + if _is_float_tensor(input_node): + input_qspec_map[input_node] = quantization_config.input_activation - _annotate_input_qspec_map( - node, - false_input_act, - quantization_config.input_activation, + node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation( + input_qspec_map=input_qspec_map, + output_qspec=( + quantization_config.output_activation if _is_float_tensor(node) else None ), + _annotated=True, ) - _annotate_output_qspec(node, quantization_config.output_activation) - _mark_nodes_as_annotated([node]) + +@register_annotator([torch.ops.aten.zeros.default]) +def annotate_zeros(node: Node, quantization_config: QuantizationConfig) -> None: + if _is_annotated([node]) or not _is_float_tensor(node): + return + + # workaround: nodes with kwargs could not be correctly annotated + node.kwargs = {} + node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation( + input_qspec_map={}, + output_qspec=quantization_config.output_activation, + _annotated=True, + ) diff --git a/backends/qualcomm/quantizer/quantizer.py b/backends/qualcomm/quantizer/quantizer.py index 38570835bea..028ffb69f1d 100644 --- a/backends/qualcomm/quantizer/quantizer.py +++ b/backends/qualcomm/quantizer/quantizer.py @@ -10,12 +10,14 @@ import torch from executorch.backends.qualcomm._passes import ( DecomposeEinsum, + DecomposeExpM1, DecomposeLinalgVectorNorm, DecomposeSilu, LiftConstantScalarOperands, RecomposePixelUnshuffle, ReduceDynamicRange, - ReplaceInfBuffer, + ReplaceArangeArgs, + ReplaceInfValues, ) from executorch.backends.transforms.decompose_sdpa import ( DecomposeScaledDotProductAttention, ) @@ -273,11 +275,13 @@ def set_per_channel_linear_quant(self, enable: bool) -> None: def transform_for_annotation(self, model: GraphModule) -> GraphModule: model = ReduceDynamicRange()(model).graph_module model = RecomposePixelUnshuffle(quantization_capture=True)(model).graph_module + model = ReplaceArangeArgs()(model).graph_module model = DecomposeScaledDotProductAttention()(model).graph_module model = DecomposeSilu()(model).graph_module model = DecomposeEinsum()(model).graph_module + model =
DecomposeExpM1()(model).graph_module model = DecomposeLinalgVectorNorm(aten_dialect_capture=True)(model).graph_module - model = ReplaceInfBuffer()(model).graph_module + model = ReplaceInfValues()(model).graph_module model = LiftConstantScalarOperands()(model).graph_module return model diff --git a/backends/qualcomm/tests/models.py b/backends/qualcomm/tests/models.py index e5a9be8e75b..c3c439261d2 100644 --- a/backends/qualcomm/tests/models.py +++ b/backends/qualcomm/tests/models.py @@ -8,6 +8,19 @@ # module with related operator only + + +class And(torch.nn.Module): + def __init__(self, pos, neg): + super().__init__() + self.pos = pos + self.neg = neg + + def forward(self, x, y): + bitwise_and = torch.bitwise_and(x, y).bool() + return torch.where(bitwise_and, self.pos, self.neg) + + class Abs(torch.nn.Module): def __init__(self): super().__init__() @@ -462,6 +475,17 @@ def forward(self, x): return self.conv(x) +class ConvTranspose1dSingle(torch.nn.Module): + def __init__(self, bias=True): + super().__init__() + self.conv_transpose = torch.nn.ConvTranspose1d( + in_channels=1, out_channels=3, kernel_size=3, stride=2, padding=1, bias=bias + ) + + def forward(self, x): + return self.conv_transpose(x) + + class ConvTranspose2dSingle(torch.nn.Module): def __init__(self, bias=True): super().__init__() @@ -601,6 +625,15 @@ def forward(self, i, j): return torch.relu(torch.einsum("i,j->ij", i, j)) +class Elu(torch.nn.Module): + def __init__(self): + super().__init__() + self.elu = torch.nn.ELU(alpha=0.5) + + def forward(self, i): + return self.elu(i) + + class Embedding(torch.nn.Module): def __init__(self): super().__init__() @@ -645,6 +678,14 @@ def forward(self, x): return y.expand_as(x) +class ExpM1(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.special.expm1(x) + + class Full(torch.nn.Module): def __init__(self, fill, shape): super().__init__() @@ -1383,8 +1424,8 @@ class Stack(torch.nn.Module): def __init__(self): super().__init__() - def forward(self, x, y): - return torch.stack((x, y)) + def forward(self, x, y, z): + return torch.stack((x, y, z)) class Sub(torch.nn.Module): @@ -1493,3 +1534,29 @@ def __init__(self, pos, neg): def forward(self, x): return torch.where(x >= torch.zeros(x.shape), self.pos, self.neg) + + +class WhereConstantOther(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.where(x >= 0, torch.ones(x.shape), 0) + + +class WhereConstantAll(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.where(x >= 0, 1, 0) + + +class WhereConstantInf(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.nn.functional.softmax( + torch.where(x >= 0, 0.1, float("-inf")), dim=-1 + ) diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index 936b9c3efe4..05e368f372e 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -195,6 +195,16 @@ def test_qnn_backend_conv2d_channel_last(self): with self.subTest(i=i): self.lower_module_and_test_output(module, sample_input) + def test_qnn_backend_conv_transpose1d(self): + modules = [ + ConvTranspose1dSingle(), # noqa: F405 + ConvTranspose1dSingle(bias=False), # noqa: F405 + ] + sample_input = (torch.randn([1, 1, 3]),) + for i, module in enumerate(modules): + with self.subTest(i=i): + self.lower_module_and_test_output(module, sample_input) + def 
test_qnn_backend_conv_transpose2d(self): modules = [ ConvTranspose2dSingle(), # noqa: F405 @@ -255,6 +265,14 @@ def test_qnn_backend_element_wise_add(self): self.lower_module_and_test_output(module, sample_input) index += 1 + def test_qnn_backend_element_wise_and(self): + module = And(torch.tensor(1.7), torch.tensor(0.2)) # noqa: F405 + sample_input = ( + torch.tensor([1, 0, 1, 0], dtype=torch.bool), + torch.tensor([1, 1, 0, 0], dtype=torch.bool), + ) + self.lower_module_and_test_output(module, sample_input) + def test_qnn_backend_element_wise_ceil(self): module = Ceil() # noqa: F405 sample_input = (torch.randn([2, 5, 1, 3]),) @@ -369,6 +387,12 @@ def test_qnn_backend_element_wise_sub(self): self.lower_module_and_test_output(module, sample_input) index += 1 + @unittest.expectedFailure + def test_qnn_backend_elu(self): + module = Elu() # noqa: F405 + sample_input = (torch.randn(2, 5, 1, 3),) + self.lower_module_and_test_output(module, sample_input) + def test_qnn_backend_embedding(self): module = Embedding() # noqa: F405 sample_input = (torch.Tensor([[1, 2, 4, 5], [4, 3, 2, 9]]).to(torch.int32),) @@ -398,6 +422,11 @@ def test_qnn_backend_expand(self): with self.subTest(i=i): self.lower_module_and_test_output(module, sample_input) + def test_qnn_backend_expm1(self): + sample_input = (torch.randn(3, 4, 5),) + module = ExpM1() # noqa: F405 + self.lower_module_and_test_output(module, sample_input) + def test_qnn_backend_full(self): shape = (1, 2, 3, 4) module = Full(0.5, shape) # noqa: F405 @@ -758,7 +787,11 @@ def test_qnn_backend_slice_copy(self): def test_qnn_backend_stack(self): module = Stack() # noqa: F405 - sample_input = (torch.randn([1, 2, 3, 4]), torch.randn([1, 2, 3, 4])) + sample_input = ( + torch.randn([1, 2, 3, 4]), + torch.randn([1, 2, 3, 4]), + torch.randn([1, 2, 3, 4]), + ) self.lower_module_and_test_output(module, sample_input) def test_qnn_backend_softmax(self): @@ -800,10 +833,16 @@ def test_qnn_backend_where(self): modules = [ Where(), # noqa: F405 WhereConstant(torch.randn(3, 2), torch.randn(3, 2)), # noqa: F405 + WhereConstantOther(), # noqa: F405 + # WhereConstantAll(), # noqa: F405 TODO: constant dtype does not propagate when doing const i64->i32, causing where to fail since where does not support int64 output + WhereConstantInf(), # noqa: F405 ] sample_inputs = [ (torch.randn(3, 2), torch.randn(3, 2), torch.randn(3, 2)), (torch.randn(3, 2),), + (torch.randn(3, 2),), + # (torch.randn(3, 2),), + (torch.randn(30, 20),), ] for i, module in enumerate(modules): self.lower_module_and_test_output(module, sample_inputs[i]) @@ -1206,6 +1245,17 @@ def test_qnn_backend_conv2d_channel_last(self): module = self.get_qdq_module(module, sample_input) self.lower_module_and_test_output(module, sample_input) + def test_qnn_backend_conv_transpose1d(self): + modules = [ + ConvTranspose1dSingle(), # noqa: F405 + ConvTranspose1dSingle(bias=False), # noqa: F405 + ] + sample_input = (torch.randn([1, 1, 3]),) + for i, module in enumerate(modules): + with self.subTest(i=i): + module = self.get_qdq_module(module, sample_input) + self.lower_module_and_test_output(module, sample_input) + def test_qnn_backend_conv_transpose2d(self): modules = [ ConvTranspose2dSingle(), # noqa: F405 @@ -1271,6 +1321,15 @@ def test_qnn_backend_element_wise_add(self): self.lower_module_and_test_output(module, sample_input) index += 1 + def test_qnn_backend_element_wise_and(self): + module = And(torch.tensor(1.7), torch.tensor(0.2)) # noqa: F405 + sample_input = ( + torch.tensor([1, 0, 1, 0], dtype=torch.bool), +
torch.tensor([1, 1, 0, 0], dtype=torch.bool), + ) + module = self.get_qdq_module(module, sample_input) + self.lower_module_and_test_output(module, sample_input) + def test_qnn_backend_element_wise_ceil(self): module = Ceil() # noqa: F405 sample_input = (torch.randn([2, 5, 1, 3]),) @@ -1391,6 +1450,12 @@ def test_qnn_backend_element_wise_sub(self): self.lower_module_and_test_output(module, sample_input) index += 1 + def test_qnn_backend_elu(self): + module = Elu() # noqa: F405 + sample_input = (torch.randn(2, 5, 1, 3),) + module = self.get_qdq_module(module, sample_input) + self.lower_module_and_test_output(module, sample_input) + def test_qnn_backend_embedding(self): module = Embedding() # noqa: F405 sample_input = (torch.Tensor([[1, 2, 4, 5], [4, 3, 2, 9]]).to(torch.int32),) @@ -1423,6 +1488,12 @@ def test_qnn_backend_expand(self): module = self.get_qdq_module(module, sample_input) self.lower_module_and_test_output(module, sample_input) + def test_qnn_backend_expm1(self): + sample_input = (torch.randn(3, 4, 5),) + module = ExpM1() # noqa: F405 + module = self.get_qdq_module(module, sample_input) + self.lower_module_and_test_output(module, sample_input) + def test_qnn_backend_full(self): shape = (1, 2, 3, 4) module = Full(0.5, shape) # noqa: F405 @@ -1856,6 +1927,7 @@ def test_qnn_backend_stack(self): sample_input = ( torch.randn([1, 2, 3, 4]), torch.randn([1, 2, 3, 4]), + torch.randn([1, 2, 3, 4]), ) module = self.get_qdq_module(module, sample_input) self.lower_module_and_test_output(module, sample_input) @@ -1894,10 +1966,16 @@ def test_qnn_backend_where(self): modules = [ Where(), # noqa: F405 WhereConstant(torch.randn(3, 2), torch.randn(3, 2)), # noqa: F405 + WhereConstantOther(), # noqa: F405 + # WhereConstantAll(), # noqa: F405, TODO: constant dtype does not propagate when doing const i64->i32, causing where to fail since where does not support int64 output + WhereConstantInf(), # noqa: F405 ] sample_inputs = [ (torch.randn(3, 2), torch.randn(3, 2), torch.randn(3, 2)), (torch.randn(3, 2),), + (torch.randn(3, 2),), + # (torch.randn(3, 2),), + (torch.randn(30, 20),), ] for i, module in enumerate(modules): module = self.get_qdq_module(module, sample_inputs[i]) diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py index 8045e9e6443..7033f30997a 100644 --- a/backends/qualcomm/utils/utils.py +++ b/backends/qualcomm/utils/utils.py @@ -21,8 +21,10 @@ AnnotateQuantAttrs, ConstantI64toI32, ConvertBmmToMatmul, + ConvertConv1dToConv2d, ConvertToLinear, DecomposeAny, + DecomposeExpM1, DecomposeLinalgVectorNorm, ExpandBroadcastTensorShape, FoldQDQ, @@ -321,11 +323,12 @@ def canonicalize_program(obj): update_spill_fill_size(obj) -def get_decomp_table() -> Dict[torch._ops.OperatorBase, Callable]: +def get_decomp_table(passes_job) -> Dict[torch._ops.OperatorBase, Callable]: source_decompositions = core_aten_decompositions() # The below super ops are supported by QNN skip_decompositions = [ torch.ops.aten.adaptive_avg_pool2d.default, + torch.ops.aten.elu.default, torch.ops.aten.instance_norm.default, torch.ops.aten.pixel_shuffle.default, torch.ops.aten.pixel_unshuffle.default, @@ -334,8 +337,17 @@ def get_decomp_table() -> Dict[torch._ops.OperatorBase, Callable]: torch.ops.pt2e_quant.quantize_affine.default, torch.ops.pt2e_quant.dequantize_affine.default, torch.ops.aten._safe_softmax.default, + torch.ops.aten.stack.default, + torch.ops.aten.unbind.int, ] + # If we want to annotate the decomposed ops, the ops must actually be decomposed, so remove them from the skip list.
+ if passes_job and passes_job.get(AnnotateDecomposed, False): + skip_decompositions = [ + skip_decomp_op + for skip_decomp_op in skip_decompositions + if skip_decomp_op not in AnnotateDecomposed.decomp_ops + ] remove_decompositions(source_decompositions, skip_decompositions) return source_decompositions @@ -353,10 +365,11 @@ def get_capture_program_passes(): # The second value in each tuple in `default_passes_and_setting` indicates whether the corresponding pass is activated by default. # If a pass is activated, it will be executed by default. default_passes_and_setting = [ - (AnnotateDecomposed, True), + (AnnotateDecomposed, False), (AnnotateQuantAttrs, True), (ConstantI64toI32, True), (ConvertBmmToMatmul, True), + (ConvertConv1dToConv2d, True), (ConvertToLinear, True), (DecomposeAny, True), (DecomposeLinalgVectorNorm, True), @@ -448,6 +461,7 @@ def _preprocess_module(module: torch.nn.Module, inputs: Tuple[torch.Tensor]): module = torch.export.export(module, inputs, strict=True).module() module = DecomposeScaledDotProductAttention()(module).graph_module module = DecomposeLinalgVectorNorm(True)(module).graph_module + module = DecomposeExpM1()(module).graph_module module = LiftConstantScalarOperands()(module).graph_module return module @@ -460,7 +474,8 @@ def capture_program( ) -> exir.ExirExportedProgram: module = _preprocess_module(module, inputs) ep = torch.export.export(module, inputs, dynamic_shapes=dynamic_shapes, strict=True) - decomposed_ep = ep.run_decompositions(get_decomp_table()) + # TODO: Handle the stack op. To run the annotate_decomposed pass for stack, the op must actually be decomposed, which means finding a way to remove it from the skip_decomposition table + decomposed_ep = ep.run_decompositions(get_decomp_table(passes_job)) core_ep = ExirExportedProgram(decomposed_ep, False) core_ep.transform(TensorI64toI32(edge_program=core_ep)) edge_ep = core_ep.to_edge(qnn_edge_config()) diff --git a/backends/xnnpack/test/models/deeplab_v3.py b/backends/xnnpack/test/models/deeplab_v3.py index 9913296521d..c47832b63d1 100644 --- a/backends/xnnpack/test/models/deeplab_v3.py +++ b/backends/xnnpack/test/models/deeplab_v3.py @@ -23,6 +23,9 @@ def forward(self, *args): class TestDeepLabV3(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + dl3 = DL3Wrapper() dl3 = dl3.eval() model_inputs = (torch.randn(1, 3, 224, 224),) diff --git a/backends/xnnpack/test/models/edsr.py b/backends/xnnpack/test/models/edsr.py index 34b5898cf41..138ea62ddf5 100644 --- a/backends/xnnpack/test/models/edsr.py +++ b/backends/xnnpack/test/models/edsr.py @@ -14,6 +14,9 @@ class TestEDSR(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + edsr = edsr_r16f64(2, False).eval() # noqa model_inputs = (torch.randn(1, 3, 224, 224),) diff --git a/backends/xnnpack/test/models/emformer_rnnt.py b/backends/xnnpack/test/models/emformer_rnnt.py index 5cf4337307c..d5125361def 100644 --- a/backends/xnnpack/test/models/emformer_rnnt.py +++ b/backends/xnnpack/test/models/emformer_rnnt.py @@ -13,6 +13,9 @@ class TestEmformerModel(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class EmformerRnnt(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/models/inception_v3.py b/backends/xnnpack/test/models/inception_v3.py index 59fd56d6af7..d5d6d086206 100644 --- a/backends/xnnpack/test/models/inception_v3.py +++ b/backends/xnnpack/test/models/inception_v3.py @@ -13,6 +13,9 @@ class TestInceptionV3(unittest.TestCase): + def setUp(self): +
torch._dynamo.reset() + ic3 = models.inception_v3(weights="IMAGENET1K_V1").eval() # noqa model_inputs = (torch.randn(1, 3, 224, 224),) diff --git a/backends/xnnpack/test/models/inception_v4.py b/backends/xnnpack/test/models/inception_v4.py index e8a785116a3..c5239bb2dd3 100644 --- a/backends/xnnpack/test/models/inception_v4.py +++ b/backends/xnnpack/test/models/inception_v4.py @@ -12,6 +12,9 @@ class TestInceptionV4(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + ic4 = inception_v4(pretrained=False).eval() model_inputs = (torch.randn(3, 299, 299).unsqueeze(0),) diff --git a/backends/xnnpack/test/models/llama2_et_example.py b/backends/xnnpack/test/models/llama2_et_example.py index f1dce43c3c9..378f9dd3d48 100644 --- a/backends/xnnpack/test/models/llama2_et_example.py +++ b/backends/xnnpack/test/models/llama2_et_example.py @@ -13,6 +13,9 @@ class TestLlama2ETExample(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + def test_f32(self): self._test() diff --git a/backends/xnnpack/test/models/mobilebert.py b/backends/xnnpack/test/models/mobilebert.py index ca18e6c265d..57c099e87d1 100644 --- a/backends/xnnpack/test/models/mobilebert.py +++ b/backends/xnnpack/test/models/mobilebert.py @@ -12,6 +12,9 @@ class TestMobilebert(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + # pyre-ignore mobilebert = MobileBertModel(MobileBertConfig()).eval() example_inputs = (torch.tensor([[101, 7592, 1010, 2026, 3899, 2003, 10140, 102]]),) diff --git a/backends/xnnpack/test/models/mobilenet_v2.py b/backends/xnnpack/test/models/mobilenet_v2.py index 4ee28af6b95..2ff93303d50 100644 --- a/backends/xnnpack/test/models/mobilenet_v2.py +++ b/backends/xnnpack/test/models/mobilenet_v2.py @@ -14,6 +14,9 @@ class TestMobileNetV2(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + mv2 = models.mobilenetv2.mobilenet_v2(weights=MobileNet_V2_Weights) mv2 = mv2.eval() model_inputs = (torch.randn(1, 3, 224, 224),) diff --git a/backends/xnnpack/test/models/mobilenet_v3.py b/backends/xnnpack/test/models/mobilenet_v3.py index cacd8b5cc87..f64b7352b7f 100644 --- a/backends/xnnpack/test/models/mobilenet_v3.py +++ b/backends/xnnpack/test/models/mobilenet_v3.py @@ -13,6 +13,9 @@ class TestMobileNetV3(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + mv3 = models.mobilenetv3.mobilenet_v3_small(pretrained=True) mv3 = mv3.eval() model_inputs = (torch.randn(1, 3, 224, 224),) diff --git a/backends/xnnpack/test/models/resnet.py b/backends/xnnpack/test/models/resnet.py index 4ad6a7d5f47..9f4989e1724 100644 --- a/backends/xnnpack/test/models/resnet.py +++ b/backends/xnnpack/test/models/resnet.py @@ -13,6 +13,9 @@ class TestResNet18(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + inputs = (torch.randn(1, 3, 224, 224),) dynamic_shapes = ( { diff --git a/backends/xnnpack/test/models/torchvision_vit.py b/backends/xnnpack/test/models/torchvision_vit.py index 6bebd284e53..f9153032cd8 100644 --- a/backends/xnnpack/test/models/torchvision_vit.py +++ b/backends/xnnpack/test/models/torchvision_vit.py @@ -12,6 +12,9 @@ class TestViT(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + vit = models.vision_transformer.vit_b_16(weights="IMAGENET1K_V1") vit = vit.eval() model_inputs = (torch.randn(1, 3, 224, 224),) diff --git a/backends/xnnpack/test/models/very_big_model.py b/backends/xnnpack/test/models/very_big_model.py index 3545287c628..f4f10f1611c 100644 --- a/backends/xnnpack/test/models/very_big_model.py +++ 
b/backends/xnnpack/test/models/very_big_model.py @@ -11,6 +11,9 @@ class TestVeryBigModel(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class BigModel(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/models/w2l.py b/backends/xnnpack/test/models/w2l.py index 07b3bf56b32..216fb7a89f0 100644 --- a/backends/xnnpack/test/models/w2l.py +++ b/backends/xnnpack/test/models/w2l.py @@ -12,6 +12,9 @@ class TestW2L(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + batch_size = 10 input_frames = 700 vocab_size = 4096 diff --git a/backends/xnnpack/test/ops/test_abs.py b/backends/xnnpack/test/ops/test_abs.py index a41bee47470..48feaafb0e1 100644 --- a/backends/xnnpack/test/ops/test_abs.py +++ b/backends/xnnpack/test/ops/test_abs.py @@ -11,6 +11,9 @@ class TestAbs(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Abs(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/ops/test_add.py b/backends/xnnpack/test/ops/test_add.py index 29a87df1303..2416879f5ce 100644 --- a/backends/xnnpack/test/ops/test_add.py +++ b/backends/xnnpack/test/ops/test_add.py @@ -11,6 +11,9 @@ class TestAdd(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Add(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/ops/test_avgpool2d.py b/backends/xnnpack/test/ops/test_avgpool2d.py index b471fd914c2..c1f149e5a93 100644 --- a/backends/xnnpack/test/ops/test_avgpool2d.py +++ b/backends/xnnpack/test/ops/test_avgpool2d.py @@ -11,6 +11,9 @@ class TestAvgPool2d(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class AvgPool2d(torch.nn.Module): def __init__( self, count_include_pad=False, ceil_mode=False, divisor_override=None diff --git a/backends/xnnpack/test/ops/test_bilinear2d.py b/backends/xnnpack/test/ops/test_bilinear2d.py index 24c990d6bb1..1fd3c147328 100644 --- a/backends/xnnpack/test/ops/test_bilinear2d.py +++ b/backends/xnnpack/test/ops/test_bilinear2d.py @@ -14,6 +14,9 @@ class TestUpsampleBilinear2d(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class StaticResizeBilinear2dModule(torch.nn.Module): def forward(self, x): a = torch.nn.functional.interpolate( diff --git a/backends/xnnpack/test/ops/test_bmm.py b/backends/xnnpack/test/ops/test_bmm.py index 1c6235e5f7e..a029738e771 100644 --- a/backends/xnnpack/test/ops/test_bmm.py +++ b/backends/xnnpack/test/ops/test_bmm.py @@ -11,6 +11,9 @@ class TestBMM(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class BMM(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/ops/test_cat.py b/backends/xnnpack/test/ops/test_cat.py index dd551ea3fa7..11e246f541a 100644 --- a/backends/xnnpack/test/ops/test_cat.py +++ b/backends/xnnpack/test/ops/test_cat.py @@ -13,6 +13,9 @@ class TestCat(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Cat(torch.nn.Module): def __init__(self, dim=0): super().__init__() diff --git a/backends/xnnpack/test/ops/test_ceil.py b/backends/xnnpack/test/ops/test_ceil.py index 9caee15ad5b..717df6f47e6 100644 --- a/backends/xnnpack/test/ops/test_ceil.py +++ b/backends/xnnpack/test/ops/test_ceil.py @@ -11,6 +11,9 @@ class TestCeil(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Ceil(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/ops/test_check_quant_params.py 
b/backends/xnnpack/test/ops/test_check_quant_params.py index cd18568afba..b76935a9f72 100644 --- a/backends/xnnpack/test/ops/test_check_quant_params.py +++ b/backends/xnnpack/test/ops/test_check_quant_params.py @@ -14,6 +14,9 @@ class TestCheckQuantParams(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + def create_invalid_value_injector( self, invalid_value, is_per_channel=False, is_zp=False ): @@ -46,6 +49,7 @@ def inject_invalid_scale_in_per_tensor(aten): return inject_invalid_scale_in_per_tensor def _test_check_quant_message(self, ep_modifier, expected_message): + torch._dynamo.reset() mod = torch.nn.Linear(10, 10) quantizer = XNNPACKQuantizer() captured = export_for_training(mod, (torch.randn(1, 10),)).module() diff --git a/backends/xnnpack/test/ops/test_clamp.py b/backends/xnnpack/test/ops/test_clamp.py index 9fb8935553e..671d9372e18 100644 --- a/backends/xnnpack/test/ops/test_clamp.py +++ b/backends/xnnpack/test/ops/test_clamp.py @@ -11,6 +11,9 @@ class TestClamp(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Clamp(torch.nn.Module): def __init__(self, min_val=None, max_val=None): super().__init__() diff --git a/backends/xnnpack/test/ops/test_conv1d.py b/backends/xnnpack/test/ops/test_conv1d.py index b4c8c414929..036500b29d5 100644 --- a/backends/xnnpack/test/ops/test_conv1d.py +++ b/backends/xnnpack/test/ops/test_conv1d.py @@ -19,6 +19,9 @@ class TestConv1d(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Conv1d(torch.nn.Module): def __init__(self, dtype: torch.dtype = torch.float): groups = 1 diff --git a/backends/xnnpack/test/ops/test_conv2d.py b/backends/xnnpack/test/ops/test_conv2d.py index d3e5db8df2d..80b731bd18e 100644 --- a/backends/xnnpack/test/ops/test_conv2d.py +++ b/backends/xnnpack/test/ops/test_conv2d.py @@ -170,6 +170,9 @@ def get_inputs(self): class TestConv2d(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + def _test( self, m: torch.nn.Module, diff --git a/backends/xnnpack/test/ops/test_div.py b/backends/xnnpack/test/ops/test_div.py index 9bca5feed48..b53c59df8e1 100644 --- a/backends/xnnpack/test/ops/test_div.py +++ b/backends/xnnpack/test/ops/test_div.py @@ -11,6 +11,9 @@ class TestDiv(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Div(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/ops/test_elu.py b/backends/xnnpack/test/ops/test_elu.py index f976c29d799..68a0c687779 100644 --- a/backends/xnnpack/test/ops/test_elu.py +++ b/backends/xnnpack/test/ops/test_elu.py @@ -11,6 +11,9 @@ class TestElu(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class ELU(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/ops/test_floor.py b/backends/xnnpack/test/ops/test_floor.py index dfbe7fb18c0..5c543fd0534 100644 --- a/backends/xnnpack/test/ops/test_floor.py +++ b/backends/xnnpack/test/ops/test_floor.py @@ -11,6 +11,9 @@ class TestFloor(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Floor(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/ops/test_hardswish.py b/backends/xnnpack/test/ops/test_hardswish.py index 899a119ed44..561551fc433 100644 --- a/backends/xnnpack/test/ops/test_hardswish.py +++ b/backends/xnnpack/test/ops/test_hardswish.py @@ -11,6 +11,9 @@ class TestHardswish(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Hardswish(torch.nn.Module): def __init__(self): super().__init__() diff 
--git a/backends/xnnpack/test/ops/test_hardtanh.py b/backends/xnnpack/test/ops/test_hardtanh.py index e35e840e3c3..6f2914010c7 100644 --- a/backends/xnnpack/test/ops/test_hardtanh.py +++ b/backends/xnnpack/test/ops/test_hardtanh.py @@ -11,6 +11,9 @@ class TestHardTanh(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class HardTanh(torch.nn.Module): def __init__(self, min_val=-1.0, max_val=1.0): super().__init__() diff --git a/backends/xnnpack/test/ops/test_leaky_relu.py b/backends/xnnpack/test/ops/test_leaky_relu.py index 32f73486977..c0921ddf3ad 100644 --- a/backends/xnnpack/test/ops/test_leaky_relu.py +++ b/backends/xnnpack/test/ops/test_leaky_relu.py @@ -11,6 +11,9 @@ class TestLeakyRelu(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class LeakyReLU(torch.nn.Module): def __init__(self, **kwargs): super().__init__() diff --git a/backends/xnnpack/test/ops/test_linear.py b/backends/xnnpack/test/ops/test_linear.py index cf9473180bb..849a1b237e8 100644 --- a/backends/xnnpack/test/ops/test_linear.py +++ b/backends/xnnpack/test/ops/test_linear.py @@ -219,6 +219,9 @@ class TestLinear(unittest.TestCase): should produce strictly better results compared to Per-Tensor Quantization """ + def setUp(self): + torch._dynamo.reset() + @staticmethod def _get_4b_dqconfig() -> QuantizationConfig: # Returns a QuantizationConfig for 4b dynamic quantization for XNNPACK. diff --git a/backends/xnnpack/test/ops/test_lstm.py b/backends/xnnpack/test/ops/test_lstm.py index 6c174b16f33..db4deb9aae4 100644 --- a/backends/xnnpack/test/ops/test_lstm.py +++ b/backends/xnnpack/test/ops/test_lstm.py @@ -14,6 +14,9 @@ class TestLSTM(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class LSTMLinear(torch.nn.Module): def __init__(self, input_size, hidden_size, out_size): super().__init__() diff --git a/backends/xnnpack/test/ops/test_max_dim.py b/backends/xnnpack/test/ops/test_max_dim.py index c660a5a6d26..f209845372e 100644 --- a/backends/xnnpack/test/ops/test_max_dim.py +++ b/backends/xnnpack/test/ops/test_max_dim.py @@ -11,6 +11,9 @@ class TestMaxDim(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Max(torch.nn.Module): def forward(self, x): max_values_1, max_indices_1 = torch.max(x, dim=2, keepdim=True) diff --git a/backends/xnnpack/test/ops/test_maximum.py b/backends/xnnpack/test/ops/test_maximum.py index 30dfa5503a9..c594452631c 100644 --- a/backends/xnnpack/test/ops/test_maximum.py +++ b/backends/xnnpack/test/ops/test_maximum.py @@ -11,6 +11,9 @@ class TestMaximum(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Maximum(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/ops/test_maxpool2d.py b/backends/xnnpack/test/ops/test_maxpool2d.py index 521235232a2..f82b27b09ec 100644 --- a/backends/xnnpack/test/ops/test_maxpool2d.py +++ b/backends/xnnpack/test/ops/test_maxpool2d.py @@ -15,6 +15,9 @@ class TestMaxPool2d(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class MaxPool2d(torch.nn.Module): def __init__(self, kernel_size=3, stride=1, padding=0, dilation=1): super().__init__() diff --git a/backends/xnnpack/test/ops/test_mean_dim.py b/backends/xnnpack/test/ops/test_mean_dim.py index 3bac5f3239c..81a93c3e97e 100644 --- a/backends/xnnpack/test/ops/test_mean_dim.py +++ b/backends/xnnpack/test/ops/test_mean_dim.py @@ -11,6 +11,9 @@ class TestMeanDim(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class MeanDim(torch.nn.Module): def __init__(self, dims): 
super().__init__() diff --git a/backends/xnnpack/test/ops/test_minimum.py b/backends/xnnpack/test/ops/test_minimum.py index 406ac8485e5..fe1af3de5ab 100644 --- a/backends/xnnpack/test/ops/test_minimum.py +++ b/backends/xnnpack/test/ops/test_minimum.py @@ -11,6 +11,9 @@ class TestMinimum(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Minimum(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/ops/test_multiply.py b/backends/xnnpack/test/ops/test_multiply.py index db50bc5dd44..3315200005d 100644 --- a/backends/xnnpack/test/ops/test_multiply.py +++ b/backends/xnnpack/test/ops/test_multiply.py @@ -11,6 +11,9 @@ class TestMul(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Mul(torch.nn.Module): def forward(self, x, y): z = x * y diff --git a/backends/xnnpack/test/ops/test_negate.py b/backends/xnnpack/test/ops/test_negate.py index 4d158612e97..5022255e484 100644 --- a/backends/xnnpack/test/ops/test_negate.py +++ b/backends/xnnpack/test/ops/test_negate.py @@ -11,6 +11,9 @@ class TestNegate(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Negate(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/ops/test_permute.py b/backends/xnnpack/test/ops/test_permute.py index b348fc8af6d..2991ba1773d 100644 --- a/backends/xnnpack/test/ops/test_permute.py +++ b/backends/xnnpack/test/ops/test_permute.py @@ -11,6 +11,9 @@ class TestPermute(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Permute(torch.nn.Module): def __init__(self, dims): self.dims = dims diff --git a/backends/xnnpack/test/ops/test_pow.py b/backends/xnnpack/test/ops/test_pow.py index ac902ae44be..2accb010210 100644 --- a/backends/xnnpack/test/ops/test_pow.py +++ b/backends/xnnpack/test/ops/test_pow.py @@ -11,6 +11,9 @@ class TestPow(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Pow(torch.nn.Module): def __init__(self, exp): super().__init__() diff --git a/backends/xnnpack/test/ops/test_prelu.py b/backends/xnnpack/test/ops/test_prelu.py index f73648dfa25..47b2851278c 100644 --- a/backends/xnnpack/test/ops/test_prelu.py +++ b/backends/xnnpack/test/ops/test_prelu.py @@ -11,6 +11,9 @@ class TestPrelu(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class PReLU(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/ops/test_quantize_per_tensor.py b/backends/xnnpack/test/ops/test_quantize_per_tensor.py index c2117987536..9e876c09671 100644 --- a/backends/xnnpack/test/ops/test_quantize_per_tensor.py +++ b/backends/xnnpack/test/ops/test_quantize_per_tensor.py @@ -13,6 +13,9 @@ class TestQuantizePerTensor(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + def test_qs8_quantize_per_tensor(self): class Quant(torch.nn.Module): def forward(self, x): diff --git a/backends/xnnpack/test/ops/test_relu.py b/backends/xnnpack/test/ops/test_relu.py index 8672b1d3e4e..508c1ceeffa 100644 --- a/backends/xnnpack/test/ops/test_relu.py +++ b/backends/xnnpack/test/ops/test_relu.py @@ -11,6 +11,9 @@ class TestRelu(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Relu(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/ops/test_rsqrt.py b/backends/xnnpack/test/ops/test_rsqrt.py index e5d704a0467..5405e966359 100644 --- a/backends/xnnpack/test/ops/test_rsqrt.py +++ b/backends/xnnpack/test/ops/test_rsqrt.py @@ -11,6 +11,9 @@ class 
TestRsqrt(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Rsqrt(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/ops/test_sdpa.py b/backends/xnnpack/test/ops/test_sdpa.py index de5c7174ab5..205b6d4ab36 100644 --- a/backends/xnnpack/test/ops/test_sdpa.py +++ b/backends/xnnpack/test/ops/test_sdpa.py @@ -15,6 +15,9 @@ class TestSDPA(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class SDPA(torch.nn.Module): def __init__(self, scale: Optional[float] = None): super().__init__() diff --git a/backends/xnnpack/test/ops/test_sigmoid.py b/backends/xnnpack/test/ops/test_sigmoid.py index a9acd4df6db..fe55f0f1ef5 100644 --- a/backends/xnnpack/test/ops/test_sigmoid.py +++ b/backends/xnnpack/test/ops/test_sigmoid.py @@ -11,6 +11,9 @@ class TestSigmoid(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Sigmoid(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/ops/test_slice_copy.py b/backends/xnnpack/test/ops/test_slice_copy.py index 8ff37368578..ea65571b1e8 100644 --- a/backends/xnnpack/test/ops/test_slice_copy.py +++ b/backends/xnnpack/test/ops/test_slice_copy.py @@ -11,6 +11,9 @@ class TestSliceCopy(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + def _test_slice_copy(self, module, inputs, copy_count=1, edge_copy_count=1): ( Tester(module, inputs) diff --git a/backends/xnnpack/test/ops/test_softmax.py b/backends/xnnpack/test/ops/test_softmax.py index f909e8ce5f2..bf078860ba5 100644 --- a/backends/xnnpack/test/ops/test_softmax.py +++ b/backends/xnnpack/test/ops/test_softmax.py @@ -11,6 +11,9 @@ class TestSoftmax(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Softmax(torch.nn.Module): def __init__(self, dim): super().__init__() diff --git a/backends/xnnpack/test/ops/test_sqrt.py b/backends/xnnpack/test/ops/test_sqrt.py index eaeb3b9f700..ee800c62568 100644 --- a/backends/xnnpack/test/ops/test_sqrt.py +++ b/backends/xnnpack/test/ops/test_sqrt.py @@ -11,6 +11,9 @@ class TestSqrt(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Sqrt(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/ops/test_square.py b/backends/xnnpack/test/ops/test_square.py index 32a19639343..c7a567239bb 100644 --- a/backends/xnnpack/test/ops/test_square.py +++ b/backends/xnnpack/test/ops/test_square.py @@ -11,6 +11,9 @@ class TestSquare(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Square(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/ops/test_static_constant_pad.py b/backends/xnnpack/test/ops/test_static_constant_pad.py index b1b41afe8cf..c5d103f596a 100644 --- a/backends/xnnpack/test/ops/test_static_constant_pad.py +++ b/backends/xnnpack/test/ops/test_static_constant_pad.py @@ -11,6 +11,9 @@ class TestStaticConstantPad(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class StaticConstantPadFunctional(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/ops/test_sub.py b/backends/xnnpack/test/ops/test_sub.py index fb3d3d3f948..06219730ddb 100644 --- a/backends/xnnpack/test/ops/test_sub.py +++ b/backends/xnnpack/test/ops/test_sub.py @@ -11,6 +11,9 @@ class TestSub(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + class Sub(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/xnnpack/test/passes/test_activation_fusion.py 
b/backends/xnnpack/test/passes/test_activation_fusion.py index 5f340f61b2e..6a1182dc7fb 100644 --- a/backends/xnnpack/test/passes/test_activation_fusion.py +++ b/backends/xnnpack/test/passes/test_activation_fusion.py @@ -16,6 +16,9 @@ class TestActivationFusion(unittest.TestCase): PassStage = RunPasses([ConvertToLinearPass, FuseActivationPass]) + def setUp(self): + torch._dynamo.reset() + def check_node_has_tag(self, graph_module, node_target, tag): for n in graph_module.graph.nodes: if n.op == "call_function" and n.target == node_target: diff --git a/backends/xnnpack/test/passes/test_batch_norm_fusion.py b/backends/xnnpack/test/passes/test_batch_norm_fusion.py index 59d0e0a2072..70c93c3751b 100644 --- a/backends/xnnpack/test/passes/test_batch_norm_fusion.py +++ b/backends/xnnpack/test/passes/test_batch_norm_fusion.py @@ -18,6 +18,9 @@ class TestBatchNormFusion(unittest.TestCase): PassStage = RunPasses([FuseBatchNormWithConvPass]) bn_name = "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default" + def setUp(self): + torch._dynamo.reset() + class ModelConvBN(torch.nn.Module): def __init__( self, diff --git a/backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py b/backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py index c1438b29213..6d60f9d76b5 100644 --- a/backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py +++ b/backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py @@ -17,6 +17,9 @@ class TestChannelsLastTaggedReshapePass(unittest.TestCase): + def setUp(self): + torch._dynamo.reset() + PassStage = RunPasses([ChannelsLastTaggedReshapePass]) # Dictionary mapping modules to expected number of reshapes modules = { diff --git a/backends/xnnpack/test/passes/test_convert_to_linear.py b/backends/xnnpack/test/passes/test_convert_to_linear.py index a07f8cf61ce..0e7bc7d01c4 100644 --- a/backends/xnnpack/test/passes/test_convert_to_linear.py +++ b/backends/xnnpack/test/passes/test_convert_to_linear.py @@ -14,6 +14,9 @@ class TestConvertToLinear(unittest.TestCase): PassStage = RunPasses([ConvertToLinearPass]) + def setUp(self): + torch._dynamo.reset() + def test_fp32_convert_to_linear(self): in_sizes = [1, 4, 4] input_sizes = [4, 37, 17] @@ -21,6 +24,7 @@ def test_fp32_convert_to_linear(self): bias_vals = [True, True, False] for i, _ in enumerate(in_sizes): + torch._dynamo.reset() in_size = int(in_sizes[i]) input_size = int(input_sizes[i]) output_size = int(output_sizes[i]) diff --git a/backends/xnnpack/test/passes/test_decompose_cat_pass.py b/backends/xnnpack/test/passes/test_decompose_cat_pass.py index beb1761aec8..38537a99c4d 100644 --- a/backends/xnnpack/test/passes/test_decompose_cat_pass.py +++ b/backends/xnnpack/test/passes/test_decompose_cat_pass.py @@ -16,6 +16,9 @@ class TestDecomposeCatPass(unittest.TestCase): PassStage = RunPasses([DecomposeConcatenate]) cat_name = "executorch_exir_dialects_edge__ops_aten_cat_default" + def setUp(self): + torch._dynamo.reset() + class Cat(torch.nn.Module): def forward(self, *args): xs = [*args] diff --git a/backends/xnnpack/test/passes/test_remove_get_item_pass.py b/backends/xnnpack/test/passes/test_remove_get_item_pass.py index 2365c9bba0c..4d71d61afd7 100644 --- a/backends/xnnpack/test/passes/test_remove_get_item_pass.py +++ b/backends/xnnpack/test/passes/test_remove_get_item_pass.py @@ -16,6 +16,9 @@ class TestRemoveGetItemPass(unittest.TestCase): max_pool2d_name = "executorch_exir_dialects_edge__ops_aten_max_pool2d_default" amax_name = 
"executorch_exir_dialects_edge__ops_aten_amax_default" + def setUp(self): + torch._dynamo.reset() + class MaxPool2dModule(torch.nn.Module): def __init__( self, diff --git a/backends/xnnpack/test/passes/test_tag_implicit_q_dq_pass.py b/backends/xnnpack/test/passes/test_tag_implicit_q_dq_pass.py index 05d1ac9e8b6..6fec7726835 100644 --- a/backends/xnnpack/test/passes/test_tag_implicit_q_dq_pass.py +++ b/backends/xnnpack/test/passes/test_tag_implicit_q_dq_pass.py @@ -20,6 +20,9 @@ class TestTagImplicitQDq(unittest.TestCase): PassStage = RunPasses([DuplicateDequantNodePass, TagImplicitQDqPass]) + def setUp(self): + torch._dynamo.reset() + class QDqModule(torch.nn.Module): def __init__(self): super().__init__() diff --git a/docs/source/backends-coreml.md b/docs/source/backends-coreml.md index a06820b2d08..126727735ae 100644 --- a/docs/source/backends-coreml.md +++ b/docs/source/backends-coreml.md @@ -28,12 +28,6 @@ Before starting, make sure you install the Xcode Command Line Tools: xcode-select --install ``` -Finally you must install the CoreML backend by running the following script: -```bash -sh ./backends/apple/coreml/scripts/install_requirements.sh -``` - - ---- ## Using the CoreML Backend diff --git a/docs/source/backends-qualcomm.md b/docs/source/backends-qualcomm.md index 2d2b017aca1..7a2f749e185 100644 --- a/docs/source/backends-qualcomm.md +++ b/docs/source/backends-qualcomm.md @@ -43,10 +43,17 @@ The version is documented in QNN SDK. ### Hardware: You will need an Android smartphone with adb-connected running on one of below Qualcomm SoCs: + - SA8295 - SM8450 (Snapdragon 8 Gen 1) - SM8475 (Snapdragon 8 Gen 1+) - SM8550 (Snapdragon 8 Gen 2) - SM8650 (Snapdragon 8 Gen 3) + - SM8750 (Snapdragon 8 Elite) + - SSG2115P + - SSG2125P + - SXR1230P + - SXR2230P + - SXR2330P This example is verified with SM8550 and SM8450. diff --git a/docs/source/demo-apps-android.md b/docs/source/demo-apps-android.md deleted file mode 100644 index 5d6dccf5734..00000000000 --- a/docs/source/demo-apps-android.md +++ /dev/null @@ -1,2 +0,0 @@ -```{include} ../../examples/demo-apps/android/ExecuTorchDemo/README.md -``` diff --git a/docs/source/getting-started.md b/docs/source/getting-started.md index 4d8e3f0189d..741454fed27 100644 --- a/docs/source/getting-started.md +++ b/docs/source/getting-started.md @@ -14,7 +14,7 @@ The following are required to install the ExecuTorch host libraries, needed to e - Windows is supported via WSL. ## Installation -To use ExecuTorch, you will need to install both the Python package and the appropriate platform-specific runtime libraries. Pip is the recommended way to install the ExecuTorch python package. +To use ExecuTorch, you will need to install both the Python package and the appropriate platform-specific runtime libraries. Pip is the recommended way to install the ExecuTorch python package. This package includes the dependencies needed to export a PyTorch model, as well as Python runtime bindings for model testing and evaluation. Consider installing ExecuTorch within a virtual environment, such as one provided by [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/getting-started.html#creating-environments) or [venv](https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/#create-and-use-virtual-environments). @@ -72,7 +72,7 @@ Quantization can also be done at this stage to reduce model size and runtime. 
### Testing the Model -After successfully generating a .pte file, it is common to use the Python runtime APIs to validate the model on the development platform. This can be used to evaluate model accuracy before running on-device. +After successfully generating a .pte file, it is common to use the Python runtime APIs to validate the model on the development platform. This can be used to evaluate model accuracy before running on-device. For the MobileNet V2 model from torchvision used in this example, image inputs are expected as a normalized, float32 tensor with dimensions of (batch, channels, height, width). See [torchvision.models.mobilenet_v2](https://pytorch.org/vision/main/models/generated/torchvision.models.mobilenet_v2.html) for more information on the input and output tensor format for this model. @@ -103,20 +103,13 @@ Quick Links: ### Android #### Installation -ExecuTorch provides Java bindings for Android usage, which can be consumed from both Java and Kotlin. -To add the library to your app, download the AAR, and add it to the gradle build rule. +ExecuTorch provides Java bindings for Android usage, which can be consumed from both Java and Kotlin. +To add the library to your app, add the following dependency to your Gradle build file. -``` -mkdir -p app/libs -curl https://ossci-android.s3.amazonaws.com/executorch/release/v0.5.0-rc3/executorch.aar -o app/libs/executorch.aar -``` -And in gradle, ``` # app/build.gradle.kts dependencies { - implementation(files("libs/executorch.aar")) - implementation("com.facebook.soloader:soloader:0.10.5") - implementation("com.facebook.fbjni:fbjni:0.5.1") + implementation("org.pytorch:executorch-android:0.5.1") } ``` @@ -137,7 +130,7 @@ EValue[] output = model.forward(input_evalue); float[] scores = output[0].toTensor().getDataAsFloatArray(); -For a full example of running a model on Android, see the [ExecuTorch Android Demo App](https://github.com/pytorch/executorch/blob/main/examples/demo-apps/android/ExecuTorchDemo/app/src/main/java/com/example/executorchdemo/ClassificationActivity.java). For more information on Android development, including building from source, a full description of the Java APIs, and information on using ExecuTorch from Android native code, see [Using ExecuTorch on Android](using-executorch-android.md). +For a full example of running a model on Android, see the [DeepLabV3AndroidDemo](https://github.com/pytorch-labs/executorch-examples/tree/main/dl3/android/DeepLabV3Demo). For more information on Android development, including building from source, a full description of the Java APIs, and information on using ExecuTorch from Android native code, see [Using ExecuTorch on Android](using-executorch-android.md). ### iOS diff --git a/docs/source/index.rst index b27d53f51c7..187a5300c58 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -31,7 +31,7 @@ Welcome to the ExecuTorch Documentation The ExecuTorch source is hosted on GitHub at -https://github.com/pytorch/executorch. +https://github.com/pytorch/executorch. Join us on `Discord `__ if you have questions about ExecuTorch or would like to become a contributor! @@ -103,7 +103,7 @@ Topics in this section will help you get started with ExecuTorch. :caption: Examples :hidden: - demo-apps-android.md + Building an ExecuTorch Android Demo App demo-apps-ios.md ..
toctree:: diff --git a/docs/source/using-executorch-android.md b/docs/source/using-executorch-android.md index 62d1f3ee75a..99b68008dc6 100644 --- a/docs/source/using-executorch-android.md +++ b/docs/source/using-executorch-android.md @@ -176,7 +176,7 @@ public class MainActivity extends Activity { ``` This example loads an ExecuTorch module, prepares input data, runs inference, and processes the output data. -Please use [ExecuTorchDemo](https://github.com/pytorch/executorch/tree/main/examples/demo-apps/android/ExecuTorchDemo) +Please use [DeepLabV3AndroidDemo](https://github.com/pytorch-labs/executorch-examples/tree/main/dl3/android/DeepLabV3Demo) and [LlamaDemo](https://github.com/pytorch/executorch/tree/main/examples/demo-apps/android/LlamaDemo) for the code examples using ExecuTorch AAR package. diff --git a/docs/source/using-executorch-ios.md b/docs/source/using-executorch-ios.md index e975cb9ef22..70c2b366fa8 100644 --- a/docs/source/using-executorch-ios.md +++ b/docs/source/using-executorch-ios.md @@ -103,19 +103,18 @@ git clone https://github.com/pytorch/executorch.git --depth 1 --recurse-submodul 3. Set up [Python](https://www.python.org/downloads/macos/) 3.10+ and activate a virtual environment: ```bash -python3 -m venv .venv && source .venv/bin/activate && pip install --upgrade pip +python3 -m venv .venv && source .venv/bin/activate && ./install_requirements.sh ``` -4. Install the required dependencies, including those needed for the backends like [Core ML](backends-coreml.md) or [MPS](backends-mps.md), if you plan to build them as well: +4. Install the required dependencies, including those needed for the backends like [Core ML](backends-coreml.md) or [MPS](backends-mps.md). Choose one: ```bash -./install_executorch.sh --pybind coreml mps xnnpack +# ExecuTorch with xnnpack and CoreML backend +./install_executorch.sh --pybind xnnpack -# Optional dependencies for Core ML backend. -./backends/apple/coreml/scripts/install_requirements.sh - -# And MPS backend. +# Optional: ExecuTorch with xnnpack, CoreML, and MPS backend ./backends/apple/mps/install_requirements.sh +./install_executorch.sh --pybind xnnpack mps ``` 5. Install [CMake](https://cmake.org): diff --git a/examples/apple/coreml/README.md b/examples/apple/coreml/README.md index f4270956b2c..4dba5031358 100644 --- a/examples/apple/coreml/README.md +++ b/examples/apple/coreml/README.md @@ -18,16 +18,8 @@ We will walk through an example model to generate a Core ML delegated binary fil 1. Following the setup guide in [Setting Up ExecuTorch](https://pytorch.org/executorch/stable/getting-started-setup) you should be able to get the basic development environment for ExecuTorch working. -2. Run `install_requirements.sh` to install dependencies required by the **Core ML** backend. -```bash -cd executorch - -./backends/apple/coreml/scripts/install_requirements.sh - -``` - -3. Run the export script to generate a Core ML delegated binary file. +2. Run the export script to generate a Core ML delegated binary file. ```bash cd executorch @@ -39,11 +31,14 @@ python3 -m examples.portable.scripts.export -h python3 -m examples.apple.coreml.scripts.export --model_name add ``` -4. Run the binary file using the `coreml_executor_runner`. +3. Run the binary file using the `coreml_executor_runner`. ```bash cd executorch +# Install requirements needed to run the example runner +./backends/apple/coreml/scripts/install_requirements.sh + # Builds the Core ML executor runner. Generates ./coreml_executor_runner if successful. 
./examples/apple/coreml/scripts/build_executor_runner.sh diff --git a/examples/apple/coreml/scripts/extract_coreml_models.py b/examples/apple/coreml/scripts/extract_coreml_models.py index d2812d37ab0..b3778a22625 100644 --- a/examples/apple/coreml/scripts/extract_coreml_models.py +++ b/examples/apple/coreml/scripts/extract_coreml_models.py @@ -10,12 +10,9 @@ from typing import List, Optional -import executorchcoreml - +from executorch.backends.apple.coreml import executorchcoreml from executorch.backends.apple.coreml.compiler import CoreMLBackend - from executorch.exir._serialize._program import deserialize_pte_binary - from executorch.exir.schema import ( BackendDelegate, BackendDelegateDataReference, diff --git a/examples/demo-apps/android/ExecuTorchDemo/.gitignore b/examples/demo-apps/android/ExecuTorchDemo/.gitignore deleted file mode 100644 index e7bee2e2b1c..00000000000 --- a/examples/demo-apps/android/ExecuTorchDemo/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -*.iml -.gradle -/local.properties -.idea -.DS_Store -/build -/captures -.externalNativeBuild -.cxx -local.properties -*.so diff --git a/examples/demo-apps/android/ExecuTorchDemo/README.md b/examples/demo-apps/android/ExecuTorchDemo/README.md deleted file mode 100644 index c6ee756458f..00000000000 --- a/examples/demo-apps/android/ExecuTorchDemo/README.md +++ /dev/null @@ -1,132 +0,0 @@ -# Building an ExecuTorch Android Demo App - -This is forked from [PyTorch Android Demo App](https://github.com/pytorch/android-demo-app). - -This guide explains how to setup ExecuTorch for Android using a demo app. The app employs a [DeepLab v3](https://pytorch.org/hub/pytorch_vision_deeplabv3_resnet101/) model for image segmentation tasks. Models are exported to ExecuTorch using [XNNPACK FP32 backend](tutorial-xnnpack-delegate-lowering.md). - -::::{grid} 2 -:::{grid-item-card} What you will learn -:class-card: card-prerequisites -* How to set up a build target for Android arm64-v8a -* How to build the required ExecuTorch runtime with JNI wrapper for Android -* How to build the app with required JNI library and model file -::: - -:::{grid-item-card} Prerequisites -:class-card: card-prerequisites -* Refer to [Setting up ExecuTorch](https://pytorch.org/executorch/stable/getting-started-setup) to set up the repo and dev environment. -* Download and install [Android Studio and SDK](https://developer.android.com/studio). -* Supported Host OS: CentOS, macOS Ventura (M1/x86_64). See below for Qualcomm HTP specific requirements. -* *Qualcomm HTP Only[^1]:* To build and run on Qualcomm's AI Engine Direct, please follow [Building and Running ExecuTorch with Qualcomm AI Engine Direct Backend](backends-qualcomm.md) for hardware and software pre-requisites. The version we use for this tutorial is 2.19. The chip we use for this tutorial is SM8450. -::: -:::: - -[^1]: This section applies only if Qualcomm HTP Backend is needed in the app. Same applies to sections with title`Qualcomm Hexagon NPU`. - -```{note} -This demo app and tutorial has only been validated with arm64-v8a [ABI](https://developer.android.com/ndk/guides/abis). -``` - - -## Build - -### Ahead-Of-Time - -We generate the model file for the ExecuTorch runtime in Android Demo App. 
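The `Testing the Model` hunk above recommends validating an exported `.pte` with the Python runtime APIs before running on-device. A hedged sketch of that flow, assuming the pybindings are built into the installed package; the `mv2_xnnpack.pte` filename is an illustrative placeholder, not a file produced by this patch:

```python
import torch

from executorch.extension.pybindings.portable_lib import _load_for_executorch

# Load the exported program with the host-side pybindings runtime.
program = _load_for_executorch("mv2_xnnpack.pte")

# MobileNet V2 expects a normalized float32 tensor with dimensions
# (batch, channels, height, width).
example_input = torch.randn(1, 3, 224, 224)

# forward takes a list of inputs and returns a list of outputs.
outputs = program.forward([example_input])
print(outputs[0].shape)
```

The same load-and-run check applies to any delegated `.pte`, including the XNNPACK- and QNN-delegated models exported in the demo-app tutorial being removed below.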
- -#### XNNPACK Delegation - -For delegating DeepLab v3 to XNNPACK backend, please do the following to export the model: - -```bash -cd executorch # go to executorch root -python3 -m examples.xnnpack.aot_compiler --model_name="dl3" --delegate -``` - -Then push the pte file to Android device: - -```bash -adb push dl3_xnnpack_fp32.pte /data/local/tmp/dl3_xnnpack_fp32.pte -``` - -For more detailed tutorial of lowering to XNNPACK, please see [XNNPACK backend](backends-xnnpack.md). - -#### Qualcomm Hexagon NPU - -For delegating to Qualcomm Hexagon NPU, please follow the tutorial [here](backends-qualcomm.md). - -```bash -python -m examples.qualcomm.scripts.deeplab_v3 -b build-android -m SM8450 -s -``` - -Then push the pte file to Android device: - -```bash -adb push deeplab_v3/dlv3_qnn.pte /data/local/tmp/dlv3_qnn.pte -``` - -### Runtime - -We build the required ExecuTorch runtime library (AAR) to run the model. - -#### XNNPACK - -```bash -# go to ExecuTorch repo root -export ANDROID_NDK= -export ANDROID_ABIS=arm64-v8a - -# Run the following lines from the `executorch/` folder -./install_executorch.sh --clean - -# Create a new directory `app/libs` for the AAR to live -pushd examples/demo-apps/android/ExecuTorchDemo -mkdir -p app/libs -popd - -# Build the AAR. It will include XNNPACK backend by default. -export BUILD_AAR_DIR=$(realpath examples/demo-apps/android/ExecuTorchDemo/app/libs) -sh scripts/build_android_library.sh -``` - -#### Qualcomm Hexagon NPU - -```bash -# go to ExecuTorch repo root -export ANDROID_NDK= -export ANDROID_ABIS=arm64-v8a -export QNN_SDK_ROOT= - -# Run the following lines from the `executorch/` folder -./install_executorch.sh --clean - -# Create a new directory `app/libs` for the AAR to live -pushd examples/demo-apps/android/ExecuTorchDemo -mkdir -p app/libs -popd - -# Build the AAR. It will include XNNPACK backend by default. -export BUILD_AAR_DIR=$(realpath examples/demo-apps/android/ExecuTorchDemo/app/libs) -sh scripts/build_android_library.sh -``` - -This is very similar to XNNPACK setup, but users now needs to define `QNN_SDK_ROOT` so that -QNN backend is built into the AAR. - -## Running the App - -1. Open the project `examples/demo-apps/android/ExecuTorchDemo` with Android Studio. - -2. [Run](https://developer.android.com/studio/run) the app (^R). - -Android Studio View
- -On the phone or emulator, you can try running the model: -Android Demo
- -## Takeaways -Through this tutorial we've learnt how to build the ExecuTorch runtime library with XNNPACK (or Qualcomm HTP) backend, and expose it to JNI layer to build the Android app running segmentation model. - -## Reporting Issues - -If you encountered any bugs or issues following this tutorial please file a bug/issue here on [Github](https://github.com/pytorch/executorch/issues/new). diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/.gitignore b/examples/demo-apps/android/ExecuTorchDemo/app/.gitignore deleted file mode 100644 index 796b96d1c40..00000000000 --- a/examples/demo-apps/android/ExecuTorchDemo/app/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/build diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/build.gradle.kts b/examples/demo-apps/android/ExecuTorchDemo/app/build.gradle.kts deleted file mode 100644 index ca06671f328..00000000000 --- a/examples/demo-apps/android/ExecuTorchDemo/app/build.gradle.kts +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -plugins { - id("com.android.application") - id("org.jetbrains.kotlin.android") -} - -android { - namespace = "com.example.executorchdemo" - compileSdk = 34 - - defaultConfig { - applicationId = "com.example.executorchdemo" - minSdk = 24 - targetSdk = 33 - versionCode = 1 - versionName = "1.0" - - testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" - vectorDrawables { useSupportLibrary = true } - externalNativeBuild { cmake { cppFlags += "" } } - } - - buildTypes { - release { - isMinifyEnabled = false - proguardFiles(getDefaultProguardFile("proguard-android-optimize.txt"), "proguard-rules.pro") - } - } - compileOptions { - sourceCompatibility = JavaVersion.VERSION_1_8 - targetCompatibility = JavaVersion.VERSION_1_8 - } - kotlinOptions { jvmTarget = "1.8" } - buildFeatures { compose = true } - composeOptions { kotlinCompilerExtensionVersion = "1.4.3" } - packaging { resources { excludes += "/META-INF/{AL2.0,LGPL2.1}" } } -} - -dependencies { - implementation("androidx.core:core-ktx:1.9.0") - implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.6.1") - implementation("androidx.activity:activity-compose:1.7.0") - implementation(platform("androidx.compose:compose-bom:2023.03.00")) - implementation("androidx.compose.ui:ui") - implementation("androidx.compose.ui:ui-graphics") - implementation("androidx.compose.ui:ui-tooling-preview") - implementation("androidx.compose.material3:material3") - implementation("androidx.appcompat:appcompat:1.6.1") - implementation("androidx.camera:camera-core:1.3.0-rc02") - implementation("androidx.constraintlayout:constraintlayout:2.2.0-alpha12") - implementation("com.facebook.soloader:soloader:0.10.5") - implementation("com.facebook.fbjni:fbjni:0.5.1") - implementation(files("libs/executorch.aar")) - testImplementation("junit:junit:4.13.2") - androidTestImplementation("androidx.test.ext:junit:1.1.5") - androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1") - androidTestImplementation(platform("androidx.compose:compose-bom:2023.03.00")) - androidTestImplementation("androidx.compose.ui:ui-test-junit4") - debugImplementation("androidx.compose.ui:ui-tooling") - debugImplementation("androidx.compose.ui:ui-test-manifest") -} - -tasks.register("setup") { - doFirst { - exec { - commandLine("sh", "setup.sh") - workingDir("../") - } - } -} diff --git 
a/examples/demo-apps/android/ExecuTorchDemo/app/proguard-rules.pro b/examples/demo-apps/android/ExecuTorchDemo/app/proguard-rules.pro deleted file mode 100644 index 481bb434814..00000000000 --- a/examples/demo-apps/android/ExecuTorchDemo/app/proguard-rules.pro +++ /dev/null @@ -1,21 +0,0 @@ -# Add project specific ProGuard rules here. -# You can control the set of applied configuration files using the -# proguardFiles setting in build.gradle. -# -# For more details, see -# http://developer.android.com/guide/developing/tools/proguard.html - -# If your project uses WebView with JS, uncomment the following -# and specify the fully qualified class name to the JavaScript interface -# class: -#-keepclassmembers class fqcn.of.javascript.interface.for.webview { -# public *; -#} - -# Uncomment this to preserve the line number information for -# debugging stack traces. -#-keepattributes SourceFile,LineNumberTable - -# If you keep the line number information, uncomment this to -# hide the original source file name. -#-renamesourcefileattribute SourceFile \ No newline at end of file diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/AndroidManifest.xml b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/AndroidManifest.xml deleted file mode 100644 index 8d71b156398..00000000000 --- a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/AndroidManifest.xml +++ /dev/null @@ -1,49 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/BUCK b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/BUCK deleted file mode 100644 index 371c991ce88..00000000000 --- a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/BUCK +++ /dev/null @@ -1,65 +0,0 @@ -load("@fbsource//tools/build_defs:manifold.bzl", "manifold_get") -load("@fbsource//tools/build_defs/android:fb_android_binary.bzl", "fb_android_binary") -load("@fbsource//tools/build_defs/android:fb_android_library.bzl", "fb_android_library") -load("@fbsource//tools/build_defs/android:fb_android_resource.bzl", "fb_android_resource") - -manifold_get( - name = "dl3_xnnpack_fp32", - out = "dl3_xnnpack_fp32.pte", - api_key = "executorch-key", - artifact_path = "tree/models/benchmarking/executorch/dl3_xnnpack_fp32.pte", - bucket_name = "executorch", - sha1 = "3e7af1d8f5ec4acb6de156d361715e16e5f53783", - timeout_msec = 120000, -) - -fb_android_resource( - name = "app_res", - assets = "assets", - package = "com.example.executorchdemo", - res = "res", -) - -fb_android_resource( - name = "model_res", - assets = {"dl3_xnnpack_fp32.pte": ":dl3_xnnpack_fp32"}, - package = "com.example.executorchdemo", - res = "res", -) - -fb_android_library( - name = "app_lib", - srcs = [ - "java/com/example/executorchdemo/MainActivity.java", - "java/com/example/executorchdemo/TensorImageUtils.java", - ], - autoglob = False, - language = "JAVA", - deps = [ - ":app_res", - "//xplat/executorch/extension/android:executorch", - ], -) - -fb_android_binary( - name = "ExecuTorchDemo", - keystore = "//fbandroid/keystores:debug", - manifest = "AndroidManifest.xml", - manifest_entries = { - "min_sdk_version": 19, # Android supports 19 for minimum - "target_sdk_version": 34, - "version_code": "1", - "version_name": "1.0", - }, - package_type = "release", - skip_proguard = True, - deps = [ - ":app_lib", - ":app_res", - ":model_res", - "//third-party/java/androidx/appcompat/appcompat:appcompat", - "//third-party/java/androidx/constraintlayout/constraintlayout:constraintlayout", - 
"//xplat/executorch/extension/android:executorch", - "//xplat/executorch/extension/android/jni:executorch_jni_full", - ], -) diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/TARGETS b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/TARGETS deleted file mode 100644 index 5c4f482b5ea..00000000000 --- a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/TARGETS +++ /dev/null @@ -1 +0,0 @@ -# This file needs to exist to avoid build system breakage, see https://fburl.com/workplace/jtdlgdmd diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/corgi.jpeg b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/corgi.jpeg deleted file mode 100644 index b7cdd3bf73d..00000000000 Binary files a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/corgi.jpeg and /dev/null differ diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/deeplab.jpg b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/deeplab.jpg deleted file mode 100644 index e840b670002..00000000000 Binary files a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/deeplab.jpg and /dev/null differ diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/dog.jpg b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/dog.jpg deleted file mode 100644 index e20f0ccbc48..00000000000 Binary files a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/dog.jpg and /dev/null differ diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/java/com/example/executorchdemo/MainActivity.java b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/java/com/example/executorchdemo/MainActivity.java deleted file mode 100644 index 9ac800b49a3..00000000000 --- a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/java/com/example/executorchdemo/MainActivity.java +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -package com.example.executorchdemo; - -import android.app.Activity; -import android.content.Context; -import android.graphics.Bitmap; -import android.graphics.BitmapFactory; -import android.os.Bundle; -import android.os.SystemClock; -import android.system.ErrnoException; -import android.system.Os; -import android.util.Log; -import android.view.View; -import android.widget.Button; -import android.widget.ImageView; -import android.widget.ProgressBar; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.Objects; -import org.pytorch.executorch.EValue; -import org.pytorch.executorch.Module; -import org.pytorch.executorch.Tensor; - -public class MainActivity extends Activity implements Runnable { - private ImageView mImageView; - private Button mButtonXnnpack; - private Button mButtonHtp; - private ProgressBar mProgressBar; - private Bitmap mBitmap = null; - private Module mModule = null; - private String mImagename = "corgi.jpeg"; - - // see http://host.robots.ox.ac.uk:8080/pascal/VOC/voc2007/segexamples/index.html for the list of - // classes with indexes - private static final int CLASSNUM = 21; - private static final int DOG = 12; - private static final int PERSON = 15; - private static final int SHEEP = 17; - - public static String assetFilePath(Context context, String assetName) throws IOException { - File file = new File(context.getFilesDir(), assetName); - if (file.exists() && file.length() > 0) { - return file.getAbsolutePath(); - } - - try (InputStream is = context.getAssets().open(assetName)) { - try (OutputStream os = new FileOutputStream(file)) { - byte[] buffer = new byte[4 * 1024]; - int read; - while ((read = is.read(buffer)) != -1) { - os.write(buffer, 0, read); - } - os.flush(); - } - return file.getAbsolutePath(); - } - } - - private void populateImage() { - try { - mBitmap = BitmapFactory.decodeStream(getAssets().open(mImagename)); - mBitmap = Bitmap.createScaledBitmap(mBitmap, 224, 224, true); - mImageView.setImageBitmap(mBitmap); - } catch (IOException e) { - Log.e("ImageSegmentation", "Error reading assets", e); - finish(); - } - } - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - setContentView(R.layout.activity_main); - - try { - Os.setenv("ADSP_LIBRARY_PATH", getApplicationInfo().nativeLibraryDir, true); - } catch (ErrnoException e) { - Log.e("ExecuTorchDemo", "Cannot set ADSP_LIBRARY_PATH", e); - finish(); - } - - try { - mBitmap = BitmapFactory.decodeStream(getAssets().open(mImagename), null, null); - mBitmap = Bitmap.createScaledBitmap(mBitmap, 224, 224, true); - } catch (IOException e) { - Log.e("ImageSegmentation", "Error reading assets", e); - finish(); - } - - mModule = Module.load("/data/local/tmp/dl3_xnnpack_fp32.pte"); - - mImageView = findViewById(R.id.imageView); - mImageView.setImageBitmap(mBitmap); - - final Button buttonNext = findViewById(R.id.nextButton); - buttonNext.setOnClickListener( - new View.OnClickListener() { - public void onClick(View v) { - if (Objects.equals(mImagename, "corgi.jpeg")) { - mImagename = "dog.jpg"; - } else if (Objects.equals(mImagename, "dog.jpg")) { - mImagename = "deeplab.jpg"; - } else { - mImagename = "corgi.jpeg"; - } - populateImage(); - } - }); - - mButtonXnnpack = findViewById(R.id.xnnpackButton); - mButtonHtp = findViewById(R.id.htpButton); - mProgressBar = (ProgressBar) findViewById(R.id.progressBar); - mButtonXnnpack.setOnClickListener( - new 
View.OnClickListener() { - public void onClick(View v) { - mModule.destroy(); - mModule = Module.load("/data/local/tmp/dl3_xnnpack_fp32.pte"); - mButtonXnnpack.setEnabled(false); - mProgressBar.setVisibility(ProgressBar.VISIBLE); - mButtonXnnpack.setText(getString(R.string.run_model)); - - Thread thread = new Thread(MainActivity.this); - thread.start(); - } - }); - - mButtonHtp.setOnClickListener( - new View.OnClickListener() { - public void onClick(View v) { - mModule.destroy(); - mModule = Module.load("/data/local/tmp/dlv3_qnn.pte"); - mButtonHtp.setEnabled(false); - mProgressBar.setVisibility(ProgressBar.VISIBLE); - mButtonHtp.setText(getString(R.string.run_model)); - - Thread thread = new Thread(MainActivity.this); - thread.start(); - } - }); - - final Button resetImage = findViewById(R.id.resetImage); - resetImage.setOnClickListener( - new View.OnClickListener() { - public void onClick(View v) { - populateImage(); - } - }); - } - - @Override - public void run() { - final Tensor inputTensor = - TensorImageUtils.bitmapToFloat32Tensor( - mBitmap, - TensorImageUtils.TORCHVISION_NORM_MEAN_RGB, - TensorImageUtils.TORCHVISION_NORM_STD_RGB); - final float[] inputs = inputTensor.getDataAsFloatArray(); - - final long startTime = SystemClock.elapsedRealtime(); - Tensor outputTensor = mModule.forward(EValue.from(inputTensor))[0].toTensor(); - final long inferenceTime = SystemClock.elapsedRealtime() - startTime; - Log.d("ImageSegmentation", "inference time (ms): " + inferenceTime); - - final float[] scores = outputTensor.getDataAsFloatArray(); - int width = mBitmap.getWidth(); - int height = mBitmap.getHeight(); - - int[] intValues = new int[width * height]; - for (int j = 0; j < height; j++) { - for (int k = 0; k < width; k++) { - int maxi = 0, maxj = 0, maxk = 0; - double maxnum = -Double.MAX_VALUE; - for (int i = 0; i < CLASSNUM; i++) { - float score = scores[i * (width * height) + j * width + k]; - if (score > maxnum) { - maxnum = score; - maxi = i; - maxj = j; - maxk = k; - } - } - if (maxi == PERSON) intValues[maxj * width + maxk] = 0xFFFF0000; // R - else if (maxi == DOG) intValues[maxj * width + maxk] = 0xFF00FF00; // G - else if (maxi == SHEEP) intValues[maxj * width + maxk] = 0xFF0000FF; // B - else intValues[maxj * width + maxk] = 0xFF000000; - } - } - - Bitmap bmpSegmentation = Bitmap.createScaledBitmap(mBitmap, width, height, true); - Bitmap outputBitmap = bmpSegmentation.copy(bmpSegmentation.getConfig(), true); - outputBitmap.setPixels( - intValues, - 0, - outputBitmap.getWidth(), - 0, - 0, - outputBitmap.getWidth(), - outputBitmap.getHeight()); - final Bitmap transferredBitmap = - Bitmap.createScaledBitmap(outputBitmap, mBitmap.getWidth(), mBitmap.getHeight(), true); - - runOnUiThread( - new Runnable() { - @Override - public void run() { - mImageView.setImageBitmap(transferredBitmap); - mButtonXnnpack.setEnabled(true); - mButtonXnnpack.setText(R.string.run_xnnpack); - mButtonHtp.setEnabled(true); - mButtonHtp.setText(R.string.run_htp); - mProgressBar.setVisibility(ProgressBar.INVISIBLE); - } - }); - } -} diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/java/com/example/executorchdemo/TensorImageUtils.java b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/java/com/example/executorchdemo/TensorImageUtils.java deleted file mode 100644 index a5c7699df9f..00000000000 --- a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/java/com/example/executorchdemo/TensorImageUtils.java +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. 
and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -package com.example.executorchdemo; - -import android.graphics.Bitmap; -import android.util.Log; -import java.nio.FloatBuffer; -import org.pytorch.executorch.Tensor; - -/** - * Contains utility functions for {@link Tensor} creation from {@link android.graphics.Bitmap} or - * {@link android.media.Image} source. - */ -public final class TensorImageUtils { - - public static float[] TORCHVISION_NORM_MEAN_RGB = new float[] {0.485f, 0.456f, 0.406f}; - public static float[] TORCHVISION_NORM_STD_RGB = new float[] {0.229f, 0.224f, 0.225f}; - - /** - * Creates new {@link Tensor} from full {@link android.graphics.Bitmap}, normalized with specified - * in parameters mean and std. - * - * @param normMeanRGB means for RGB channels normalization, length must equal 3, RGB order - * @param normStdRGB standard deviation for RGB channels normalization, length must equal 3, RGB - * order - */ - public static Tensor bitmapToFloat32Tensor( - final Bitmap bitmap, final float[] normMeanRGB, final float normStdRGB[]) { - checkNormMeanArg(normMeanRGB); - checkNormStdArg(normStdRGB); - - return bitmapToFloat32Tensor( - bitmap, 0, 0, bitmap.getWidth(), bitmap.getHeight(), normMeanRGB, normStdRGB); - } - - /** - * Writes tensor content from specified {@link android.graphics.Bitmap}, normalized with specified - * in parameters mean and std to specified {@link java.nio.FloatBuffer} with specified offset. - * - * @param bitmap {@link android.graphics.Bitmap} as a source for Tensor data - * @param x - x coordinate of top left corner of bitmap's area - * @param y - y coordinate of top left corner of bitmap's area - * @param width - width of bitmap's area - * @param height - height of bitmap's area - * @param normMeanRGB means for RGB channels normalization, length must equal 3, RGB order - * @param normStdRGB standard deviation for RGB channels normalization, length must equal 3, RGB - * order - */ - public static void bitmapToFloatBuffer( - final Bitmap bitmap, - final int x, - final int y, - final int width, - final int height, - final float[] normMeanRGB, - final float[] normStdRGB, - final FloatBuffer outBuffer, - final int outBufferOffset) { - checkOutBufferCapacity(outBuffer, outBufferOffset, width, height); - checkNormMeanArg(normMeanRGB); - checkNormStdArg(normStdRGB); - final int pixelsCount = height * width; - final int[] pixels = new int[pixelsCount]; - bitmap.getPixels(pixels, 0, width, x, y, width, height); - final int offset_g = pixelsCount; - final int offset_b = 2 * pixelsCount; - for (int i = 0; i < 100; i++) { - final int c = pixels[i]; - Log.i("Image", ": " + i + " " + ((c >> 16) & 0xff)); - } - for (int i = 0; i < pixelsCount; i++) { - final int c = pixels[i]; - float r = ((c >> 16) & 0xff) / 255.0f; - float g = ((c >> 8) & 0xff) / 255.0f; - float b = ((c) & 0xff) / 255.0f; - outBuffer.put(outBufferOffset + i, (r - normMeanRGB[0]) / normStdRGB[0]); - outBuffer.put(outBufferOffset + offset_g + i, (g - normMeanRGB[1]) / normStdRGB[1]); - outBuffer.put(outBufferOffset + offset_b + i, (b - normMeanRGB[2]) / normStdRGB[2]); - } - } - - /** - * Creates new {@link Tensor} from specified area of {@link android.graphics.Bitmap}, normalized - * with specified in parameters mean and std. 
- * - * @param bitmap {@link android.graphics.Bitmap} as a source for Tensor data - * @param x - x coordinate of top left corner of bitmap's area - * @param y - y coordinate of top left corner of bitmap's area - * @param width - width of bitmap's area - * @param height - height of bitmap's area - * @param normMeanRGB means for RGB channels normalization, length must equal 3, RGB order - * @param normStdRGB standard deviation for RGB channels normalization, length must equal 3, RGB - * order - */ - public static Tensor bitmapToFloat32Tensor( - final Bitmap bitmap, - int x, - int y, - int width, - int height, - float[] normMeanRGB, - float[] normStdRGB) { - checkNormMeanArg(normMeanRGB); - checkNormStdArg(normStdRGB); - - final FloatBuffer floatBuffer = Tensor.allocateFloatBuffer(3 * width * height); - bitmapToFloatBuffer(bitmap, x, y, width, height, normMeanRGB, normStdRGB, floatBuffer, 0); - return Tensor.fromBlob(floatBuffer, new long[] {1, 3, height, width}); - } - - private static void checkOutBufferCapacity( - FloatBuffer outBuffer, int outBufferOffset, int tensorWidth, int tensorHeight) { - if (outBufferOffset + 3 * tensorWidth * tensorHeight > outBuffer.capacity()) { - throw new IllegalStateException("Buffer underflow"); - } - } - - private static void checkTensorSize(int tensorWidth, int tensorHeight) { - if (tensorHeight <= 0 || tensorWidth <= 0) { - throw new IllegalArgumentException("tensorHeight and tensorWidth must be positive"); - } - } - - private static void checkRotateCWDegrees(int rotateCWDegrees) { - if (rotateCWDegrees != 0 - && rotateCWDegrees != 90 - && rotateCWDegrees != 180 - && rotateCWDegrees != 270) { - throw new IllegalArgumentException("rotateCWDegrees must be one of 0, 90, 180, 270"); - } - } - - private static void checkNormStdArg(float[] normStdRGB) { - if (normStdRGB.length != 3) { - throw new IllegalArgumentException("normStdRGB length must be 3"); - } - } - - private static void checkNormMeanArg(float[] normMeanRGB) { - if (normMeanRGB.length != 3) { - throw new IllegalArgumentException("normMeanRGB length must be 3"); - } - } -} diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/res/drawable/ic_launcher_background.xml b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/res/drawable/ic_launcher_background.xml deleted file mode 100644 index 07d5da9cbf1..00000000000 --- a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/res/drawable/ic_launcher_background.xml +++ /dev/null @@ -1,170 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/res/drawable/ic_launcher_foreground.xml b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/res/drawable/ic_launcher_foreground.xml deleted file mode 100644 index 7706ab9e6d4..00000000000 --- a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/res/drawable/ic_launcher_foreground.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - - - - - - diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/res/layout/activity_classification.xml b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/res/layout/activity_classification.xml deleted file mode 100644 index d896adb54d4..00000000000 --- a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/res/layout/activity_classification.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - - - - -
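The deleted `TensorImageUtils.java` above packed an RGB bitmap into a float32 `(1, 3, height, width)` tensor by scaling each channel to `[0, 1]` and applying the torchvision mean/std. A rough Python equivalent of that normalization, for reference; the helper name and the random input are illustrative:

```python
import torch

# Constants mirrored from the deleted TensorImageUtils.java.
TORCHVISION_NORM_MEAN_RGB = torch.tensor([0.485, 0.456, 0.406])
TORCHVISION_NORM_STD_RGB = torch.tensor([0.229, 0.224, 0.225])


def rgb_to_normalized_tensor(pixels: torch.Tensor) -> torch.Tensor:
    """pixels: an (H, W, 3) uint8 image in RGB order."""
    # HWC uint8 -> CHW float in [0, 1], matching bitmapToFloatBuffer's
    # planar layout (all R values, then all G, then all B).
    chw = pixels.permute(2, 0, 1).float() / 255.0
    mean = TORCHVISION_NORM_MEAN_RGB.view(3, 1, 1)
    std = TORCHVISION_NORM_STD_RGB.view(3, 1, 1)
    return ((chw - mean) / std).unsqueeze(0)  # add batch dim -> (1, 3, H, W)


image = torch.randint(0, 256, (224, 224, 3), dtype=torch.uint8)
print(rgb_to_normalized_tensor(image).shape)  # torch.Size([1, 3, 224, 224])
```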