From cb2b174e852184af8f724a10ed73222c7ee48133 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 18 Feb 2025 09:44:13 -0800 Subject: [PATCH 001/584] Stop -Wmacro-redefined for TORCH_API (#8392) No longer seeing these warnings during local CMake build. --- codegen/macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codegen/macros.h b/codegen/macros.h index c84b30c2317..ddba56b18d7 100644 --- a/codegen/macros.h +++ b/codegen/macros.h @@ -8,6 +8,6 @@ #pragma once // TODO(T157709949) remove this file -#ifndef USE_ATEN_LIB +#if !defined(USE_ATEN_LIB) && !defined(TORCH_API) #define TORCH_API #endif From c00c798106bf62171189f7adff2e0bca442fd255 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 18 Feb 2025 11:23:48 -0800 Subject: [PATCH 002/584] Automatically put torch in CMAKE_PREFIX_PATH so users/scripts don't have to (#8474) Removed all explicit setting of CMAKE_PREFIX_PATH to the torch directory in favor of automatically doing it when we find_package(Torch). Hat tip to @huydhn for showing the way to only find torch once. --- .ci/scripts/build_llama_android.sh | 3 - .ci/scripts/test_llama.sh | 1 - .ci/scripts/test_llava.sh | 7 +-- .ci/scripts/test_model.sh | 4 -- .ci/scripts/test_phi_3_mini.sh | 4 -- .ci/scripts/test_quantized_aot_lib.sh | 3 - .ci/scripts/utils.sh | 1 - .github/workflows/trunk.yml | 2 - CMakeLists.txt | 4 +- .../scripts/build_quantized_ops_aot_lib.sh | 4 -- build/Codegen.cmake | 9 +-- build/Utils.cmake | 60 +++++++++++++++---- build/build_android_llm_demo.sh | 3 - build/build_apple_frameworks.sh | 1 - .../executorch-arm-delegate-tutorial.md | 3 - examples/models/llava/CMakeLists.txt | 2 +- .../portable/custom_ops/test_custom_ops.sh | 3 +- .../xnnpack/quantization/test_quantize.sh | 3 - extension/llm/custom_ops/CMakeLists.txt | 4 +- extension/llm/export/quantizer_lib.py | 4 +- setup.py | 4 +- test/run_oss_cpp_tests.sh | 2 - 22 files changed, 62 insertions(+), 69 deletions(-) diff --git a/.ci/scripts/build_llama_android.sh b/.ci/scripts/build_llama_android.sh index d37c65aa8ec..a100cc13144 100644 --- a/.ci/scripts/build_llama_android.sh +++ b/.ci/scripts/build_llama_android.sh @@ -14,7 +14,6 @@ if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then PYTHON_EXECUTABLE=python3 fi which "${PYTHON_EXECUTABLE}" -CMAKE_PREFIX_PATH="$(python3 -c 'import torch as _; print(_.__path__[0])')" install_executorch_and_backend_lib() { echo "Installing executorch and xnnpack backend" @@ -28,7 +27,6 @@ install_executorch_and_backend_lib() { -DANDROID_ABI="${ANDROID_ABI}" \ -DCMAKE_INSTALL_PREFIX=cmake-android-out \ -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ @@ -54,7 +52,6 @@ build_llama_runner() { -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ - -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \ -Bcmake-android-out/examples/models/llama examples/models/llama cmake --build cmake-android-out/examples/models/llama -j4 --config Release diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh index 9735e26798d..9bb881ce8eb 100644 --- a/.ci/scripts/test_llama.sh +++ b/.ci/scripts/test_llama.sh @@ -154,7 +154,6 @@ cmake_install_executorch_libraries() { rm -rf cmake-out retry cmake \ -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DCMAKE_PREFIX_PATH="$(python3 -c 'import torch as _; print(_.__path__[0])')" \ -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \ 
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ diff --git a/.ci/scripts/test_llava.sh b/.ci/scripts/test_llava.sh index c511942be91..15df725f9c1 100644 --- a/.ci/scripts/test_llava.sh +++ b/.ci/scripts/test_llava.sh @@ -31,7 +31,6 @@ NPROC=8 if hash nproc &> /dev/null; then NPROC=$(nproc); fi python_lib=$($PYTHON_EXECUTABLE -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())') -CMAKE_PREFIX_PATH="$(python3 -c 'import torch as _; print(_.__path__[0])')" EXECUTORCH_COMMON_CMAKE_ARGS=" \ -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ @@ -48,7 +47,6 @@ EXECUTORCH_COMMON_CMAKE_ARGS=" \ cmake_install_executorch_libraries() { cmake \ ${EXECUTORCH_COMMON_CMAKE_ARGS} \ - "-DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}" \ -B${BUILD_DIR} . cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${CMAKE_BUILD_TYPE} @@ -59,7 +57,6 @@ cmake_install_executorch_libraries_for_android() { -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \ -DANDROID_ABI=arm64-v8a \ ${EXECUTORCH_COMMON_CMAKE_ARGS} \ - "-DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}" \ -B${BUILD_DIR} . cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${CMAKE_BUILD_TYPE} @@ -80,7 +77,7 @@ cmake_build_llava_runner() { cmake \ ${LLAVA_COMMON_CMAKE_ARGS} \ - -DCMAKE_PREFIX_PATH="$python_lib;${CMAKE_PREFIX_PATH}" \ + -DCMAKE_PREFIX_PATH="$python_lib" \ -B${BUILD_DIR}/${dir} \ ${dir} @@ -96,7 +93,7 @@ cmake_build_llava_runner_for_android() { -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \ -DANDROID_ABI=arm64-v8a \ ${LLAVA_COMMON_CMAKE_ARGS} \ - -DCMAKE_PREFIX_PATH="$python_lib;${CMAKE_PREFIX_PATH}" \ + -DCMAKE_PREFIX_PATH="$python_lib" \ -DLLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE=ON \ -B${BUILD_DIR}/${dir} \ ${dir} diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh index ef4859135c6..157449c0717 100755 --- a/.ci/scripts/test_model.sh +++ b/.ci/scripts/test_model.sh @@ -50,12 +50,10 @@ prepare_artifacts_upload() { build_cmake_executor_runner() { echo "Building executor_runner" - CMAKE_PREFIX_PATH="$(python3 -c 'import torch as _; print(_.__path__[0])')" rm -rf ${CMAKE_OUTPUT_DIR} cmake -DCMAKE_BUILD_TYPE=Debug \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ - -DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \ -B${CMAKE_OUTPUT_DIR} . cmake --build ${CMAKE_OUTPUT_DIR} -j4 --config Debug @@ -100,14 +98,12 @@ test_model() { build_cmake_xnn_executor_runner() { echo "Building xnn_executor_runner" - CMAKE_PREFIX_PATH="$(python3 -c 'import torch as _; print(_.__path__[0])')" (rm -rf ${CMAKE_OUTPUT_DIR} \ && mkdir ${CMAKE_OUTPUT_DIR} \ && cd ${CMAKE_OUTPUT_DIR} \ && retry cmake -DCMAKE_BUILD_TYPE=Release \ -DEXECUTORCH_BUILD_XNNPACK=ON \ - -DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \ -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..) 
cmake --build ${CMAKE_OUTPUT_DIR} -j4 diff --git a/.ci/scripts/test_phi_3_mini.sh b/.ci/scripts/test_phi_3_mini.sh index 64dd6b829d8..40767013e23 100644 --- a/.ci/scripts/test_phi_3_mini.sh +++ b/.ci/scripts/test_phi_3_mini.sh @@ -22,10 +22,8 @@ NPROC=8 if hash nproc &> /dev/null; then NPROC=$(nproc); fi cmake_install_executorch_libraries() { - CMAKE_PREFIX_PATH="$(python3 -c 'import torch as _; print(_.__path__[0])')" cmake -DPYTHON_EXECUTABLE=python \ -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \ - -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \ -DEXECUTORCH_ENABLE_LOGGING=1 \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ @@ -41,10 +39,8 @@ cmake_install_executorch_libraries() { } cmake_build_phi_3_mini() { - CMAKE_PREFIX_PATH="$(python3 -c 'import torch as _; print(_.__path__[0])')" cmake -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \ -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \ - -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ diff --git a/.ci/scripts/test_quantized_aot_lib.sh b/.ci/scripts/test_quantized_aot_lib.sh index 3f8ea886f5c..b522eb7b418 100755 --- a/.ci/scripts/test_quantized_aot_lib.sh +++ b/.ci/scripts/test_quantized_aot_lib.sh @@ -16,13 +16,10 @@ CMAKE_OUTPUT_DIR=cmake-out build_cmake_quantized_aot_lib() { echo "Building quantized aot lib" - SITE_PACKAGES="$(${PYTHON_EXECUTABLE} -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')" - CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch" (rm -rf ${CMAKE_OUTPUT_DIR} \ && mkdir ${CMAKE_OUTPUT_DIR} \ && cd ${CMAKE_OUTPUT_DIR} \ && retry cmake -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED_AOT=ON \ -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..) diff --git a/.ci/scripts/utils.sh b/.ci/scripts/utils.sh index c21d0bb604e..be684b7bfa2 100644 --- a/.ci/scripts/utils.sh +++ b/.ci/scripts/utils.sh @@ -136,7 +136,6 @@ cmake_install_executorch_lib() { clean_executorch_install_folders retry cmake -DBUCK2="$BUCK" \ -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DCMAKE_PREFIX_PATH="$($PYTHON_EXECUTABLE -c 'import torch as _; print(_.__path__[0])')" \ -DCMAKE_BUILD_TYPE=Release \ -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ -Bcmake-out . 
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 18e34bff72a..04a6c96f3ec 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -394,7 +394,6 @@ jobs: rm -rf cmake-out cmake \ -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DCMAKE_PREFIX_PATH="$(python -c 'import torch as _; print(_.__path__[0])')" \ -DCMAKE_BUILD_TYPE=Release \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ @@ -412,7 +411,6 @@ jobs: cmake \ -DCMAKE_INSTALL_PREFIX=cmake-out \ -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_PREFIX_PATH="$(python -c 'import torch as _; print(_.__path__[0])')" \ -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ diff --git a/CMakeLists.txt b/CMakeLists.txt index 69232b85a53..be0921a0b5a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -761,9 +761,7 @@ if(EXECUTORCH_BUILD_PYBIND) endif() # find pytorch lib, to allow pybind to take at::Tensor as input/output - if(NOT TARGET torch) - find_package(Torch CONFIG REQUIRED) - endif() + find_package_torch() find_library( TORCH_PYTHON_LIBRARY torch_python PATHS "${TORCH_INSTALL_PREFIX}/lib" ) diff --git a/backends/arm/scripts/build_quantized_ops_aot_lib.sh b/backends/arm/scripts/build_quantized_ops_aot_lib.sh index d3d4b669f3b..3c70b48a5dc 100755 --- a/backends/arm/scripts/build_quantized_ops_aot_lib.sh +++ b/backends/arm/scripts/build_quantized_ops_aot_lib.sh @@ -11,9 +11,6 @@ build_type="Release" build_type=${1:-$build_type} -SITE_PACKAGES="$(python3 -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')" -CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch" - echo "--------------------------------------------------------------------------------" echo "Build .so library to register quant ops with AoT flow ${build_type} into '$(echo $(pwd))/cmake-out-aot-lib'" echo "--------------------------------------------------------------------------------" @@ -23,7 +20,6 @@ echo "-------------------------------------------------------------------------- rm -f cmake-out-aot-lib/CMakeCache.txt CXXFLAGS="-fno-exceptions -fno-rtti" cmake \ - -DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \ -DCMAKE_BUILD_TYPE=${build_type} \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED_AOT=ON \ diff --git a/build/Codegen.cmake b/build/Codegen.cmake index f2da23baeaa..9ccb7ec442b 100644 --- a/build/Codegen.cmake +++ b/build/Codegen.cmake @@ -9,6 +9,8 @@ # Selective build. See codegen/tools/gen_oplist.py for how to use these # arguments. +include(${EXECUTORCH_ROOT}/build/Utils.cmake) + function(gen_selected_ops) set(arg_names LIB_NAME OPS_SCHEMA_YAML ROOT_OPS INCLUDE_ALL_OPS) cmake_parse_arguments(GEN "" "" "${arg_names}" ${ARGN}) @@ -145,18 +147,13 @@ function(gen_custom_ops_aot_lib) ${_out_dir}/RegisterCPUCustomOps.cpp ${_out_dir}/RegisterSchema.cpp ${_out_dir}/CustomOpsNativeFunctions.h "${GEN_KERNEL_SOURCES}" ) - # Find `Torch`. - if(NOT TARGET torch) - find_package(Torch REQUIRED) - endif() + find_package_torch() # This lib uses ATen lib, so we explicitly enable rtti and exceptions. 
target_compile_options(${GEN_LIB_NAME} PRIVATE -frtti -fexceptions) target_compile_definitions(${GEN_LIB_NAME} PRIVATE USE_ATEN_LIB=1) include_directories(${TORCH_INCLUDE_DIRS}) target_link_libraries(${GEN_LIB_NAME} PRIVATE torch) - include(${EXECUTORCH_ROOT}/build/Utils.cmake) - target_link_options_shared_lib(${GEN_LIB_NAME}) if(TARGET portable_lib) target_link_libraries(${GEN_LIB_NAME} PRIVATE portable_lib) diff --git a/build/Utils.cmake b/build/Utils.cmake index 3d4e9c76005..a27edf33669 100644 --- a/build/Utils.cmake +++ b/build/Utils.cmake @@ -328,21 +328,61 @@ function(resolve_python_executable) endif() endfunction() -# find_package(Torch CONFIG REQUIRED) replacement for targets that -# have a header-only Torch dependency. Because find_package sets -# variables in the parent scope, we use a macro to preserve this -# rather than maintaining our own list of those variables. +# find_package(Torch CONFIG REQUIRED) replacement for targets that have a +# header-only Torch dependency. Because find_package sets variables in the +# parent scope, we use a macro to preserve this rather than maintaining our own +# list of those variables. macro(find_package_torch_headers) - # We cannot simply use CMAKE_FIND_ROOT_PATH_BOTH, because that does - # not propagate into TorchConfig.cmake. + # We cannot simply use CMAKE_FIND_ROOT_PATH_BOTH, because that does not + # propagate into TorchConfig.cmake. foreach(mode_kind IN ITEMS PACKAGE LIBRARY INCLUDE) - set(OLD_CMAKE_FIND_ROOT_PATH_MODE_${mode_kind} ${CMAKE_FIND_ROOT_PATH_MODE_${mode_kind}}) + set(OLD_CMAKE_FIND_ROOT_PATH_MODE_${mode_kind} + ${CMAKE_FIND_ROOT_PATH_MODE_${mode_kind}} + ) set(CMAKE_FIND_ROOT_PATH_MODE_${mode_kind} BOTH) endforeach() + find_package_torch() + foreach(mode_kind IN ITEMS PACKAGE LIBRARY INCLUDE) + set(CMAKE_FIND_ROOT_PATH_MODE_${mode_kind} + ${OLD_CMAKE_FIND_ROOT_PATH_MODE_${mode_kind}} + ) + endforeach() +endmacro() + +# Add the Torch CMake configuration to CMAKE_PREFIX_PATH so that find_package +# can find Torch. +function(add_torch_to_cmake_prefix_path) + if(NOT PYTHON_EXECUTABLE) + resolve_python_executable() + endif() + execute_process( + COMMAND "${PYTHON_EXECUTABLE}" -c + "import torch as _; print(_.__path__[0], end='')" + OUTPUT_VARIABLE _tmp_torch_path + ERROR_VARIABLE _tmp_torch_path_error + RESULT_VARIABLE _tmp_torch_path_result COMMAND_ECHO STDERR + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(NOT _tmp_torch_path_result EQUAL 0) + message("Error while adding torch to CMAKE_PREFIX_PATH. " + "Exit code: ${_tmp_torch_path_result}" + ) + message("Output:\n${_tmp_torch_path}") + message(FATAL_ERROR "Error:\n${_tmp_torch_path_error}") + endif() + list(APPEND CMAKE_PREFIX_PATH "${_tmp_torch_path}") + set(CMAKE_PREFIX_PATH + "${CMAKE_PREFIX_PATH}" + PARENT_SCOPE + ) +endfunction() + +# Replacement for find_package(Torch CONFIG REQUIRED); sets up CMAKE_PREFIX_PATH +# first and only does the find once. If you have a header-only Torch dependency, +# use find_package_torch_headers instead! 
+macro(find_package_torch) if(NOT TARGET torch) + add_torch_to_cmake_prefix_path() find_package(Torch CONFIG REQUIRED) endif() - foreach(mode_kind IN ITEMS PACKAGE LIBRARY INCLUDE) - set(CMAKE_FIND_ROOT_PATH_MODE_${mode_kind} ${OLD_CMAKE_FIND_ROOT_PATH_MODE_${mode_kind}}) - endforeach() endmacro() diff --git a/build/build_android_llm_demo.sh b/build/build_android_llm_demo.sh index b72968037c1..4119bde4c7e 100644 --- a/build/build_android_llm_demo.sh +++ b/build/build_android_llm_demo.sh @@ -11,7 +11,6 @@ if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then PYTHON_EXECUTABLE=python3 fi which "${PYTHON_EXECUTABLE}" -CMAKE_PREFIX_PATH="$(python3 -c 'import torch as _; print(_.__path__[0])')" build_jar() { pushd extension/android @@ -42,7 +41,6 @@ build_android_native_library() { fi cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \ - -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \ -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \ -DANDROID_ABI="${ANDROID_ABI}" \ -DANDROID_PLATFORM=android-26 \ @@ -76,7 +74,6 @@ build_android_native_library() { -DANDROID_ABI="${ANDROID_ABI}" \ -DANDROID_PLATFORM=android-26 \ -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \ - -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \ -DEXECUTORCH_ENABLE_LOGGING=ON \ -DEXECUTORCH_LOG_LEVEL=Info \ -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \ diff --git a/build/build_apple_frameworks.sh b/build/build_apple_frameworks.sh index 4d793ace0ab..ab2f45e41e2 100755 --- a/build/build_apple_frameworks.sh +++ b/build/build_apple_frameworks.sh @@ -150,7 +150,6 @@ cmake_build() { mkdir "$platform" && cd "$platform" || exit 1 cmake "$SOURCE_ROOT_DIR" -G Xcode \ -DCMAKE_BUILD_TYPE="$MODE" \ - -DCMAKE_PREFIX_PATH="$($PYTHON -c 'import torch as _; print(_.__path__[0])')" \ -DCMAKE_TOOLCHAIN_FILE="$TOOLCHAIN" \ -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD="c++17" \ -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY="libc++" \ diff --git a/docs/source/executorch-arm-delegate-tutorial.md b/docs/source/executorch-arm-delegate-tutorial.md index eaea7fc23bf..ff6d4abbbac 100644 --- a/docs/source/executorch-arm-delegate-tutorial.md +++ b/docs/source/executorch-arm-delegate-tutorial.md @@ -228,8 +228,6 @@ python3 -m examples.arm.aot_arm_compiler --model_name="add" --delegate Before generating the `.pte` file for delegated quantized networks like MobileNetV2, we need to build the `quantized_ops_aot_lib` ```bash -SITE_PACKAGES="$(python3 -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')" -CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch" cd mkdir -p cmake-out-aot-lib @@ -237,7 +235,6 @@ cmake -DCMAKE_BUILD_TYPE=Release \ -DEXECUTORCH_BUILD_XNNPACK=OFF \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED_AOT=ON \ - -DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \ -DPYTHON_EXECUTABLE=python3 \ -Bcmake-out-aot-lib \ "${et_root_dir}" diff --git a/examples/models/llava/CMakeLists.txt b/examples/models/llava/CMakeLists.txt index f22b4471538..ecd00809fdb 100644 --- a/examples/models/llava/CMakeLists.txt +++ b/examples/models/llava/CMakeLists.txt @@ -79,7 +79,7 @@ if(LLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE) add_definitions(-DLLAVA_NO_TORCH_DUMMY_IMAGE=1) message("Buidling the runner without Torch, feeding a dummy image!") else() - find_package(Torch CONFIG REQUIRED) + find_package_torch() endif() # diff --git a/examples/portable/custom_ops/test_custom_ops.sh b/examples/portable/custom_ops/test_custom_ops.sh index 6d83de07d3a..5d21d393686 100644 --- a/examples/portable/custom_ops/test_custom_ops.sh +++ 
b/examples/portable/custom_ops/test_custom_ops.sh @@ -53,8 +53,7 @@ get_shared_lib_ext() { test_cmake_custom_op_2() { local model_name='custom_ops_2' - SITE_PACKAGES="$(${PYTHON_EXECUTABLE} -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')" - CMAKE_PREFIX_PATH="$PWD/cmake-out/lib/cmake/ExecuTorch;${SITE_PACKAGES}/torch" + CMAKE_PREFIX_PATH="$PWD/cmake-out/lib/cmake/ExecuTorch" local example_dir=examples/portable/custom_ops local build_dir=cmake-out/${example_dir} diff --git a/examples/xnnpack/quantization/test_quantize.sh b/examples/xnnpack/quantization/test_quantize.sh index d439fde6cbc..1f50667c788 100644 --- a/examples/xnnpack/quantization/test_quantize.sh +++ b/examples/xnnpack/quantization/test_quantize.sh @@ -44,8 +44,6 @@ test_buck2_quantization() { test_cmake_quantization() { echo "Building quantized ops shared library" - SITE_PACKAGES="$(${PYTHON_EXECUTABLE} -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')" - CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch" clean_executorch_install_folders @@ -56,7 +54,6 @@ test_cmake_quantization() { -DEXECUTORCH_BUILD_XNNPACK="$EXECUTORCH_BUILD_XNNPACK" \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED_AOT=ON \ - -DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \ -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..) cmake --build cmake-out -j4 diff --git a/extension/llm/custom_ops/CMakeLists.txt b/extension/llm/custom_ops/CMakeLists.txt index 4b793905339..95f4bc559fa 100644 --- a/extension/llm/custom_ops/CMakeLists.txt +++ b/extension/llm/custom_ops/CMakeLists.txt @@ -69,9 +69,7 @@ install(TARGETS custom_ops DESTINATION lib) if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT) # Add a AOT library - if(NOT TARGET torch) - find_package(Torch CONFIG REQUIRED) - endif() + find_package_torch() add_library( custom_ops_aot_lib SHARED ${_custom_ops__srcs} diff --git a/extension/llm/export/quantizer_lib.py b/extension/llm/export/quantizer_lib.py index 55e530553f0..40d81075d9f 100644 --- a/extension/llm/export/quantizer_lib.py +++ b/extension/llm/export/quantizer_lib.py @@ -107,10 +107,8 @@ def check_embedding_byte_registered(): raise RuntimeError( "Need to specify shared library path to register quantized ops (and their out variants) into EXIR.\n" "Follow the following steps to build the needed lib via cmake.\n" - 'Use `python -c "import torch as _; print(_.__path__)"` to find where torch package is installed.\n' - "Set that as TORCH_PACKAGE_DIR.\n" "Then from root executorch dir do the following:\n" - "rm -rf cmake-out && mkdir cmake-out && (cd cmake-out && cmake -DBUCK2= -DCMAKE_PREFIX_PATH=$TORCH_PACKAGE_DIR -DEXECUTORCH_BUILD_KERNELS_QUANTIZED_AOT=ON ..) && cmake --build . -j16\n" + "rm -rf cmake-out && mkdir cmake-out && (cd cmake-out && cmake -DBUCK2= -DEXECUTORCH_BUILD_KERNELS_QUANTIZED_AOT=ON ..) && cmake --build . -j16\n" 'To find the location of the lib: find cmake-out -name "libquantized_ops_aot_lib*"\n' "Then specify the said library via -s Path: srcs = tuple(cmake_cache_dir.glob(self.src)) if len(srcs) != 1: raise ValueError( - f"""Expected exactly one file matching '{self.src}'; found {repr(srcs)}. + f"""Expected exactly one file matching '{self.src}'; found {repr(srcs)}. If that file is a CMake-built extension module file, and we are installing in editable mode, please disable the corresponding build option since it's not supported yet. -Try: +Try: EXECUTORCH_BUILD_FLATC=OFF EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT=OFF pip install -e . 
""" diff --git a/test/run_oss_cpp_tests.sh b/test/run_oss_cpp_tests.sh index df7955c4d41..f747100006d 100755 --- a/test/run_oss_cpp_tests.sh +++ b/test/run_oss_cpp_tests.sh @@ -32,10 +32,8 @@ build_executorch() { if [ -x "$(command -v glslc)" ]; then BUILD_VULKAN="ON" fi - CMAKE_PREFIX_PATH="$(python3 -c 'import torch as _; print(_.__path__[0])')" cmake . \ -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \ -DEXECUTORCH_USE_CPP_CODE_COVERAGE=ON \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ From 1b6c12dd79ee597d6502e4a0da27b191357722b4 Mon Sep 17 00:00:00 2001 From: Kimish Patel Date: Tue, 18 Feb 2025 13:04:41 -0800 Subject: [PATCH 003/584] [Executorch] Refactor op_add to support op_sub broadcasting (#8255) * [Executorch] Refactor op_mul's broadcasting utils Summary: Refactoring broadcast handling utils that were added for op_mul. This is in prepartion use these utils to handle broadcast for other ops such as add, sub, div. Plus remove a redundant test Test Plan: optimized_kernels_test in CI Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] * [ExecuTorch] Add broadcast support for optimized add op Summary: This brings add op to feature parity, wrt, broadcasting, to mul op in optimized kernels lib Test Plan: tests added Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] * Update on "[ExecuTorch] Add broadcast support for optimized add op" Summary: This brings add op to feature parity, wrt, broadcasting, to mul op in optimized kernels lib Test Plan: tests added Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] * [Executorch] Refactor op_add to support op_sub broadcasting Summary: Refactor op_add to conslidate commong broadcasting related improvements Test Plan: Previously added tests Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] * Update base for Update on "[Executorch] Refactor op_add to support op_sub broadcasting" Summary: Refactor op_add to conslidate commong broadcasting related improvements Test Plan: Previously added tests Reviewers: Subscribers: Tasks: Tags: cc larryliu0820 manuelcandales [ghstack-poisoned] * Update base for Update on "[Executorch] Refactor op_add to support op_sub broadcasting" Summary: Refactor op_add to conslidate commong broadcasting related improvements Test Plan: Previously added tests Reviewers: Subscribers: Tasks: Tags: cc larryliu0820 manuelcandales [ghstack-poisoned] * Update base for Update on "[Executorch] Refactor op_add to support op_sub broadcasting" Summary: Refactor op_add to conslidate commong broadcasting related improvements Test Plan: Previously added tests Reviewers: Subscribers: Tasks: Tags: cc larryliu0820 manuelcandales [ghstack-poisoned] * Update base for Update on "[Executorch] Refactor op_add to support op_sub broadcasting" Summary: Refactor op_add to conslidate commong broadcasting related improvements Test Plan: Previously added tests Reviewers: Subscribers: Tasks: Tags: cc larryliu0820 manuelcandales Differential Revision: [D69491817](https://our.internmc.facebook.com/intern/diff/D69491817) [ghstack-poisoned] * Update base for Update on "[Executorch] Refactor op_add to support op_sub broadcasting" Summary: Refactor op_add to conslidate commong broadcasting related improvements Test Plan: Previously added tests Reviewers: Subscribers: Tasks: Tags: cc larryliu0820 manuelcandales Differential Revision: [D69491817](https://our.internmc.facebook.com/intern/diff/D69491817) [ghstack-poisoned] --- kernels/optimized/cpu/op_add.cpp | 155 
+---------------- kernels/optimized/cpu/op_add_sub_impl.h | 218 ++++++++++++++++++++++++ kernels/optimized/cpu/targets.bzl | 9 + 3 files changed, 235 insertions(+), 147 deletions(-) create mode 100644 kernels/optimized/cpu/op_add_sub_impl.h diff --git a/kernels/optimized/cpu/op_add.cpp b/kernels/optimized/cpu/op_add.cpp index dbf828e5882..5f164f1eb13 100644 --- a/kernels/optimized/cpu/op_add.cpp +++ b/kernels/optimized/cpu/op_add.cpp @@ -14,59 +14,11 @@ #include #include +#include + namespace torch { namespace executor { namespace native { -namespace { - -template < - bool can_cast, - typename CTYPE_A, - typename CTYPE_B, - typename CTYPE_IN, - typename CTYPE_OUT> -struct AddInner; - -template < - typename CTYPE_A, - typename CTYPE_B, - typename CTYPE_IN, - typename CTYPE_OUT> -struct AddInner { - static void - run(const Tensor& a, const Tensor& b, CTYPE_IN alpha_val, Tensor& out) { - apply_binary_elementwise_fn( - // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue) - [alpha_val](const CTYPE_A val_a, const CTYPE_B val_b) { - CTYPE_IN a_casted = static_cast(val_a); - CTYPE_IN b_casted = static_cast(val_b); - CTYPE_IN value = a_casted + alpha_val * b_casted; - - return static_cast(value); - }, - a, - b, - out); - } -}; - -template -struct ReportCanCastBug { - static void run(const Tensor&, const Tensor&, CTYPE_IN, Tensor&) { - ET_DCHECK_MSG(false, "BUG: canCast should have been checked above"); - } -}; - -template < - typename CTYPE_A, - typename CTYPE_B, - typename CTYPE_IN, - typename CTYPE_OUT> -struct AddInner - : public ReportCanCastBug {}; - -} // namespace - using Tensor = executorch::aten::Tensor; using ScalarType = executorch::aten::ScalarType; @@ -76,8 +28,6 @@ Tensor& opt_add_out( const Tensor& b, const Scalar& alpha, Tensor& out) { - (void)ctx; - ScalarType a_type = a.scalar_type(); ScalarType b_type = b.scalar_type(); ScalarType out_type = out.scalar_type(); @@ -95,7 +45,9 @@ Tensor& opt_add_out( ET_SWITCH_REALB_TYPES(b_type, ctx, "add.out", CTYPE_B, [&]() { CTYPE alpha_val; ET_KERNEL_CHECK( - ctx, utils::extract_scalar(alpha, &alpha_val), InvalidArgument, ); + ctx, + torch::executor::native::utils::extract_scalar(alpha, &alpha_val), + InvalidArgument, ); CTYPE_B b_val = *b.const_data_ptr(); CTYPE b_casted = static_cast(b_val); @@ -115,100 +67,9 @@ Tensor& opt_add_out( return opt_add_out(ctx, b, a, alpha, out); } - auto selected_optimized_path = select_optimized_path(a, b, out); - if (selected_optimized_path == ElementwiseOptimizedPath::kTreatAs1d) { - // Resize for dynamic shape - auto error = resize_tensor(out, a.sizes()); - ET_KERNEL_CHECK_MSG( - ctx, - error == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - - ET_SWITCH_REALB_TYPES(a_type, ctx, "add.out", CTYPE, [&]() { - CTYPE alpha_val; - ET_KERNEL_CHECK( - ctx, utils::extract_scalar(alpha, &alpha_val), InvalidArgument, ); - - using Vec = executorch::vec::Vectorized; - executorch::vec::map2( - [alpha_val](Vec x, Vec y) { return x + Vec(alpha_val) * y; }, - out.mutable_data_ptr(), - a.const_data_ptr(), - b.const_data_ptr(), - out.numel()); - }); - } else if (selected_optimized_path != ElementwiseOptimizedPath::kNone) { - ET_SWITCH_REALB_TYPES(out_type, ctx, "add.out", CTYPE, [&]() { - CTYPE alpha_val; - ET_KERNEL_CHECK_MSG( - ctx, - utils::extract_scalar(alpha, &alpha_val), - InvalidArgument, - out, - "Failed to extract scalar alpha."); - using Vec = executorch::vec::Vectorized; - Vec alpha_val_vec(alpha_val); - if (selected_optimized_path == - 
ElementwiseOptimizedPath::kBroadcast2dBy1dReverseArguments || - selected_optimized_path == - ElementwiseOptimizedPath::kBroadcastLastDimReverseArguments || - selected_optimized_path == - ElementwiseOptimizedPath::kBroadcastNdByNdReverseArguments) { - // Reason we swap out args here is because handle_broadcast_elementwise - // handles this selected_optimized_path option a bit differently. - // This should really be resolved in handle_broadcast_elementwise. - // However, the current blocker is that handle_broadcast_elementwise - // tries to be agnostic of op. This should be fixed, likely by moving - // lambda creation to handle_broadcast_elementwise and it be aware of - // which op is being executed. - auto add_lambda = [&alpha_val_vec](auto x, auto y) { - return y + alpha_val_vec * x; - }; - return torch::executor::handle_broadcast_elementwise( - ctx, add_lambda, a, b, out, selected_optimized_path, alpha); - } else { - auto add_lambda = [&alpha_val_vec](auto x, auto y) { - return x + alpha_val_vec * y; - }; - return torch::executor::handle_broadcast_elementwise( - ctx, add_lambda, a, b, out, selected_optimized_path, alpha); - } - }); - } else { - ScalarType common_type = - promoteTypes(a_type, b_type, /*half_to_float*/ true); - ET_KERNEL_CHECK(ctx, canCast(common_type, out_type), InvalidArgument, out); - - ET_KERNEL_CHECK( - ctx, - resize_to_broadcast_target_size(a, b, out) == Error::Ok, - InvalidArgument, - out); - - ET_SWITCH_REALHBBF16_TYPES(a_type, ctx, "add.out", CTYPE_A, [&]() { - ET_SWITCH_REALHBBF16_TYPES(b_type, ctx, "add.out", CTYPE_B, [&]() { - using CTYPE_IN = typename torch::executor:: - promote_types::type; - ET_DCHECK(CppTypeToScalarType::value == common_type); - ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, "add.out", CTYPE_OUT, [&]() { - CTYPE_IN alpha_val; - ET_KERNEL_CHECK( - ctx, utils::extract_scalar(alpha, &alpha_val), InvalidArgument, ); - - AddInner< - can_cast::value, - CTYPE_A, - CTYPE_B, - CTYPE_IN, - CTYPE_OUT>::run(a, b, alpha_val, out); - }); - }); - }); - } - - return out; + static constexpr const char op_name[] = "add.out"; + return torch::executor::kernels::impl::opt_add_sub_out_impl( + ctx, a, b, alpha, out); } Tensor& opt_add_scalar_out( diff --git a/kernels/optimized/cpu/op_add_sub_impl.h b/kernels/optimized/cpu/op_add_sub_impl.h new file mode 100644 index 00000000000..6fb8574688b --- /dev/null +++ b/kernels/optimized/cpu/op_add_sub_impl.h @@ -0,0 +1,218 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +namespace torch { +namespace executor { +namespace kernels { +namespace impl { + +namespace { +template < + bool can_cast, + typename CTYPE_A, + typename CTYPE_B, + typename CTYPE_IN, + typename CTYPE_OUT> +struct AddInner; + +template < + typename CTYPE_A, + typename CTYPE_B, + typename CTYPE_IN, + typename CTYPE_OUT> +struct AddInner { + static void + run(const Tensor& a, const Tensor& b, CTYPE_IN alpha_val, Tensor& out) { + apply_binary_elementwise_fn( + // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue) + [alpha_val](const CTYPE_A val_a, const CTYPE_B val_b) { + CTYPE_IN a_casted = static_cast(val_a); + CTYPE_IN b_casted = static_cast(val_b); + CTYPE_IN value = a_casted + alpha_val * b_casted; + + return static_cast(value); + }, + a, + b, + out); + } +}; + +template +struct ReportCanCastBug { + static void run(const Tensor&, const Tensor&, CTYPE_IN, Tensor&) { + ET_DCHECK_MSG(false, "BUG: canCast should have been checked above"); + } +}; + +template < + typename CTYPE_A, + typename CTYPE_B, + typename CTYPE_IN, + typename CTYPE_OUT> +struct AddInner + : public ReportCanCastBug {}; + +} // namespace + +using Tensor = executorch::aten::Tensor; +using ScalarType = executorch::aten::ScalarType; + +template +Tensor& opt_add_sub_out_impl( + KernelRuntimeContext& ctx, + const Tensor& a, + const Tensor& b, + const Scalar& alpha, + Tensor& out) { + (void)ctx; + + ScalarType a_type = a.scalar_type(); + ScalarType b_type = b.scalar_type(); + ScalarType out_type = out.scalar_type(); + + auto selected_optimized_path = select_optimized_path(a, b, out); + if (selected_optimized_path == ElementwiseOptimizedPath::kTreatAs1d) { + // Resize for dynamic shape + auto error = resize_tensor(out, a.sizes()); + ET_KERNEL_CHECK_MSG( + ctx, + error == Error::Ok, + InvalidArgument, + out, + "Failed to resize output tensor."); + + ET_SWITCH_REALB_TYPES(a_type, ctx, op_name, CTYPE, [&]() { + CTYPE alpha_val; + ET_KERNEL_CHECK( + ctx, + torch::executor::native::utils::extract_scalar(alpha, &alpha_val), + InvalidArgument, ); + if constexpr (is_sub) { + alpha_val = -alpha_val; + } + using Vec = executorch::vec::Vectorized; + executorch::vec::map2( + [alpha_val](Vec x, Vec y) { return x + Vec(alpha_val) * y; }, + out.mutable_data_ptr(), + a.const_data_ptr(), + b.const_data_ptr(), + out.numel()); + }); + } else if (selected_optimized_path != ElementwiseOptimizedPath::kNone) { + // Cannot apply the trick of -alpha here because alpha is Scalar without + // support for - operator. At least not right now. 
+ ET_SWITCH_REALB_TYPES(out_type, ctx, op_name, CTYPE, [&]() { + CTYPE alpha_val; + ET_KERNEL_CHECK_MSG( + ctx, + torch::executor::native::utils::extract_scalar(alpha, &alpha_val), + InvalidArgument, + out, + "Failed to extract scalar alpha."); + using Vec = executorch::vec::Vectorized; + Vec alpha_val_vec(alpha_val); + if constexpr (is_sub) { + if (selected_optimized_path == + ElementwiseOptimizedPath::kBroadcast2dBy1dReverseArguments || + selected_optimized_path == + ElementwiseOptimizedPath::kBroadcastLastDimReverseArguments || + selected_optimized_path == + ElementwiseOptimizedPath::kBroadcastNdByNdReverseArguments) { + auto add_lambda = [&alpha_val_vec](auto x, auto y) { + return y - alpha_val_vec * x; + }; + return torch::executor::handle_broadcast_elementwise( + ctx, add_lambda, a, b, out, selected_optimized_path, alpha); + } else { + auto add_lambda = [&alpha_val_vec](auto x, auto y) { + return x - alpha_val_vec * y; + }; + return torch::executor::handle_broadcast_elementwise( + ctx, add_lambda, a, b, out, selected_optimized_path, alpha); + } + } else { + if (selected_optimized_path == + ElementwiseOptimizedPath::kBroadcast2dBy1dReverseArguments || + selected_optimized_path == + ElementwiseOptimizedPath::kBroadcastLastDimReverseArguments || + selected_optimized_path == + ElementwiseOptimizedPath::kBroadcastNdByNdReverseArguments) { + // Reason we swap out args here is because + // handle_broadcast_elementwise handles this selected_optimized_path + // option a bit differently. This should really be resolved in + // handle_broadcast_elementwise. However, the current blocker is that + // handle_broadcast_elementwise tries to be agnostic of op. This + // should be fixed, likely by moving lambda creation to + // handle_broadcast_elementwise and it be aware of which op is being + // executed. 
+ auto add_lambda = [&alpha_val_vec](auto x, auto y) { + return y + alpha_val_vec * x; + }; + return torch::executor::handle_broadcast_elementwise( + ctx, add_lambda, a, b, out, selected_optimized_path, alpha); + } else { + auto add_lambda = [&alpha_val_vec](auto x, auto y) { + return x + alpha_val_vec * y; + }; + return torch::executor::handle_broadcast_elementwise( + ctx, add_lambda, a, b, out, selected_optimized_path, alpha); + } + } + }); + } else { + ScalarType common_type = + promoteTypes(a_type, b_type, /*half_to_float*/ true); + ET_KERNEL_CHECK(ctx, canCast(common_type, out_type), InvalidArgument, out); + + ET_KERNEL_CHECK( + ctx, + resize_to_broadcast_target_size(a, b, out) == Error::Ok, + InvalidArgument, + out); + + ET_SWITCH_REALHBBF16_TYPES(a_type, ctx, op_name, CTYPE_A, [&]() { + ET_SWITCH_REALHBBF16_TYPES(b_type, ctx, op_name, CTYPE_B, [&]() { + using CTYPE_IN = typename torch::executor:: + promote_types::type; + ET_DCHECK(CppTypeToScalarType::value == common_type); + ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, op_name, CTYPE_OUT, [&]() { + CTYPE_IN alpha_val; + ET_KERNEL_CHECK( + ctx, + torch::executor::native::utils::extract_scalar(alpha, &alpha_val), + InvalidArgument, ); + if constexpr (is_sub) { + alpha_val = -alpha_val; + } + + AddInner< + can_cast::value, + CTYPE_A, + CTYPE_B, + CTYPE_IN, + CTYPE_OUT>::run(a, b, alpha_val, out); + }); + }); + }); + } + + return out; +} +} // namespace impl +} // namespace kernels +} // namespace executor +} // namespace torch diff --git a/kernels/optimized/cpu/targets.bzl b/kernels/optimized/cpu/targets.bzl index 1c62b683b8f..94ceb1f4dc1 100644 --- a/kernels/optimized/cpu/targets.bzl +++ b/kernels/optimized/cpu/targets.bzl @@ -6,6 +6,7 @@ _OPTIMIZED_ATEN_OPS = ( name = "op_add", deps = [ ":binary_ops", + ":add_sub_impl", "//executorch/kernels/portable/cpu:scalar_utils", "//executorch/kernels/portable/cpu/util:broadcast_util", ], @@ -123,6 +124,14 @@ def define_common_targets(): aten_op_targets = [":{}".format(op["name"]) for op in enabled_ops] all_op_targets = aten_op_targets + runtime.cxx_library( + name = "add_sub_impl", + srcs = [], + exported_headers = ["op_add_sub_impl.h"], + visibility = ["//executorch/kernels/optimized/cpu/..."], + exported_deps = ["//executorch/runtime/core:core"], + ) + runtime.cxx_library( name = "binary_ops", exported_headers = ["binary_ops.h"], From 7bab59f4af6d78727ebb4df536cef0f75de3ecd3 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 18 Feb 2025 13:12:40 -0800 Subject: [PATCH 004/584] s/ET_LOG_MSG_AND_RETURN_IF_FALSE/ET_CHECK_OR_RETURN_FALSE (#8469) Just a big find/replace with fastmod. 
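(Editor's illustration, not part of the original commit: a repository-wide mechanical rename like the one described above is typically done with a single fastmod invocation along the lines of the command below. The exact flags, extension list, and working directory are assumptions; the commit does not record the command actually used.)

    fastmod --accept-all --extensions cpp,h 'ET_LOG_MSG_AND_RETURN_IF_FALSE' 'ET_CHECK_OR_RETURN_FALSE'
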
--- extension/llm/custom_ops/op_sdpa.cpp | 42 ++++++----- extension/llm/custom_ops/op_update_cache.cpp | 12 ++-- kernels/optimized/cpu/op_bmm.cpp | 17 +++-- .../portable/cpu/op_convolution_backward.cpp | 8 +-- .../cpu/op_linear_scratch_example.cpp | 6 +- kernels/portable/cpu/op_repeat.cpp | 2 +- kernels/portable/cpu/op_repeat_interleave.cpp | 10 +-- kernels/portable/cpu/op_topk.cpp | 2 +- .../portable/cpu/util/activation_ops_util.cpp | 8 +-- .../portable/cpu/util/advanced_index_util.cpp | 17 +++-- kernels/portable/cpu/util/copy_ops_util.cpp | 70 +++++++++---------- kernels/portable/cpu/util/distance_util.cpp | 8 +-- kernels/portable/cpu/util/index_util.cpp | 36 +++++----- kernels/portable/cpu/util/kernel_ops_util.cpp | 38 +++++----- .../cpu/util/normalization_ops_util.cpp | 14 ++-- kernels/portable/cpu/util/reduce_util.cpp | 2 +- kernels/portable/cpu/util/repeat_util.cpp | 10 +-- kernels/portable/cpu/util/slice_util.cpp | 10 ++- kernels/prim_ops/et_view.cpp | 6 +- kernels/quantized/cpu/op_mixed_linear.cpp | 8 +-- kernels/quantized/cpu/op_mixed_mm.cpp | 6 +- runtime/core/exec_aten/util/tensor_util.h | 54 +++++++------- .../core/exec_aten/util/tensor_util_aten.cpp | 10 +-- .../exec_aten/util/tensor_util_portable.cpp | 2 +- 24 files changed, 195 insertions(+), 203 deletions(-) diff --git a/extension/llm/custom_ops/op_sdpa.cpp b/extension/llm/custom_ops/op_sdpa.cpp index d23572d8d04..f0a7775e803 100644 --- a/extension/llm/custom_ops/op_sdpa.cpp +++ b/extension/llm/custom_ops/op_sdpa.cpp @@ -594,46 +594,46 @@ bool validate_flash_attention_args( const Tensor& key, const Tensor& value, const optional& attn_mask) { - ET_LOG_MSG_AND_RETURN_IF_FALSE(query.dim() == 4, "query must be a 4D tensor"); - ET_LOG_MSG_AND_RETURN_IF_FALSE(key.dim() == 4, "key must be a 4D tensor"); - ET_LOG_MSG_AND_RETURN_IF_FALSE(value.dim() == 4, "value must be a 4D tensor"); + ET_CHECK_OR_RETURN_FALSE(query.dim() == 4, "query must be a 4D tensor"); + ET_CHECK_OR_RETURN_FALSE(key.dim() == 4, "key must be a 4D tensor"); + ET_CHECK_OR_RETURN_FALSE(value.dim() == 4, "value must be a 4D tensor"); // Sizes - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( (query.size(3) == value.size(3)) && (key.size(3) == value.size(3)), "scaled_dot_product_attention_flash_attention: Q/K/V should have the same head size"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( (query.scalar_type() == ScalarType::Float), "Query must be Float type"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( (query.scalar_type() == key.scalar_type()) && (query.scalar_type() == value.scalar_type()), "Key and Value must have the same data type as Query"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( !attn_mask.has_value() || attn_mask.value().dim() == 2, "Attention mask must be a 2D tensor"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( !attn_mask.has_value() || attn_mask.value().scalar_type() == query.scalar_type(), "Attention mask must be a 2D tensor"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( is_contiguous_dim_order(query.dim_order().data(), query.dim()), "key cache must be in contiguous dim order"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( is_contiguous_dim_order(key.dim_order().data(), key.dim()), "value cache must be in contiguous dim order"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( is_contiguous_dim_order(value.dim_order().data(), value.dim()), "value cache must be in contiguous dim order"); if (attn_mask.has_value()) { - 
ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( is_contiguous_dim_order( attn_mask.value().dim_order().data(), attn_mask.value().dim()), "value cache must be in contiguous dim order"); @@ -647,21 +647,19 @@ bool validate_cache_params( const Tensor& v_cache, int64_t start_pos, int64_t seq_length) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( - k_cache.dim() == 4, "kcache must be a 4D tensor"); + ET_CHECK_OR_RETURN_FALSE(k_cache.dim() == 4, "kcache must be a 4D tensor"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( - v_cache.dim() == 4, "v_cache must be a 4D tensor"); + ET_CHECK_OR_RETURN_FALSE(v_cache.dim() == 4, "v_cache must be a 4D tensor"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( start_pos < k_cache.size(1), "start_pos must be less than key cache at dim 1"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( start_pos < v_cache.size(1), "start_pos must be less than value cache at dim 1"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( (start_pos + seq_length) <= k_cache.size(1), "start_post + seq_length must be less than max seq length supported by key cache." "start pos: %" PRId64 ", seq_length: %" PRId64 @@ -671,7 +669,7 @@ bool validate_cache_params( seq_length, k_cache.size(1)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( (start_pos + seq_length) <= v_cache.size(1), "start_post + seq_length must be less than max seq length supported by key cache." "start pos: %" PRId64 ", seq_length: %" PRId64 @@ -682,11 +680,11 @@ bool validate_cache_params( v_cache.size(1)); // Make sure they are in contiguous dim order - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( is_contiguous_dim_order(k_cache.dim_order().data(), k_cache.dim()), "key cache must be in contiguous dim order"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( is_contiguous_dim_order(v_cache.dim_order().data(), v_cache.dim()), "value cache must be in contiguous dim order"); diff --git a/extension/llm/custom_ops/op_update_cache.cpp b/extension/llm/custom_ops/op_update_cache.cpp index bbc0190dab1..323b7a65ddb 100644 --- a/extension/llm/custom_ops/op_update_cache.cpp +++ b/extension/llm/custom_ops/op_update_cache.cpp @@ -25,17 +25,17 @@ bool validate_cache_params( const Tensor& quantized_cache, int64_t start_pos, int64_t seq_length) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( quantized_cache.dim() == 4, "quantized cache must be a 4D tensor"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( quantized_value.dim() == 4, "quantized_value must be a 4D tensor"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( start_pos < quantized_cache.size(1), "start_pos must be less than cache size at dim 1"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( (start_pos + seq_length) <= quantized_cache.size(1), "start_post + seq_length must be less than max seq length supported by cache." 
"start pos: %" PRId64 ", seq_length: %" PRId64 @@ -46,12 +46,12 @@ bool validate_cache_params( quantized_cache.size(1)); // Make sure they are in contiguous dim order - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( is_contiguous_dim_order( quantized_cache.dim_order().data(), quantized_cache.dim()), "quantized cache must be in contiguous dim order"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( is_contiguous_dim_order( quantized_value.dim_order().data(), quantized_value.dim()), "quantized value must be in contiguous dim order"); diff --git a/kernels/optimized/cpu/op_bmm.cpp b/kernels/optimized/cpu/op_bmm.cpp index 21ae7dfca90..5e7fa1dd839 100644 --- a/kernels/optimized/cpu/op_bmm.cpp +++ b/kernels/optimized/cpu/op_bmm.cpp @@ -31,39 +31,38 @@ namespace { // Verifies that the parameters are valid. bool check_bmm_out_args(const Tensor& self, const Tensor& mat2, Tensor& out) { // Ensure dimensions is 3 for all input and out - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( self.dim() == mat2.dim(), "self.dim() %zd != mat2.dim() %zd", self.dim(), mat2.dim()); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( self.dim() == out.dim(), "self.dim() %zd != out.dim() %zd", self.dim(), out.dim()); - ET_LOG_MSG_AND_RETURN_IF_FALSE( - self.dim() == 3, "self.dim() %zd != 3", self.dim()); + ET_CHECK_OR_RETURN_FALSE(self.dim() == 3, "self.dim() %zd != 3", self.dim()); // Ensure batch larger than or equals to 0 - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( self.size(0) >= 0, "self.size(0) %zd < 0", self.size(0)); // Ensure batches are the same - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( self.size(0) == mat2.size(0), "self.size(0) %zd != mat2.size(0) %zd", self.size(0), mat2.size(0)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( self.size(0) == out.size(0), "self.size(0) %zd != out.size(0) %zd", self.size(0), out.size(0)); // Ensure the out size is compatible with input tensors - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( mat2.size(2) == out.size(2), "mat2.size(2) %zd != out.size(2) %zd", mat2.size(2), out.size(2)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( self.size(1) == out.size(1), "self.size(1) %zd != out.size(1) %zd", self.size(1), diff --git a/kernels/portable/cpu/op_convolution_backward.cpp b/kernels/portable/cpu/op_convolution_backward.cpp index 7884ea0c44c..cd635cda8f9 100644 --- a/kernels/portable/cpu/op_convolution_backward.cpp +++ b/kernels/portable/cpu/op_convolution_backward.cpp @@ -38,9 +38,9 @@ bool check_convolution_backward_args( Tensor& grad_input, Tensor& grad_weight, Tensor& grad_bias) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( transposed == false, "Transposed Convolution Backward not supported yet"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( weight.dim() == 4, "Only 2D Convolution Backward supported for now"); ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(weight, input)); @@ -58,7 +58,7 @@ bool check_convolution_backward_args( ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(grad_bias, input)); } - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( check_convolution_args( input, weight, @@ -89,7 +89,7 @@ bool check_convolution_backward_args( ET_LOG_AND_RETURN_IF_FALSE( output_size_is_valid({output_sizes, output_ndim}, input.dim() - 2)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( grad_output.dim() == input.dim(), "grad_output should have same number of dimensions as 
input"); diff --git a/kernels/portable/cpu/op_linear_scratch_example.cpp b/kernels/portable/cpu/op_linear_scratch_example.cpp index b217e9ad942..eae2417fe32 100644 --- a/kernels/portable/cpu/op_linear_scratch_example.cpp +++ b/kernels/portable/cpu/op_linear_scratch_example.cpp @@ -40,13 +40,13 @@ bool check_linear_scratch_example_args( const optional& bias, Tensor& out, Tensor& scratch) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( input.size(1) == weight.size(1), "Unexpected weight size 1"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( scratch.size(0) == input.size(0), "Unexpected scratch size 0"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( scratch.size(1) == weight.size(0), "Unexpected scratch size 1"); return true; diff --git a/kernels/portable/cpu/op_repeat.cpp b/kernels/portable/cpu/op_repeat.cpp index 8b64eefde31..dc9a7232152 100644 --- a/kernels/portable/cpu/op_repeat.cpp +++ b/kernels/portable/cpu/op_repeat.cpp @@ -23,7 +23,7 @@ bool calculate_output_size( Tensor::SizesType* out_sizes_ptr) { ET_LOG_AND_RETURN_IF_FALSE(repeats.size() < kTensorDimensionLimit); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( repeats.size() >= self_sizes.size(), "Repeats vector size is %zu must be >= self_sizes %zu.", repeats.size(), diff --git a/kernels/portable/cpu/op_repeat_interleave.cpp b/kernels/portable/cpu/op_repeat_interleave.cpp index c8a84e8c748..61c9fbfdb82 100644 --- a/kernels/portable/cpu/op_repeat_interleave.cpp +++ b/kernels/portable/cpu/op_repeat_interleave.cpp @@ -18,12 +18,12 @@ bool check_repeat_interleave_args( int64_t output_size_value, int64_t repeats_sum, Tensor& out) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( repeats.scalar_type() == ScalarType::Int || repeats.scalar_type() == ScalarType::Long, "repeats must be int or long"); - ET_LOG_MSG_AND_RETURN_IF_FALSE(repeats.dim() == 1, "repeats must be 1D"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE(repeats.dim() == 1, "repeats must be 1D"); + ET_CHECK_OR_RETURN_FALSE( output_size_value == repeats_sum, "output_size, if provided, must be equal to repeats.sum()"); ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(repeats, out)); @@ -31,13 +31,13 @@ bool check_repeat_interleave_args( if (repeats.scalar_type() == ScalarType::Long) { const int64_t* const repeats_data = repeats.const_data_ptr(); for (size_t i = 0; i < repeats.numel(); ++i) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( repeats_data[i] >= 0, "repeats cannot be negative"); } } else { const int32_t* const repeats_data = repeats.const_data_ptr(); for (size_t i = 0; i < repeats.numel(); ++i) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( repeats_data[i] >= 0, "repeats cannot be negative"); } } diff --git a/kernels/portable/cpu/op_topk.cpp b/kernels/portable/cpu/op_topk.cpp index 987e974bbf5..e6ba9afef2c 100644 --- a/kernels/portable/cpu/op_topk.cpp +++ b/kernels/portable/cpu/op_topk.cpp @@ -28,7 +28,7 @@ bool check_topk_args( if (dim < 0) { dim += nonzero_dim(in); } - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( k >= 0 && k <= nonempty_size(in, dim), "selected index k out of range"); return true; } diff --git a/kernels/portable/cpu/util/activation_ops_util.cpp b/kernels/portable/cpu/util/activation_ops_util.cpp index 908758a2e36..70be6367c76 100644 --- a/kernels/portable/cpu/util/activation_ops_util.cpp +++ b/kernels/portable/cpu/util/activation_ops_util.cpp @@ -17,7 +17,7 @@ namespace executor { bool check_gelu_args(const 
Tensor& in, string_view approximate, Tensor& out) { ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out)); ET_LOG_AND_RETURN_IF_FALSE(in.scalar_type() != ScalarType::Bool); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( approximate == "tanh" || approximate == "none", "Invalid approximation format: %.*s for gelu", static_cast(approximate.length()), @@ -32,7 +32,7 @@ bool check_glu_args(const Tensor& in, int64_t dim, Tensor& out) { const size_t non_negative_dim = dim < 0 ? dim + in.dim() : dim; const size_t dim_size = in.size(non_negative_dim); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( dim_size % 2 == 0, "Halving dimension must be even, but dimension %zd is size %zd", non_negative_dim, @@ -40,7 +40,7 @@ bool check_glu_args(const Tensor& in, int64_t dim, Tensor& out) { ET_LOG_AND_RETURN_IF_FALSE(tensor_is_floating_type(out)); ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_rank(in, out)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( out.size(non_negative_dim) == dim_size / 2, "output tensor must have half the size of the input tensor along the specified dimension."); @@ -73,7 +73,7 @@ bool check_log_softmax_args( int64_t dim, bool half_to_float, Tensor& out) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( !half_to_float, "half to float conversion is not supported on CPU"); ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out)); ET_LOG_AND_RETURN_IF_FALSE(tensor_has_dim(in, dim)); diff --git a/kernels/portable/cpu/util/advanced_index_util.cpp b/kernels/portable/cpu/util/advanced_index_util.cpp index cc205df0e43..68faa192b44 100644 --- a/kernels/portable/cpu/util/advanced_index_util.cpp +++ b/kernels/portable/cpu/util/advanced_index_util.cpp @@ -24,7 +24,7 @@ bool check_indices_dtypes(TensorOptList indices) { if (indices[i].has_value()) { const Tensor& index = indices[i].value(); ScalarType ix_type = index.scalar_type(); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( ix_type == ScalarType::Long || ix_type == ScalarType::Int || ix_type == ScalarType::Byte || ix_type == ScalarType::Bool, "Index tensors should be Long, Int, Byte or Bool"); @@ -47,7 +47,7 @@ bool check_mask_indices(const Tensor& in, TensorOptList indices) { if (indices[i].has_value()) { const Tensor& index = indices[i].value(); if (is_mask_index(index)) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( index.dim() > 0, "Zero-dimensional mask index not allowed"); for (auto j = 0; j < index.dim(); j++) { if (index.size(j) != in.size(in_i + j)) { @@ -156,7 +156,7 @@ int64_t query_integral_index( bool check_index_args(const Tensor& in, TensorOptList indices, Tensor& out) { ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out)); ET_LOG_AND_RETURN_IF_FALSE(check_indices_dtypes(indices)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( indices.size() <= in.dim(), "Indexing too many dimensions"); ET_LOG_AND_RETURN_IF_FALSE(check_mask_indices(in, indices)); return true; @@ -197,8 +197,7 @@ bool get_indices_broadcast_shape( } else if (rev_ix_sizes[0] == 1) { rev_ix_sizes[0] = len; } else if (len != 1 && rev_ix_sizes[0] != len) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( - false, "Broadcast of mask index failed."); + ET_CHECK_OR_RETURN_FALSE(false, "Broadcast of mask index failed."); } } else { for (size_t j = 0; j < index.dim(); j++) { @@ -209,7 +208,7 @@ bool get_indices_broadcast_shape( } else if (rev_ix_sizes[j] == 1) { rev_ix_sizes[j] = rev_j_size; } else if (rev_j_size != 1 && rev_ix_sizes[j] != rev_j_size) { - 
ET_LOG_MSG_AND_RETURN_IF_FALSE(false, "Broadcast of index failed."); + ET_CHECK_OR_RETURN_FALSE(false, "Broadcast of index failed."); } } } @@ -290,11 +289,11 @@ bool get_index_out_target_size( size_t num_null_indices = get_num_null_indices(indices); size_t num_indexed_dims = get_num_indexed_dims(indices); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( num_null_indices + num_indexed_dims <= in.dim(), "Indexing too many dimensions"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( in.dim() + broadcast_ndim - num_indexed_dims <= kTensorDimensionLimit, "Out tensor would exceed number of allowed dimensions"); @@ -441,7 +440,7 @@ bool get_in_coord( if (index_val < 0) { index_val += in.size(i); } - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( index_val >= 0 && index_val < in.size(i), "Index %" PRId64 " is out of bounds for input dimension %zd with size %zd.", diff --git a/kernels/portable/cpu/util/copy_ops_util.cpp b/kernels/portable/cpu/util/copy_ops_util.cpp index 78b66b05f22..f0e1d8b30d2 100644 --- a/kernels/portable/cpu/util/copy_ops_util.cpp +++ b/kernels/portable/cpu/util/copy_ops_util.cpp @@ -44,16 +44,16 @@ bool check_as_strided_copy_args( optional storage_offset, Tensor& out) { ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( size.size() == stride.size(), "mismatch in length of strides and shape"); for (const auto& val : stride) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( val >= 0, "as_strided: Negative strides are not supported at the moment"); } int64_t offset = storage_offset.has_value() ? storage_offset.value() : 0; - ET_LOG_MSG_AND_RETURN_IF_FALSE(offset >= 0, "Negative storage offset"); + ET_CHECK_OR_RETURN_FALSE(offset >= 0, "Negative storage offset"); // Check that the requested storage is within bounds of input storage size_t storage_size_bytes = @@ -63,7 +63,7 @@ bool check_as_strided_copy_args( return true; } size_t new_storage_size_bytes = in.nbytes(); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( storage_size_bytes + storage_offset_bytes <= new_storage_size_bytes, "Requiring a storage size of %zd are out of bounds for storage of size %zd", storage_size_bytes + storage_offset_bytes, @@ -159,17 +159,17 @@ bool check_expand_copy_args( Tensor& out) { (void)out; - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( implicit == false, "This operator is not implemented for when implicit == true."); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( expand_sizes.size() >= input.sizes().size(), "The number of sizes provided (%zu) must at least be equal to the number of dimensions in the tensor (%zu)", expand_sizes.size(), input.sizes().size()); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( expand_sizes.size() <= kTensorDimensionLimit, "The number of expanded dims (%zu) exceeds the configured maximum (%zu). 
Increase this limit.", expand_sizes.size(), @@ -198,7 +198,7 @@ bool get_expand_copy_out_target_size( // -1 can use for replacing any corresponding dimension output_sizes[j] = self_sizes[i]; } else if (self_sizes[i] != 1) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( expand_sizes[j] == self_sizes[i], "The expanded size of the tensor (%zu) must match the existing size (%zu) at non-singleton dimension %zu.", (size_t)expand_sizes[j], @@ -211,7 +211,7 @@ bool get_expand_copy_out_target_size( while (j > 0) { --j; output_sizes[j] = expand_sizes[j]; - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( expand_sizes[j] >= 0, "The expanded size of the tensor (%zu) isn't allowed in a leading, non-existing dimension %zu", (size_t)expand_sizes[j], @@ -241,7 +241,7 @@ bool check_permute_copy_args(const Tensor& in, IntArrayRef dims, Tensor& out) { ET_LOG_AND_RETURN_IF_FALSE(dim < kTensorDimensionLimit && dim >= 0); // Check that the dimension hasn't been seen previously. - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( dim_exist[dim] == false, "duplicate dims are not allowed."); dim_exist[dim] = true; @@ -251,13 +251,13 @@ bool check_permute_copy_args(const Tensor& in, IntArrayRef dims, Tensor& out) { } bool check_unbind_copy_args(const Tensor& in, int64_t dim, TensorList out) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( in.dim() > 0, "in must have at least one dimension; saw %zd", in.dim()); ET_LOG_AND_RETURN_IF_FALSE(dim_is_valid(dim, in.dim())); const ssize_t dim_size = in.size(dim); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( dim_size == out.size(), "out tensorlist's length %zd must equal unbind dim %" PRId64 " size = %zd.", @@ -268,7 +268,7 @@ bool check_unbind_copy_args(const Tensor& in, int64_t dim, TensorList out) { // Validate each output. for (size_t i = 0; i < out.size(); ++i) { // All output dtypes must be the same. - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( out[i].scalar_type() == out[0].scalar_type(), "out[%zu] dtype %" PRId8 " != out[0] dtype %" PRId8, i, @@ -276,7 +276,7 @@ bool check_unbind_copy_args(const Tensor& in, int64_t dim, TensorList out) { static_cast(out[0].scalar_type())); // output tensor must have # of dims = in.dim() -1 - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( out[i].dim() == (in.dim() - 1), "out[%zu] dim %zd != in dim %zd", i, @@ -286,7 +286,7 @@ bool check_unbind_copy_args(const Tensor& in, int64_t dim, TensorList out) { // Check the shape of the output. 
for (ssize_t d = 0, out_d = 0; d < in.dim(); ++d) { if (d != dim) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( out[i].size(out_d) == in.size(d), "out[%zu].size(%zd) %zd != in.size(%zd) %zd", i, @@ -421,19 +421,19 @@ bool check_split_with_sizes_copy_args( ET_LOG_AND_RETURN_IF_FALSE(tensor_has_rank_greater_or_equal_to(in, 1)); ET_LOG_AND_RETURN_IF_FALSE(tensor_has_dim(in, dim)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( split_sizes.size() == out.size(), "Number of split sizes must match the number of output tensors"); int64_t sum = 0; for (int i = 0; i < split_sizes.size(); i++) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( split_sizes[i] >= 0, "All split sizes must be non negative."); sum += split_sizes[i]; } const ssize_t dim_size = in.size(dim); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( sum == dim_size, "Sum of split sizes does not match input size at given dim"); @@ -506,7 +506,7 @@ bool check_squeeze_copy_dims_args( if (i != j) { const int64_t dim_temp = dims[j] < 0 ? dims[j] + nonzero_dim(in) : dims[j]; - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( dim != dim_temp, "dim %" PRId64 " appears multiple times in dims!", dim); @@ -612,22 +612,22 @@ bool check_split_copy_args( int64_t split_size, int64_t dim, TensorList out) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( input.dim() > 0, "input must have at least one dimension; saw %zd", input.dim()); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( dim >= 0 && dim < input.dim(), "dim %" PRId64 " out of range [0,%zd)", dim, input.dim()); const ssize_t dim_size = input.size(dim); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( split_size >= 0, "split_size %" PRId64 " must be non-negative", split_size); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( split_size > 0 || dim_size == 0, "split_size is zero but input.size(%" PRId64 ") %zd is non-zero", dim, @@ -646,7 +646,7 @@ bool check_split_copy_args( // Note that this also handles the case where split_size == 0, avoiding a // division by zero in the other branch. When dim_size == 0 && split_size == // 0, core PyTorch expects 1 output element. - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( out.size() == 1, "Unexpected out.size() %zu: should be 1 because split_size %" PRId64 " >= input.size(%" PRId64 ") %zd", @@ -657,7 +657,7 @@ bool check_split_copy_args( remainder = dim_size; } else { int64_t expected_out_len = (dim_size + split_size - 1) / split_size; - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( out.size() == expected_out_len, "Unexpected out.size() %zu: ceil(input.size(%" PRId64 ")=%zd" @@ -676,7 +676,7 @@ bool check_split_copy_args( // Validate each output. for (size_t i = 0; i < out.size(); ++i) { // All output dtypes must be the same. - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( out[i].scalar_type() == out[0].scalar_type(), "out[%zu] dtype %" PRId8 " != out[0] dtype %" PRId8, i, @@ -684,7 +684,7 @@ bool check_split_copy_args( static_cast(out[0].scalar_type())); // All outputs must have the same number of dimensions as the input. - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( out[i].dim() == input.dim(), "out[%zu] dim %zd != input dim %zd", i, @@ -698,7 +698,7 @@ bool check_split_copy_args( if (i < out.size() - 1) { // All outputs except the final one: split dimension should be // split_size. 
- ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( out[i].size(d) == split_size, "out[%zu].size(%zd) %zd != split_size %" PRId64, i, @@ -708,7 +708,7 @@ bool check_split_copy_args( } else { // The final output: split dimension should be the remainder of // split_size. - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( out[i].size(d) == remainder, "out[%zu].size(%zd) %zd != remainder %" PRId64, i, @@ -811,7 +811,7 @@ bool check_unsqueeze_copy_args( } if (d < dim_normalized) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( input.size(d) == out.size(d), "input.size(%zu) %zd != out.size(%zu) %zd | dim = %" PRId64, d, @@ -820,7 +820,7 @@ bool check_unsqueeze_copy_args( out.size(d), dim); } else if (d > dim_normalized) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( input.size(d - 1) == out.size(d), "input.size(%zu) %zd != out.size(%zu) %zd | dim = %" PRId64, d - 1, @@ -829,7 +829,7 @@ bool check_unsqueeze_copy_args( out.size(d), dim); } else { // d == dim - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( out.size(d) == 1, "out.size(%zu) %zd shall equal 1 | dim = %" PRId64, d, @@ -848,7 +848,7 @@ bool check_view_copy_args( ET_LOG_AND_RETURN_IF_FALSE(size_int64_t.size() == out.sizes().size()); // The input and out shall share same dtype and numel - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( self.numel() == out.numel(), "self.numel() %zd != out.numel() %zd", self.numel(), @@ -860,7 +860,7 @@ bool check_view_copy_args( for (int i = 0; i < size_int64_t.size(); i++) { // If this value is -1 it implies that this dimension is inferred. if (size_int64_t[i] == -1) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( !size_inferred, "Multiple dimensions cannot be inferred."); size_inferred = true; } @@ -888,7 +888,7 @@ bool get_view_copy_target_size( out_numels_without_minus_1 = out_numels_without_minus_1 * size_int64_t[i]; } else { // TODO(kimishpatel): Add test to hit this line - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( minus_1_dim == -1, "At most one view copy dim can be -1."); minus_1_dim = i; } diff --git a/kernels/portable/cpu/util/distance_util.cpp b/kernels/portable/cpu/util/distance_util.cpp index f8dc2f71216..21a111d2c47 100644 --- a/kernels/portable/cpu/util/distance_util.cpp +++ b/kernels/portable/cpu/util/distance_util.cpp @@ -14,8 +14,7 @@ namespace executor { bool check_pdist_args(const Tensor& in, double p, const Tensor& out) { ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out)); ET_LOG_AND_RETURN_IF_FALSE(tensor_is_rank(in, 2)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( - p >= 0, "pdist only supports non-negative p values"); + ET_CHECK_OR_RETURN_FALSE(p >= 0, "pdist only supports non-negative p values"); return true; } @@ -40,11 +39,10 @@ bool check_cdist_args( ET_LOG_AND_RETURN_IF_FALSE(tensor_has_rank_greater_or_equal_to(x2, 2)); ET_LOG_AND_RETURN_IF_FALSE( tensors_have_same_size_at_dims(x1, x1.dim() - 1, x2, x2.dim() - 1)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( - p >= 0, "cdist only supports non-negative p values"); + ET_CHECK_OR_RETURN_FALSE(p >= 0, "cdist only supports non-negative p values"); if (compute_mode.has_value()) { int64_t mode = compute_mode.value(); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( mode >= 0 && mode <= 2, "possible modes: 0, 1, 2, but was: %" PRId64, mode); diff --git a/kernels/portable/cpu/util/index_util.cpp b/kernels/portable/cpu/util/index_util.cpp index fb54980bb47..909b00db3aa 100644 --- a/kernels/portable/cpu/util/index_util.cpp 
+++ b/kernels/portable/cpu/util/index_util.cpp @@ -20,11 +20,11 @@ bool check_gather_args( Tensor& out) { ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out)); ET_LOG_AND_RETURN_IF_FALSE(tensor_has_dim(in, dim)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( index.scalar_type() == ScalarType::Long, "Expected dypte int64 for index"); if (index.numel() != 0) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( nonzero_dim(in) == nonzero_dim(index), "self and index should have the same dimensionality when index is not empty " "except for the case when one has dimension 0 and the other has dimension 1"); @@ -37,7 +37,7 @@ bool check_gather_args( for (size_t d = 0; d < nonzero_dim(in); ++d) { if (d != dim) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( nonempty_size(index, d) <= nonempty_size(in, d), "size of dimension %zd of index should be smaller than the size of that dimension of input if dimension %zd != dim %zd", d, @@ -47,7 +47,7 @@ bool check_gather_args( } const long* index_data = index.const_data_ptr(); for (size_t i = 0; i < index.numel(); ++i) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( index_data[i] >= 0 && index_data[i] < nonempty_size(in, dim), "Index is out of bounds for dimension %zd with size %zd", (size_t)dim, @@ -64,12 +64,12 @@ bool check_index_select_args( Tensor& out) { ET_LOG_AND_RETURN_IF_FALSE(tensor_has_dim(in, dim)); dim = dim < 0 ? dim + nonzero_dim(in) : dim; - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( nonempty_size(in, dim) > 0, "index_select: Indexing axis dim should be positive"); ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( index.scalar_type() == ScalarType::Long || index.scalar_type() == ScalarType::Int, "Expected index to have type of Long or Int, but found %s", @@ -77,7 +77,7 @@ bool check_index_select_args( ET_LOG_AND_RETURN_IF_FALSE(tensor_has_rank_smaller_or_equal_to(index, 1)); if (index.dim() > 0 && in.dim() == 0) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( index.numel() == 1, "index_select: Index to scalar must have exactly 1 value"); } @@ -85,7 +85,7 @@ bool check_index_select_args( if (index.scalar_type() == ScalarType::Long) { const int64_t* const index_ptr = index.const_data_ptr(); for (size_t i = 0; i < index.numel(); ++i) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( index_ptr[i] >= 0 && index_ptr[i] < nonempty_size(in, dim), "index[%zu] = %" PRId64 " is out of range [0, %zd)", i, @@ -95,7 +95,7 @@ bool check_index_select_args( } else { const int32_t* const index_ptr = index.const_data_ptr(); for (size_t i = 0; i < index.numel(); ++i) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( index_ptr[i] >= 0 && index_ptr[i] < nonempty_size(in, dim), "index[%zu] = %" PRId32 " is out of range [0, %zd)", i, @@ -126,12 +126,12 @@ void get_index_select_out_target_size( bool check_nonzero_args(const Tensor& in, const Tensor& out) { (void)in; - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( out.scalar_type() == ScalarType::Long, "Expected out to be a Long tensor but received %" PRId8, static_cast(out.scalar_type())); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( out.dim() == 2, "Expected out to be a 2d tensor received %zd", ssize_t(out.dim())); @@ -147,7 +147,7 @@ bool check_scatter_add_args( Tensor& out) { ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(self, out)); 
ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(self, src)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( index.scalar_type() == ScalarType::Long, "Expected dypte int64 for index"); ET_LOG_AND_RETURN_IF_FALSE(tensor_has_dim(self, dim)); @@ -156,7 +156,7 @@ bool check_scatter_add_args( return true; } - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( nonzero_dim(self) == nonzero_dim(src) && nonzero_dim(self) == nonzero_dim(index), "self, index and src should have same number of dimensions."); @@ -167,12 +167,12 @@ bool check_scatter_add_args( } for (size_t d = 0; d < nonzero_dim(self); ++d) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( nonempty_size(index, d) <= nonempty_size(src, d), "size of dimension %zd of index should be smaller than the size of that dimension of src", d); if (d != dim) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( nonempty_size(index, d) <= nonempty_size(self, d), "size of dimension %zd of index should be smaller than the size of that dimension of self if dimension %zd != dim %zd", d, @@ -182,7 +182,7 @@ bool check_scatter_add_args( } const long* index_data = index.const_data_ptr(); for (size_t i = 0; i < index.numel(); ++i) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( index_data[i] >= 0 && index_data[i] < nonempty_size(self, dim), "Index is out of bounds for dimension %zd with size %zd", (size_t)dim, @@ -228,7 +228,7 @@ bool check_select_scatter_args( ET_LOG_AND_RETURN_IF_FALSE(dim_is_valid(dim, in.dim())); // The index shall be valid in the given dimenson - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( index >= 0 && index < in.size(dim), "index %" PRId64 " out of range [-%zd,%zd) at in.size( %" PRId64 ")", index, @@ -239,7 +239,7 @@ bool check_select_scatter_args( // The src.dim() shall be one lower than in.dim() since src needs to fit // into the selected data on one dim of input // https://pytorch.org/docs/stable/generated/torch.select_scatter.html - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( in.dim() == src.dim() + 1, "in.dim() %zd != src.dim() + 1 %zd", in.dim(), diff --git a/kernels/portable/cpu/util/kernel_ops_util.cpp b/kernels/portable/cpu/util/kernel_ops_util.cpp index 2e267b57715..1e851ccb1ef 100644 --- a/kernels/portable/cpu/util/kernel_ops_util.cpp +++ b/kernels/portable/cpu/util/kernel_ops_util.cpp @@ -26,14 +26,14 @@ bool param_array_is_valid( bool allow_empty) { auto size = array.size(); if (allow_empty) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( size == 0 || size == 1 || size == length, "Expected %s to have size 0, 1 or %zu but got %zd", name, length, size); } else { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( size == 1 || size == length, "Expected %s to have size 1 or %zu but got %zd", name, @@ -126,7 +126,7 @@ bool output_padding_is_valid( const int64_t op_i = val_at(output_padding, i); const int64_t s_i = val_at(stride, i); const int64_t d_i = val_at(dilation, i); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( op_i < s_i || op_i < d_i, "output padding must be smaller than either stride or dilation"); } @@ -246,12 +246,12 @@ void calculate_kernel_output_sizes( } bool check_arange_args(double start, double end, double step, Tensor& out) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( out.dim() == 1, "out should be a 1-d tensor, but got a %zu-d tensor", out.dim()); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( (step > 0 && (end >= start)) || (step < 0 
&& (end <= start)), "upper bound and larger bound inconsistent with step sign"); @@ -272,7 +272,7 @@ bool check_avg_pool2d_args( ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_or_channels_last_dim_order(in)); ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_or_channels_last_dim_order(out)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( (in.dim() == 3 && in.size(0) > 0 && in.size(1) > 0 && in.size(2) > 0) || (in.dim() == 4 && in.size(1) > 0 && in.size(2) > 0 && in.size(3) > 0), "Expected 3D or 4D (batch mode) tensor with optional 0 dim batch size for input"); @@ -285,7 +285,7 @@ bool check_avg_pool2d_args( padding, kernel_size, /*kernel_ndim=*/2, /*enforce_half_kernel=*/true)); if (divisor_override.has_value()) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( divisor_override.value() != 0, "divisor_override must be non-zero, but found %" PRId64, divisor_override.value()); @@ -334,7 +334,7 @@ bool check_convolution_args( tensor_is_default_or_channels_last_dim_order(weight)); ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_or_channels_last_dim_order(out)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( in.dim() == 3 || in.dim() == 4, "Expect input tensor to be 3-D or 4-D, but got, %zu.", static_cast(in.dim())); @@ -343,7 +343,7 @@ bool check_convolution_args( if (bias.has_value()) { ET_LOG_AND_RETURN_IF_FALSE(tensor_is_rank(bias.value(), 1)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( bias.value().size(0) == transposed ? groups * weight.size(1) : weight.size(0), "bias length must equal number of output channels, but got %zd", @@ -369,14 +369,14 @@ bool check_convolution_args( output_padding_is_valid(output_padding, stride, dilation, kernel_ndim)); } - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( weight.size(0) >= groups, "Given groups=%" PRId64 ", expected weight to be at least %" PRId64 " at dimension 0, but got weight.size(0) = %zd instead", groups, groups, weight.size(0)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( weight.size(0) % groups == 0, "Given groups=%" PRId64 ", expected weight to be divisible by %" PRId64 " at dimension 0, but got weight.size(0) = %zd instead", @@ -385,7 +385,7 @@ bool check_convolution_args( weight.size(0)); if (!transposed) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( in.size(1) == groups * weight.size(1), "Given groups=%" PRId64 " and weight.size(1) = %zd, expected input to have %" PRId64 @@ -395,7 +395,7 @@ bool check_convolution_args( groups * weight.size(1), in.size(1)); } else { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( in.size(1) == weight.size(0), "input channels must match weight.size(0) in transposed convolution"); } @@ -472,7 +472,7 @@ bool check_max_pool2d_with_indices_args( Tensor& out, Tensor& indices) { ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( indices.scalar_type() == ScalarType::Long, "Expected indices to have type of Long, but found %s", toString(indices.scalar_type())); @@ -480,7 +480,7 @@ bool check_max_pool2d_with_indices_args( ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_or_channels_last_dim_order(in)); ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_or_channels_last_dim_order(out)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( (in.dim() == 3 && in.size(0) > 0 && in.size(1) > 0 && in.size(2) > 0) || (in.dim() == 4 && in.size(1) > 0 && in.size(2) > 0 && in.size(3) > 0), "Expected 3D or 4D (batch mode) tensor with 
optional 0 dim batch size for input"); @@ -543,10 +543,10 @@ bool check_constant_pad_args( ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_rank(in, out)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( pad.size() % 2 == 0, "Padding array must be a multiple of 2"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( pad.size() / 2 <= in.dim(), "Padding array contains too many elements"); return true; @@ -578,13 +578,13 @@ bool check_embedding_args( const Tensor& indices, const Tensor& out) { // Ensure weight is 2-D. It could be empty. - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( weight.dim() == 2, "weight.dim() %zd != 2", weight.dim()); // Ensure out is k+1 dimension tensor where k is the indices.dim() // out's first k dimension shall be same as indices, and the last dim shall // equal weight's last dim - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( out.dim() == indices.dim() + 1, "out.dim() %zd != indices.dim() %zd + 1", out.dim(), diff --git a/kernels/portable/cpu/util/normalization_ops_util.cpp b/kernels/portable/cpu/util/normalization_ops_util.cpp index 684417f448a..9f3ce5cc112 100644 --- a/kernels/portable/cpu/util/normalization_ops_util.cpp +++ b/kernels/portable/cpu/util/normalization_ops_util.cpp @@ -81,15 +81,15 @@ bool check_layer_norm_args( Tensor& mean_out, Tensor& rstd_out) { size_t ndim = normalized_shape.size(); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( ndim >= 1, "Expected normalized_shape to be at least 1-dimensional, i.e., containing at least one element."); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( in.dim() >= ndim, "Expected input tensor to have rank >= the length of normalized_shape."); size_t shift = in.dim() - ndim; for (size_t d = 0; d < ndim; ++d) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( in.size(d + shift) == normalized_shape[d], "Expected normalized_shape to match the sizes of input's rightmost dimensions."); } @@ -144,16 +144,16 @@ bool check_group_norm_args( ET_LOG_AND_RETURN_IF_FALSE(in.size(0) == N); ET_LOG_AND_RETURN_IF_FALSE(in.size(1) == C); ET_LOG_AND_RETURN_IF_FALSE(in.numel() == N * C * HxW); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( group > 0, "Expected number of groups to be greater than 0"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( C % group == 0, "Expected number of channels in input to be divisible by number of groups"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( !weight.has_value() || (weight.value().dim() == 1 && weight.value().size(0) == C), "Expected weight to be a vector of size equal to the number of channels in input"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( !bias.has_value() || (bias.value().dim() == 1 && bias.value().size(0) == C), "Expected bias to be a vector of size equal to the number of channels in input"); diff --git a/kernels/portable/cpu/util/reduce_util.cpp b/kernels/portable/cpu/util/reduce_util.cpp index 65140fc6643..fb6ac202f44 100644 --- a/kernels/portable/cpu/util/reduce_util.cpp +++ b/kernels/portable/cpu/util/reduce_util.cpp @@ -51,7 +51,7 @@ ET_NODISCARD bool check_dim_list_is_valid( ET_LOG_AND_RETURN_IF_FALSE( non_neg_d < kTensorDimensionLimit && non_neg_d >= 0); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( dim_exist[non_neg_d] == false, "dim %zd appears multiple times in the list of dims", non_neg_d); diff --git a/kernels/portable/cpu/util/repeat_util.cpp b/kernels/portable/cpu/util/repeat_util.cpp index 
d373a86c16c..bcb7a7ae0f9 100644 --- a/kernels/portable/cpu/util/repeat_util.cpp +++ b/kernels/portable/cpu/util/repeat_util.cpp @@ -25,7 +25,7 @@ bool check_repeat_args( executorch::aten::ArrayRef repeats, Tensor& out) { // Ensure the self tensors list is non-empty. - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( repeats.size() >= self.dim(), "Number of dimensions of repeat dims can not be smaller than number of dimensions of tensor"); @@ -34,11 +34,11 @@ bool check_repeat_args( for (auto repeat : repeats) { all_non_negative = all_non_negative && (repeat >= 0); } - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( all_non_negative, "Trying to create tensor with negative dimension"); /// Check if out.size() is legal. - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( out.dim() == repeats.size(), "The dimension of out shall equal size of repeats, but now is %zd and %zd", out.dim(), @@ -47,7 +47,7 @@ bool check_repeat_args( // Right now we only support the tensors whose dimension is no greater than // kTensorDimensionLimit. Only check out tensor because the number of // dimension of out tensor shall have more than or equal to self tensor - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( out.dim() <= kTensorDimensionLimit, "The dimension of input and output should not be larger than %zd", kTensorDimensionLimit); @@ -66,7 +66,7 @@ bool check_repeat_args( reformat_self_size[out.dim() - 1 - i] = self.size(self.dim() - 1 - i); } for (size_t i = 0; i < repeats.size(); i++) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( reformat_self_size[i] * repeats[i] == out.size(i), "Expect out size at dimension %zu is %" PRId64 ", but now is %zd", i, diff --git a/kernels/portable/cpu/util/slice_util.cpp b/kernels/portable/cpu/util/slice_util.cpp index a948a370de2..e6444bd074a 100644 --- a/kernels/portable/cpu/util/slice_util.cpp +++ b/kernels/portable/cpu/util/slice_util.cpp @@ -24,7 +24,7 @@ bool check_narrow_copy_args( ET_LOG_AND_RETURN_IF_FALSE(in.dim() > 0); ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out)); ET_LOG_AND_RETURN_IF_FALSE(tensor_has_dim(in, dim)); - ET_LOG_MSG_AND_RETURN_IF_FALSE(lenth >= 0, "lenth must be non-negative"); + ET_CHECK_OR_RETURN_FALSE(lenth >= 0, "lenth must be non-negative"); ET_LOG_AND_RETURN_IF_FALSE(start >= -in.size(dim)); ET_LOG_AND_RETURN_IF_FALSE(start <= in.size(dim)); if (start < 0) { @@ -56,8 +56,7 @@ bool check_slice_copy_args( ET_LOG_AND_RETURN_IF_FALSE(in.dim() > 0); ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out)); ET_LOG_AND_RETURN_IF_FALSE(tensor_has_dim(in, dim)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( - step > 0, "slice step must be greater than zero"); + ET_CHECK_OR_RETURN_FALSE(step > 0, "slice step must be greater than zero"); return true; } @@ -89,8 +88,7 @@ bool check_slice_scatter_args( ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_rank(input, src)); // Check step. 
Step must be greater than zero - ET_LOG_MSG_AND_RETURN_IF_FALSE( - step > 0, "slice step must be greater than zero"); + ET_CHECK_OR_RETURN_FALSE(step > 0, "slice step must be greater than zero"); // The size of src tensor should follow these rules: // - src.size(i) shall equal to input.size(i) if i != dim, @@ -100,7 +98,7 @@ bool check_slice_scatter_args( ET_LOG_AND_RETURN_IF_FALSE( tensors_have_same_size_at_dims(input, d, src, d)); } else { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( src.size(d) == num_values, "input.size(%zu) %zd != num_values %" PRId64 " | dim = %" PRId64 ")", d, diff --git a/kernels/prim_ops/et_view.cpp b/kernels/prim_ops/et_view.cpp index 66aa9ac87e2..0f041dae00f 100644 --- a/kernels/prim_ops/et_view.cpp +++ b/kernels/prim_ops/et_view.cpp @@ -38,13 +38,13 @@ bool get_view_target_size( int64_t numel_without_minus_1 = 1; for (int i = 0; i < dim; i++) { if (size[i] == -1) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( minus1_dim == -1, "At most one view dim can be -1."); minus1_dim = i; } else { // The size[i] must be non-negative now, but we check size[i] >= -1 // in case code is reordered in the future. - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( size[i] >= -1, "Negative sizes are not allowed."); numel_without_minus_1 *= size[i]; @@ -56,7 +56,7 @@ bool get_view_target_size( } } if (minus1_dim >= 0) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( n_zero == 0, "Cannot infer dimension size if there is a zero dim."); out_size[minus1_dim] = self.numel() / numel_without_minus_1; } diff --git a/kernels/quantized/cpu/op_mixed_linear.cpp b/kernels/quantized/cpu/op_mixed_linear.cpp index d09d0bdd5e1..c97ed2cb7c9 100644 --- a/kernels/quantized/cpu/op_mixed_linear.cpp +++ b/kernels/quantized/cpu/op_mixed_linear.cpp @@ -36,13 +36,13 @@ bool check_quantized_mixed_linear_args( ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, weight_scales)); if (dtype.has_value()) { ET_LOG_AND_RETURN_IF_FALSE(out.scalar_type() == dtype.value()); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( dtype.value() == ScalarType::Float || dtype.value() == ScalarType::Half, "dtype must be Float or Half"); } - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( weight.scalar_type() == ScalarType::Char, "weight dtype must be int8"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( in.scalar_type() == ScalarType::Float || in.scalar_type() == ScalarType::Half, "input dtype must be Float or Half"); @@ -55,7 +55,7 @@ bool check_quantized_mixed_linear_args( } // Support for non-null zero points is not implemented yet. 
- ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( !opt_weight_zero_points.has_value(), "zero points not supported yet."); return true; } diff --git a/kernels/quantized/cpu/op_mixed_mm.cpp b/kernels/quantized/cpu/op_mixed_mm.cpp index 044e110bf5c..564de74dfde 100644 --- a/kernels/quantized/cpu/op_mixed_mm.cpp +++ b/kernels/quantized/cpu/op_mixed_mm.cpp @@ -31,9 +31,9 @@ bool check_quantized_mixed_mm_args( tensors_have_same_size_at_dims(weight_scales, 0, weight, 0)); ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, weight_scales, out)); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( weight.scalar_type() == ScalarType::Char, "weight dtype must be int8"); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( in.scalar_type() == ScalarType::Float || in.scalar_type() == ScalarType::Half, "input dtype must be Float or Half"); @@ -46,7 +46,7 @@ bool check_quantized_mixed_mm_args( } // Support for non-null zero points is not implemented yet. - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( !opt_weight_zero_points.has_value(), "zero points not supported yet."); return true; } diff --git a/runtime/core/exec_aten/util/tensor_util.h b/runtime/core/exec_aten/util/tensor_util.h index d7edcfd21d5..d7917e37b19 100644 --- a/runtime/core/exec_aten/util/tensor_util.h +++ b/runtime/core/exec_aten/util/tensor_util.h @@ -406,7 +406,7 @@ namespace runtime { * upper_bound - 1, inclusive. */ inline bool dim_is_valid(int64_t dim, int64_t upper_bound) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( dim >= -upper_bound && dim < upper_bound, "Dimension %" PRId64 " is out of range. Dimension should be between %" PRId64 " and %" PRId64 @@ -443,7 +443,7 @@ inline ssize_t nonempty_size( inline bool tensor_can_cast_to( executorch::aten::Tensor a, executorch::aten::ScalarType dtype) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( torch::executor::canCast(a.scalar_type(), dtype), "Tensor of dtype %s cannot cast to dtype %s", torch::executor::toString(a.scalar_type()), @@ -453,7 +453,7 @@ inline bool tensor_can_cast_to( } inline bool tensor_is_bool_type(executorch::aten::Tensor t) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( t.scalar_type() == executorch::aten::ScalarType::Bool, "Expected to find bool type, but tensor has type %s", torch::executor::toString(t.scalar_type())); @@ -464,7 +464,7 @@ inline bool tensor_is_bool_type(executorch::aten::Tensor t) { inline bool tensor_is_type( executorch::aten::Tensor t, executorch::aten::ScalarType dtype) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( t.scalar_type() == dtype, "Expected to find %s type, but tensor has type %s", torch::executor::toString(dtype), @@ -476,7 +476,7 @@ inline bool tensor_is_type( inline bool tensor_is_integral_type( executorch::aten::Tensor t, bool includeBool = false) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( torch::executor::isIntegralType(t.scalar_type(), includeBool), "Expected to find a integral type, but tensor has type %s", torch::executor::toString(t.scalar_type())); @@ -485,7 +485,7 @@ inline bool tensor_is_integral_type( } inline bool tensor_is_floating_type(executorch::aten::Tensor t) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( torch::executor::isFloatingType(t.scalar_type()), "Expected to find a floating type, but tensor has type %s", torch::executor::toString(t.scalar_type())); @@ -494,7 +494,7 @@ inline bool tensor_is_floating_type(executorch::aten::Tensor t) { } inline bool 
tensor_is_real_type(executorch::aten::Tensor t) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( torch::executor::isRealType(t.scalar_type()), "Expected to find a real type, but tensor has type %s", torch::executor::toString(t.scalar_type())); @@ -503,7 +503,7 @@ inline bool tensor_is_real_type(executorch::aten::Tensor t) { } inline bool tensor_is_realh_type(executorch::aten::Tensor t) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( torch::executor::isRealHType(t.scalar_type()), "Expected to find a real type, but tensor has type %s", torch::executor::toString(t.scalar_type())); @@ -512,7 +512,7 @@ inline bool tensor_is_realh_type(executorch::aten::Tensor t) { } inline bool tensor_is_realhbf16_type(executorch::aten::Tensor t) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( executorch::runtime::isRealHBF16Type(t.scalar_type()), "Expected to find a real type, but tensor has type %s", torch::executor::toString(t.scalar_type())); @@ -521,7 +521,7 @@ inline bool tensor_is_realhbf16_type(executorch::aten::Tensor t) { } inline bool tensor_is_realhb_type(executorch::aten::Tensor t) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( torch::executor::isRealHBType(t.scalar_type()), "Expected to find a real type, but tensor has type %s", torch::executor::toString(t.scalar_type())); @@ -530,7 +530,7 @@ inline bool tensor_is_realhb_type(executorch::aten::Tensor t) { } inline bool tensor_is_realhbbf16_type(executorch::aten::Tensor t) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( executorch::runtime::isRealHBBF16Type(t.scalar_type()), "Expected to find a real type, but tensor has type %s", torch::executor::toString(t.scalar_type())); @@ -539,7 +539,7 @@ inline bool tensor_is_realhbbf16_type(executorch::aten::Tensor t) { } inline bool tensor_is_complex_type(executorch::aten::Tensor t) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( torch::executor::isComplexType(t.scalar_type()), "Expected to find a complex type, but tensor has type %s", torch::executor::toString(t.scalar_type())); @@ -548,7 +548,7 @@ inline bool tensor_is_complex_type(executorch::aten::Tensor t) { } inline bool tensor_is_bits_type(executorch::aten::Tensor t) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( torch::executor::isBitsType(t.scalar_type()), "Expected to find a bits type, but tensor has type %s", torch::executor::toString(t.scalar_type())); @@ -559,7 +559,7 @@ inline bool tensor_is_bits_type(executorch::aten::Tensor t) { inline bool tensors_have_same_dtype( executorch::aten::Tensor a, executorch::aten::Tensor b) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( a.scalar_type() == b.scalar_type(), ET_TENSOR_CHECK_PREFIX__ ": dtype={%s, %s}", torch::executor::toString(a.scalar_type()), @@ -571,7 +571,7 @@ inline bool tensors_have_same_dtype( executorch::aten::Tensor a, executorch::aten::Tensor b, executorch::aten::Tensor c) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( a.scalar_type() == b.scalar_type() && b.scalar_type() == c.scalar_type(), ET_TENSOR_CHECK_PREFIX__ ": dtype={%s, %s, %s}", torch::executor::toString(a.scalar_type()), @@ -581,7 +581,7 @@ inline bool tensors_have_same_dtype( } inline bool tensor_is_rank(executorch::aten::Tensor t, size_t rank) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( t.dim() == rank, "Expected tensor.dim() to be %zu, but got %zu", static_cast(rank), @@ -593,7 +593,7 @@ inline bool tensor_is_rank(executorch::aten::Tensor t, size_t rank) { 
inline bool tensor_has_rank_greater_or_equal_to( executorch::aten::Tensor t, size_t rank) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( t.dim() >= rank, "Expected tensor.dim() to be >= %zu, but got %zu", static_cast(rank), @@ -605,7 +605,7 @@ inline bool tensor_has_rank_greater_or_equal_to( inline bool tensor_has_rank_smaller_or_equal_to( executorch::aten::Tensor t, size_t rank) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( t.dim() <= rank, "Expected tensor.dim() to be <= %zu, but got %zu", static_cast(rank), @@ -616,12 +616,12 @@ inline bool tensor_has_rank_smaller_or_equal_to( inline bool tensor_has_dim(executorch::aten::Tensor t, int64_t d) { if (t.dim() == 0) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( d == 0 || d == -1, "dim must be 0 or -1 for 0-dim tensor, got %" PRId64, d); } else { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( d > 0 ? d < t.dim() : t.dim() + d >= 0, "%zu-dim tensor does not have dim at index %zu", static_cast(t.dim()), @@ -647,7 +647,7 @@ tensor_dim_has_index(executorch::aten::Tensor t, int64_t d, int64_t ix) { // Dimension must have been already checked by tensor_has_dim ET_CHECK(d >= 0 && d < t.dim()); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( ix >= -t.size(d) && ix < t.size(d), "index %" PRId64 " out of range [-%zu,%zu) at dimension %" PRId64 ")", ix, @@ -662,17 +662,17 @@ inline bool tensors_have_same_size_at_dims( size_t dim_a, executorch::aten::Tensor b, size_t dim_b) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( dim_a < a.dim(), "Cannot retrieve dim %zu from tensor with dim %zu", static_cast(dim_a), static_cast(a.dim())); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( dim_b < b.dim(), "Cannot retrieve dim %zu from tensor with dim %zu", static_cast(dim_b), static_cast(b.dim())); - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( a.size(dim_a) == b.size(dim_b), ET_TENSOR_CHECK_PREFIX__ ": a.size(%zu) = %zu does not match b.size(%zu) = %zu", @@ -847,13 +847,13 @@ inline bool tensor_is_contiguous(executorch::aten::Tensor t) { if (strides.size() == 0) { return true; } - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( strides[strides.size() - 1] == 1, "Tensor is not contiguous; the stride of the last dimension must be 1, " "but got %zu", static_cast(strides[strides.size() - 1])); for (int i = strides.size() - 1; i > 0; --i) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( strides[i - 1] == strides[i] * sizes[i], "Tensor is not contiguous; the stride of dim %zu should be equal to " "strides[%zu] * sizes[%zu] = %zu, but found %zu", @@ -869,7 +869,7 @@ inline bool tensor_is_contiguous(executorch::aten::Tensor t) { inline bool tensors_have_same_rank( executorch::aten::Tensor a, executorch::aten::Tensor b) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( a.dim() == b.dim(), ET_TENSOR_CHECK_PREFIX__ ": rank={%zd, %zd}", ssize_t(a.dim()), diff --git a/runtime/core/exec_aten/util/tensor_util_aten.cpp b/runtime/core/exec_aten/util/tensor_util_aten.cpp index d768f66d05f..4df273d4dbb 100644 --- a/runtime/core/exec_aten/util/tensor_util_aten.cpp +++ b/runtime/core/exec_aten/util/tensor_util_aten.cpp @@ -35,7 +35,7 @@ Error get_dim_order( bool tensor_has_valid_dim_order(at::Tensor t) { executorch::aten::DimOrderType dim_order[kTensorDimensionLimit]; - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( get_dim_order(t, dim_order, t.dim()) == Error::Ok, "Failed to retrieve dim order from 
tensor!"); @@ -55,7 +55,7 @@ bool tensor_has_valid_dim_order(at::Tensor t) { inline bool tensor_is_default_or_channels_last_dim_order(at::Tensor t) { executorch::aten::DimOrderType dim_order[kTensorDimensionLimit]; - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( get_dim_order(t, dim_order, t.dim()) == Error::Ok, "Failed to retrieve dim order from tensor!"); @@ -86,7 +86,7 @@ bool tensors_have_same_dim_order( executorch::aten::DimOrderType first_dim_order[kTensorDimensionLimit]; executorch::aten::DimOrderType other_dim_order[kTensorDimensionLimit]; - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( get_dim_order(tensor_list[0], first_dim_order, tensor_list[0].dim()) == Error::Ok, "Failed to retrieve dim order from 1st input tensor!"); @@ -97,7 +97,7 @@ bool tensors_have_same_dim_order( is_channels_last_dim_order(first_dim_order, tensor_list[0].dim()); for (size_t i = 1; i < tensor_list.size(); ++i) { - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( get_dim_order(tensor_list[i], other_dim_order, tensor_list[i].dim()) == Error::Ok, "Failed to retrieve dim order from %zd-th input tensor!", @@ -109,7 +109,7 @@ bool tensors_have_same_dim_order( is_channels_last_dim_order(other_dim_order, tensor_list[i].dim()); } - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( all_contiguous || all_channels_last, "%zd input tensors have different dim orders", tensor_list.size()); diff --git a/runtime/core/exec_aten/util/tensor_util_portable.cpp b/runtime/core/exec_aten/util/tensor_util_portable.cpp index 3350445db73..c1cbcfb6064 100644 --- a/runtime/core/exec_aten/util/tensor_util_portable.cpp +++ b/runtime/core/exec_aten/util/tensor_util_portable.cpp @@ -125,7 +125,7 @@ bool tensors_have_same_dim_order( tensor_list[i].dim_order().size()); } - ET_LOG_MSG_AND_RETURN_IF_FALSE( + ET_CHECK_OR_RETURN_FALSE( all_contiguous || all_channels_last, "%zd input tensors have different dim orders", tensor_list.size()); From 366522359f3939862c0fb8c4db196d35df3dfcb3 Mon Sep 17 00:00:00 2001 From: Hansong <107070759+kirklandsign@users.noreply.github.com> Date: Tue, 18 Feb 2025 14:00:48 -0800 Subject: [PATCH 005/584] Fix pyre Differential Revision: D69698446 Pull Request resolved: https://github.com/pytorch/executorch/pull/8509 --- extension/llm/tokenizer/targets.bzl | 1 + 1 file changed, 1 insertion(+) diff --git a/extension/llm/tokenizer/targets.bzl b/extension/llm/tokenizer/targets.bzl index be5606ccd2b..1a590c7876f 100644 --- a/extension/llm/tokenizer/targets.bzl +++ b/extension/llm/tokenizer/targets.bzl @@ -10,6 +10,7 @@ def define_common_targets(): name = "tokenizer_py_lib", srcs = [ "__init__.py", + "hf_tokenizer.py", "tokenizer.py", "utils.py", ], From 5eef5ae95088dc6b2b54ad5f74bc03bd0a07d57f Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Tue, 18 Feb 2025 16:02:33 -0600 Subject: [PATCH 006/584] [ExecuTorch] Arm Ethos: Option to be verbose for Vela (#8546) [ExecuTorch] Arm Ethos: Always verbose all for Vela Pull Request resolved: https://github.com/pytorch/executorch/pull/8406 Better to have the output then to have to rerun it. 
ghstack-source-id: 266959633 @exported-using-ghexport Differential Revision: [D69503726](https://our.internmc.facebook.com/intern/diff/D69503726/) Co-authored-by: Digant Desai --- backends/arm/arm_vela.py | 9 ++++++++- backends/arm/ethosu_backend.py | 7 ++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/backends/arm/arm_vela.py b/backends/arm/arm_vela.py index ef7a4b01cda..e259a8867bd 100644 --- a/backends/arm/arm_vela.py +++ b/backends/arm/arm_vela.py @@ -39,7 +39,12 @@ def vela_bin_pack_io(prefix, data, shape_order=None): # Output via Vela to binary stream for ArmBackendEthosU # WARNING: Do not change this without changing VelaBinStream.cpp as that # function consumes this format and the two need to align. -def vela_compile(tosa_flatbuffer: bytes, args: List[str], shape_order=None): +def vela_compile( + tosa_flatbuffer: bytes, args: List[str], shape_order=None, verbose: bool = False +): + """ + Compile a TOSA graph to a binary stream for ArmBackendEthosU using Vela. + """ with tempfile.TemporaryDirectory() as tmpdir: tosaname = "out.tosa" tosa_path = os.path.join(tmpdir, tosaname) @@ -50,6 +55,8 @@ def vela_compile(tosa_flatbuffer: bytes, args: List[str], shape_order=None): output_dir = os.path.join(tmpdir, "output") args.append(f"--output-dir={output_dir}") args.append(tosa_path) + if verbose: + args.append("--verbose-all") vela.main(" ".join(args).split(" ")) if any("ethos-u85" in arg for arg in args) or any( diff --git a/backends/arm/ethosu_backend.py b/backends/arm/ethosu_backend.py index 768389548e9..9b14a7a72b8 100644 --- a/backends/arm/ethosu_backend.py +++ b/backends/arm/ethosu_backend.py @@ -58,7 +58,12 @@ def _compile_tosa_flatbuffer( ) # Pass on the TOSA flatbuffer to the vela compiler. - binary = vela_compile(tosa_flatbuffer, compile_flags, input_order) + binary = vela_compile( + tosa_flatbuffer, + compile_flags, + input_order, + verbose=logger.getEffectiveLevel() == logging.INFO, + ) return binary @staticmethod From b5c0c6105a6fc99c033b0e70ca88119af5412f64 Mon Sep 17 00:00:00 2001 From: Hansong <107070759+kirklandsign@users.noreply.github.com> Date: Tue, 18 Feb 2025 14:08:12 -0800 Subject: [PATCH 007/584] Fix pyre Differential Revision: D69764534 Pull Request resolved: https://github.com/pytorch/executorch/pull/8548 --- backends/arm/operators/TARGETS | 1 + 1 file changed, 1 insertion(+) diff --git a/backends/arm/operators/TARGETS b/backends/arm/operators/TARGETS index d12cc7e4dfd..1f91aa37b75 100644 --- a/backends/arm/operators/TARGETS +++ b/backends/arm/operators/TARGETS @@ -21,6 +21,7 @@ python_library( "//executorch/backends/arm:tosa_mapping", "//executorch/backends/arm:tosa_quant_utils", "//executorch/backends/arm:tosa_utils", + "//executorch/backends/arm/_passes:passes", "//executorch/exir:lib", ], ) From 1858086a78ecc91047e7b4c02ac8a8ae2aeac14a Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 18 Feb 2025 15:19:48 -0800 Subject: [PATCH 008/584] Initialize the ET PAL in broadcast_test (#8541) Test is broken internally because this is missing. (Is there a different way I'm supposed to fix this?) 
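For context, the kernel-util checks touched earlier in this series emit their failure messages through the ExecuTorch platform abstraction layer (PAL), so a test binary that exercises them wants the PAL initialized before the first check can fire. A minimal sketch of one common way to do that, assuming a standalone gtest main and that et_pal_init() is declared in runtime/platform/platform.h (both are assumptions, not part of this patch):

#include <executorch/runtime/platform/platform.h>
#include <gtest/gtest.h>

int main(int argc, char** argv) {
  // Initialize the platform layer once, up front, so PAL-backed logging from
  // failed checks (e.g. ET_CHECK_OR_RETURN_FALSE) behaves as expected.
  et_pal_init();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}

The patch below takes the lighter route of calling et_pal_init() inside the affected test body rather than adding a custom test main.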
--- kernels/portable/cpu/util/test/broadcast_test.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernels/portable/cpu/util/test/broadcast_test.cpp b/kernels/portable/cpu/util/test/broadcast_test.cpp index 679296f112c..7ffd95b6c52 100644 --- a/kernels/portable/cpu/util/test/broadcast_test.cpp +++ b/kernels/portable/cpu/util/test/broadcast_test.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -131,6 +132,7 @@ TEST(BroadcastUtilTest, GetBroadcastTargetSize) { .equals(ArrayRef({5, 2, 2}))); Tensor c = tf.zeros({4, 5}); + et_pal_init(); err = get_broadcast_target_size( a, c, From f174ca8f3e5217c756bb1424e33f961a0ff0dc5d Mon Sep 17 00:00:00 2001 From: Riley Dulin Date: Tue, 18 Feb 2025 17:12:27 -0800 Subject: [PATCH 009/584] Add small repro test for unsigned -> signed et loss error Differential Revision: D69668881 Pull Request resolved: https://github.com/pytorch/executorch/pull/8506 --- backends/cadence/aot/TARGETS | 1 + backends/cadence/aot/fuse_ops.py | 8 +++++++- backends/cadence/aot/remove_ops.py | 2 ++ backends/cadence/aot/reorder_ops.py | 8 ++++++++ backends/cadence/aot/replace_ops.py | 10 ++++++---- 5 files changed, 24 insertions(+), 5 deletions(-) diff --git a/backends/cadence/aot/TARGETS b/backends/cadence/aot/TARGETS index 0590e694602..78a78bbda30 100644 --- a/backends/cadence/aot/TARGETS +++ b/backends/cadence/aot/TARGETS @@ -180,6 +180,7 @@ python_library( typing = True, deps = [ "//caffe2:torch", + ":ops_registrations", ":compiler_utils", "//executorch/backends/cadence/aot:pass_utils", "//executorch/backends/cadence/aot:utils", diff --git a/backends/cadence/aot/fuse_ops.py b/backends/cadence/aot/fuse_ops.py index aa79b5582a7..47e6b8b5d03 100644 --- a/backends/cadence/aot/fuse_ops.py +++ b/backends/cadence/aot/fuse_ops.py @@ -16,6 +16,9 @@ from numbers import Number from typing import cast, Sequence +# Import these for the cadence function signatures. 
+import executorch.backends.cadence.aot.ops_registrations # noqa: F401 + import torch import torch.fx from executorch.backends.cadence.aot.compiler_utils import ( @@ -849,7 +852,10 @@ def attempt_fusion( if isinstance(arg, torch.fx.Node) and isinstance(arg.target, EdgeOpOverload) and get_edge_overload_packet(arg.target) - == exir_ops.edge.quantized_decomposed.dequantize_per_tensor + in ( + exir_ops.edge.quantized_decomposed.dequantize_per_tensor, + exir_ops.edge.cadence.dequantize_per_tensor, + ) ] multiplier_nodes = [ arg diff --git a/backends/cadence/aot/remove_ops.py b/backends/cadence/aot/remove_ops.py index caceabfba82..942f6d55533 100644 --- a/backends/cadence/aot/remove_ops.py +++ b/backends/cadence/aot/remove_ops.py @@ -569,6 +569,8 @@ class Subgraph: exir_ops.edge.aten.hardtanh.default, exir_ops.edge.quantized_decomposed.quantize_per_tensor.default, exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default, + exir_ops.edge.cadence.quantize_per_tensor.default, + exir_ops.edge.cadence.dequantize_per_tensor.default, } # must be initialized in the constructor diff --git a/backends/cadence/aot/reorder_ops.py b/backends/cadence/aot/reorder_ops.py index 0fd7f0b61a4..e8a8e230531 100644 --- a/backends/cadence/aot/reorder_ops.py +++ b/backends/cadence/aot/reorder_ops.py @@ -118,6 +118,8 @@ def get_descendent_quant_ops(self, node: torch.fx.Node) -> List[torch.fx.Node]: if user_target in { torch.ops.quantized_decomposed.quantize_per_tensor, exir_ops.edge.quantized_decomposed.quantize_per_tensor, + torch.ops.cadence.quantize_per_tensor, + exir_ops.edge.cadence.quantize_per_tensor, }: descendent_quant_ops.append(user) # If the successor is a trivially quantizable op, consider its users @@ -300,6 +302,8 @@ def advance_quantize_op(self, graph_module: torch.fx.GraphModule): if get_overload_packet(node.target) not in ( exir_ops.edge.quantized_decomposed.quantize_per_tensor, torch.ops.quantized_decomposed.quantize_per_tensor, + exir_ops.edge.cadence.quantize_per_tensor, + torch.ops.cadence.quantize_per_tensor, ): continue @@ -413,6 +417,7 @@ def postponing_feasible(self, dequant_node: torch.fx.Node): in { exir_ops.edge.quantized_decomposed.quantize_per_tensor, exir_ops.edge.quantized_decomposed.quantize_per_channel, + exir_ops.edge.cadence.quantize_per_tensor, } for x in users ) @@ -422,6 +427,7 @@ def postpone_dequantize_op(self, graph_module: torch.fx.GraphModule) -> bool: packet_to_overload_map = { exir_ops.edge.quantized_decomposed.dequantize_per_tensor: "default", exir_ops.edge.quantized_decomposed.dequantize_per_channel: "default", + exir_ops.edge.cadence.dequantize_per_tensor: "default", } graph = graph_module.graph modified = False @@ -500,6 +506,7 @@ class SinkOpsCloserToUsePass(ExportPass): exir_ops.edge.aten.dequantize, exir_ops.edge.quantized_decomposed.dequantize_per_tensor, exir_ops.edge.quantized_decomposed.dequantize_per_channel, + exir_ops.edge.cadence.dequantize_per_tensor, } def sink_ops_closer_to_use(self, graph_module: torch.fx.GraphModule): @@ -558,6 +565,7 @@ class HoistOpsCloserToDefPass(ExportPass): hoistable_ops: Set[EdgeOpOverload] = { exir_ops.edge.quantized_decomposed.quantize_per_tensor, + exir_ops.edge.cadence.quantize_per_tensor, exir_ops.edge.aten.slice_copy, exir_ops.edge.aten.select_copy, } diff --git a/backends/cadence/aot/replace_ops.py b/backends/cadence/aot/replace_ops.py index 487d374fb80..d2fbc0eda80 100644 --- a/backends/cadence/aot/replace_ops.py +++ b/backends/cadence/aot/replace_ops.py @@ -162,11 +162,12 @@ def call_operator( kwargs: Dict[str, 
Argument], meta: NodeMetadata, ) -> ProxyValue: - if op not in {exir_ops.edge.quantized_decomposed.quantize_per_tensor.default}: + ns = exir_ops.edge if isinstance(op, EdgeOpOverload) else torch.ops + if op != ns.quantized_decomposed.quantize_per_tensor.default: return super().call_operator(op, args, kwargs, meta) return super().call_operator( - exir_ops.edge.cadence.quantize_per_tensor.default, + ns.cadence.quantize_per_tensor.default, args, kwargs, meta, @@ -188,11 +189,12 @@ def call_operator( kwargs: Dict[str, Argument], meta: NodeMetadata, ) -> ProxyValue: - if op not in {exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default}: + ns = exir_ops.edge if isinstance(op, EdgeOpOverload) else torch.ops + if op != ns.quantized_decomposed.dequantize_per_tensor.default: return super().call_operator(op, args, kwargs, meta) return super().call_operator( - exir_ops.edge.cadence.dequantize_per_tensor.default, + ns.cadence.dequantize_per_tensor.default, args, kwargs, meta, From 8d1480b40e73c7d7508a3c9cff4f63bed5799378 Mon Sep 17 00:00:00 2001 From: mcremon-meta <134334895+mcremon-meta@users.noreply.github.com> Date: Tue, 18 Feb 2025 17:23:18 -0800 Subject: [PATCH 010/584] Make the quantizer type a config argument Differential Revision: D69704507 Pull Request resolved: https://github.com/pytorch/executorch/pull/8510 --- backends/cadence/aot/quantizer/quantizer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backends/cadence/aot/quantizer/quantizer.py b/backends/cadence/aot/quantizer/quantizer.py index d6765d2ad30..585f38241a2 100644 --- a/backends/cadence/aot/quantizer/quantizer.py +++ b/backends/cadence/aot/quantizer/quantizer.py @@ -6,6 +6,7 @@ # pyre-strict +from dataclasses import dataclass from typing import List, Optional, Tuple, Union import torch @@ -177,6 +178,8 @@ def get_cadence_default_quantizers() -> List[Quantizer]: ] +# Note: need dataclass to be used in CI configs through OmegaConf and Hydra +@dataclass class CadenceQuantizer(ComposableQuantizer): """ Generic CadenceQuantizer. Although it can be used directly, it is typically a base From cc3974f983f8eb0da5e6ecf3f65f2c3c8b3552e2 Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Tue, 18 Feb 2025 23:04:36 -0600 Subject: [PATCH 011/584] Use at::Vectorized in optimized log_softmax Pull Request resolved: https://github.com/pytorch/executorch/pull/8382 This should allow us to enable this op in OSS, because Vectorized handles any Sleef issues for us as needed. (I considered going straight to sharing the PyTorch core implementation, but we need parallel_for enabled for that and this improvement is easy enough to make.) 
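A condensed sketch of the pattern this change adopts - load a lane of inputs, subtract the running max, exponentiate, store, then horizontally reduce - written standalone for clarity. The helper name and exact headers are illustrative assumptions; the real code in the diff below works on strided ExecuTorch tensors and keeps the same scalar tail loop.

#include <ATen/cpu/vec/functional.h>
#include <ATen/cpu/vec/vec.h>

#include <cmath>
#include <cstdint>
#include <functional>

// Hypothetical helper: out[i] = exp(in[i] - max_in), returning the running sum.
float exp_shifted_and_sum(const float* in, float* out, int64_t n, float max_in) {
  using Vec = at::vec::Vectorized<float>;
  const Vec vmax(max_in);
  float sum = 0.0f;
  int64_t i = 0;
  for (; i + Vec::size() <= n; i += Vec::size()) {
    // Vectorized picks SLEEF, intrinsics, or a scalar fallback per platform.
    Vec v = (Vec::loadu(in + i) - vmax).exp();
    v.store(out + i);
    sum += at::vec::vec_reduce_all<float>(std::plus<Vec>(), v); // horizontal add
  }
  for (; i < n; ++i) { // scalar tail
    out[i] = std::exp(in[i] - max_in);
    sum += out[i];
  }
  return sum;
}

The point of routing through at::vec::Vectorized rather than raw NEON intrinsics is exactly what the commit message states: the abstraction owns the SLEEF dependency and the per-architecture dispatch, so the kernel no longer needs an __aarch64__-only code path.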
Differential Revision: [D69473208](https://our.internmc.facebook.com/intern/diff/D69473208/) ghstack-source-id: 267044107 Co-authored-by: Github Executorch --- kernels/optimized/cpu/op_log_softmax.cpp | 32 +++++++++++++----------- kernels/optimized/cpu/targets.bzl | 13 +++------- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/kernels/optimized/cpu/op_log_softmax.cpp b/kernels/optimized/cpu/op_log_softmax.cpp index c3f090a6dfe..1d2467bca5f 100644 --- a/kernels/optimized/cpu/op_log_softmax.cpp +++ b/kernels/optimized/cpu/op_log_softmax.cpp @@ -14,6 +14,8 @@ #include #include +#include +#include #include #include @@ -66,30 +68,30 @@ void log_softmax_kernel(const Tensor& input, int64_t dim, Tensor& out) { } // calculate sum and exponential in softmax dim OUT_T temp_sum = 0; -#ifndef __aarch64__ - for (auto d = 0; d < dim_size; ++d) { - output_data[d * dim_stride] = - std::exp(input_data[d * dim_stride] - max_input); - temp_sum += output_data[d * dim_stride]; - } -#else + using VecOut = at::vec::Vectorized; + using VecIn = at::vec::Vectorized; auto d = 0; - for (; d + 4 < dim_size; d += 4) { + static_assert(sizeof(IN_T) == sizeof(OUT_T)); + static_assert( + std::is_same_v, + "Below loop actually only supports float."); + const VecIn max_input_vec(max_input); + for (; d + VecOut::size() < dim_size; d += VecOut::size()) { auto index = d * dim_stride; - float32x4_t in = - vld1q_f32(static_cast(&input_data[index])); - float32x4_t out_ = - Sleef_expf4_u10(vsubq_f32(in, vmovq_n_f32(max_input))); - vst1q_f32(static_cast(&output_data[index]), out_); + auto in = VecIn::loadu(&input_data[index]); + auto out_ = (in - max_input_vec).exp(); + out_.store(&output_data[index]); +#if defined(__aarch64__) && !defined(CPU_CAPABILITY_SVE) temp_sum += vaddvq_f32(out_); +#else + temp_sum += at::vec::vec_reduce_all(std::plus(), out_); +#endif } - for (; d < dim_size; ++d) { output_data[d * dim_stride] = std::exp(input_data[d * dim_stride] - max_input); temp_sum += output_data[d * dim_stride]; } -#endif // __aarch64__ temp_sum = std::log(temp_sum); diff --git a/kernels/optimized/cpu/targets.bzl b/kernels/optimized/cpu/targets.bzl index 94ceb1f4dc1..41dde099290 100644 --- a/kernels/optimized/cpu/targets.bzl +++ b/kernels/optimized/cpu/targets.bzl @@ -57,15 +57,10 @@ _OPTIMIZED_ATEN_OPS = ( ), op_target( name = "op_log_softmax", - deps = select({ - "DEFAULT": [ - "//executorch/kernels/portable/cpu/util:activation_ops_util", - ], - "ovr_config//cpu:arm64": [ - "//executorch/kernels/portable/cpu/util:activation_ops_util", - "fbsource//third-party/sleef:sleef_arm", - ], - }), + deps = [ + "//executorch/kernels/portable/cpu/util:activation_ops_util", + "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch", + ], ), op_target( name = "op_mm", From f0ef51c0e6f4cbe9ea85414578877c0cb51fb477 Mon Sep 17 00:00:00 2001 From: winskuo-quic <143469905+winskuo-quic@users.noreply.github.com> Date: Wed, 19 Feb 2025 13:28:07 +0800 Subject: [PATCH 012/584] Qualcomm AI Engine Direct - CI For Llama (#8512) Enable inference speed test and 1b test --- .ci/scripts/test_qnn_static_llama.sh | 4 +- backends/qualcomm/tests/test_qnn_delegate.py | 247 +++++++++++++----- backends/qualcomm/tests/utils.py | 2 + examples/qualcomm/oss_scripts/llama/llama.py | 5 + .../oss_scripts/llama/runner/runner.cpp | 14 + 5 files changed, 204 insertions(+), 68 deletions(-) diff --git a/.ci/scripts/test_qnn_static_llama.sh b/.ci/scripts/test_qnn_static_llama.sh index 8aab21846f1..5df74bddef4 100644 --- 
a/.ci/scripts/test_qnn_static_llama.sh +++ b/.ci/scripts/test_qnn_static_llama.sh @@ -34,11 +34,11 @@ $PYTHON_EXECUTABLE -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o to set +e # Compile only as weight sharing is not applicable on x86 -$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleScript.test_stories_single_llama --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir . --compile_only +$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir . --llama_artifacts . --compile_only exit_code1=$? # Checks accuracy with weight sharing disabled since x86 does not support weight sharing. -$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleScript.test_stories_single_llama --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir . --enable_x86_64 +$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir . --llama_artifacts . --enable_x86_64 exit_code2=$? # Check the exit codes and print messages diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index 6ea94ba9e07..9b05ad871f4 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -3106,6 +3106,173 @@ def test_qnn_backend_draw_graph(self): ), "Generated .dot file does not match the golden file." +class TestExampleLLMScript(TestQNN): + def required_envs(self, conditions=None) -> bool: + conditions = [] if conditions is None else conditions + return all( + [ + self.executorch_root, + self.artifact_dir, + *conditions, + ] + ) + + def test_llama3_2_1b(self): + if not self.required_envs(): + self.skipTest("missing required envs") + assert ( + self.llama_artifacts is not None + ), "Please provide path to llama artifacts" + + prompt = "What is the meaning of life?" 
+ cmds = [ + "python", + f"{self.executorch_root}/examples/qualcomm/oss_scripts/llama/llama.py", + "--artifact", + self.artifact_dir, + "--build_folder", + self.build_folder, + "--model", + self.model, + "--checkpoint", + f"{self.llama_artifacts}/consolidated.00.pth", + "--params", + f"{self.llama_artifacts}/params.json", + "--tokenizer_model", + f"{self.llama_artifacts}/tokenizer.model", + "--ip", + self.ip, + "--port", + str(self.port), + "--prompt", + f"{prompt}", + "--ptq", + "16a4w", + "--temperature", + "0", + "--llama_model", + "llama3_2", + "--model_mode", + "hybrid", + "--prefill_seq_len", + "32", + "--kv_seq_len", + "512", + "--num_sharding", + "4", + ] + if self.compile_only: + cmds.extend(["--compile_only"]) + elif self.device: + cmds.extend(["--device", self.device]) + if self.host: + cmds.extend(["--host", self.host]) + elif self.enable_x86_64: + cmds.extend(["--enable_x86_64"]) + if self.pre_gen_pte: + cmds.extend(["--pre_gen_pte", self.pre_gen_pte]) + + golden_start_with = "<|begin_of_text|><|start_header_id|>user<|end_header_id|>" + p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL) + with Listener((self.ip, self.port)) as listener: + conn = listener.accept() + p.communicate() + msg = json.loads(conn.recv()) + if "Error" in msg: + self.fail(msg["Error"]) + else: + if not self.compile_only: + model_out = msg["result"][0] + self.assertTrue( + model_out.startswith(golden_start_with), + f"Expected Output: {golden_start_with}. Actual Output: {model_out}", + ) + # x86 does not allow weight sharing, so we don't check pte size. + # Inference speed on x86 is slow, so we only check when running on Android + if not self.enable_x86_64: + pte_size = msg["pte_size"] + self.assertLessEqual(pte_size, 1300000000) + if not self.compile_only and not self.enable_x86_64: + self.assertGreaterEqual(msg["inference_speed"], 66) # Lanai + + def test_llama_stories_110m(self): + if not self.required_envs(): + self.skipTest("missing required envs") + assert ( + self.llama_artifacts is not None + ), "Please provide path to llama artifacts" + + prompt = "Once" + cmds = [ + "python", + f"{self.executorch_root}/examples/qualcomm/oss_scripts/llama/llama.py", + "--artifact", + self.artifact_dir, + "--build_folder", + self.build_folder, + "--model", + self.model, + "--checkpoint", + f"{self.llama_artifacts}/stories110M.pt", + "--params", + f"{self.llama_artifacts}/params.json", + "--tokenizer_model", + f"{self.llama_artifacts}/tokenizer.model", + "--tokenizer_bin", + f"{self.llama_artifacts}/tokenizer.bin", + "--ip", + self.ip, + "--port", + str(self.port), + "--prompt", + f"{prompt}", + "--ptq", + "16a4w", + "--temperature", + "0", + "--llama_model", + "stories110m", + "--model_mode", + "hybrid", + "--prefill_seq_len", + "32", + "--kv_seq_len", + "128", + ] + if self.compile_only: + cmds.extend(["--compile_only"]) + elif self.device: + cmds.extend(["--device", self.device]) + if self.host: + cmds.extend(["--host", self.host]) + elif self.enable_x86_64: + cmds.extend(["--enable_x86_64"]) + if self.pre_gen_pte: + cmds.extend(["--pre_gen_pte", self.pre_gen_pte]) + + golden_start_with = "Once upon a time," + p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL) + with Listener((self.ip, self.port)) as listener: + conn = listener.accept() + p.communicate() + msg = json.loads(conn.recv()) + if "Error" in msg: + self.fail(msg["Error"]) + else: + if not self.compile_only: + model_out = msg["result"][0] + self.assertTrue( + model_out.startswith(golden_start_with), + f"Expected Output: {golden_start_with}. 
Actual Output: {model_out}", + ) + # x86 does not allow weight sharing, so we don't check pte size + if not self.enable_x86_64: + pte_size = msg["pte_size"] + self.assertLessEqual(pte_size, 130000000) + if not self.compile_only and not self.enable_x86_64: + self.assertGreaterEqual(msg["inference_speed"], 220) # Lanai + + class TestExampleOssScript(TestQNN): def required_envs(self, conditions=None) -> bool: conditions = [] if conditions is None else conditions @@ -4001,72 +4168,6 @@ def test_deeplab_v3(self): self.assertGreaterEqual(msg["MPA"], 0.70) self.assertGreaterEqual(msg["MIoU"], 0.55) - def test_stories_single_llama(self): - if not self.required_envs(): - self.skipTest("missing required envs") - - cmds = [ - "python", - f"{self.executorch_root}/examples/qualcomm/oss_scripts/llama/llama.py", - "--artifact", - self.artifact_dir, - "--build_folder", - self.build_folder, - "--model", - self.model, - "--checkpoint", - f"{self.artifact_dir}/stories110M.pt", - "--params", - f"{self.artifact_dir}/params.json", - "--tokenizer_model", - f"{self.artifact_dir}/tokenizer.model", - "--tokenizer_bin", - f"{self.artifact_dir}/tokenizer.bin", - "--ip", - self.ip, - "--port", - str(self.port), - "--prompt", - "Once", - "--ptq", - "16a4w", - "--temperature", - "0", - "--llama_model", - "stories110m", - "--model_mode", - "hybrid", - "--prefill_seq_len", - "32", - "--kv_seq_len", - "128", - ] - if self.compile_only: - cmds.extend(["--compile_only"]) - elif self.device: - cmds.extend(["--device", self.device]) - if self.host: - cmds.extend(["--host", self.host]) - elif self.enable_x86_64: - cmds.extend(["--enable_x86_64"]) - - golden_start_with = "Once upon a time," - p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL) - with Listener((self.ip, self.port)) as listener: - conn = listener.accept() - p.communicate() - msg = json.loads(conn.recv()) - if "Error" in msg: - self.fail(msg["Error"]) - else: - if not self.compile_only: - model_out = msg["result"][0] - self.assertTrue(model_out.startswith(golden_start_with)) - # x86 does not allow weight sharing, so we don't check pte size - if not self.enable_x86_64: - pte_size = msg["pte_size"] - self.assertLessEqual(pte_size, 130000000) - @unittest.skip("dynamic shape inputs appear in recent torch.export.export") def test_mobilebert(self): if not self.required_envs([self.pretrained_weight]): @@ -4271,6 +4372,18 @@ def setup_environment(): type=str, ) + parser.add_argument( + "--pre_gen_pte", + help="Run the pre-generated pte in the given directory.", + type=str, + ) + + parser.add_argument( + "--llama_artifacts", + help="A folder that contains: weight, tokenizer, and params.", + type=str, + ) + args, ns_args = parser.parse_known_args(namespace=unittest) TestQNN.host = args.host TestQNN.device = args.device @@ -4289,6 +4402,8 @@ def setup_environment(): TestQNN.enable_x86_64 = args.enable_x86_64 TestQNN.dump_intermediate_outputs = args.dump_intermediate_outputs TestQNN.compile_only = args.compile_only + TestQNN.pre_gen_pte = args.pre_gen_pte + TestQNN.llama_artifacts = args.llama_artifacts return sys.argv[:1] + ns_args diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py index 46cc9b65fcf..eeebb6fd8a9 100644 --- a/backends/qualcomm/tests/utils.py +++ b/backends/qualcomm/tests/utils.py @@ -188,6 +188,8 @@ class TestQNN(unittest.TestCase): shared_buffer: bool = False enable_x86_64: bool = False compile_only: bool = False + pre_gen_pte: str = "" + llama_artifacts: str = "" def _assert_outputs_equal(self, model_output, ref_output): 
self.assertTrue(len(ref_output) == len(model_output)) diff --git a/examples/qualcomm/oss_scripts/llama/llama.py b/examples/qualcomm/oss_scripts/llama/llama.py index ab27714ae1f..48353d3ee6b 100755 --- a/examples/qualcomm/oss_scripts/llama/llama.py +++ b/examples/qualcomm/oss_scripts/llama/llama.py @@ -881,6 +881,10 @@ def post_process(): adb.pull(output_path=args.artifact, callback=post_process) if args.ip and args.port != -1: + inference_speed = 0 + with open(f"{args.artifact}/outputs/inference_speed.txt", "r") as f: + inference_speed = float(f.read()) + pte_size = os.path.getsize(pte_path) with Client((args.ip, args.port)) as conn: conn.send( @@ -888,6 +892,7 @@ def post_process(): { "result": outputs, "pte_size": pte_size, + "inference_speed": inference_speed, } ) ) diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp index 4b45863147e..70ba25a0972 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include using executorch::aten::Tensor; @@ -518,6 +519,19 @@ void printReport(const Runner::Stats& stats) { stats.num_generated_tokens, (double)stats.aggregate_sampling_time_ms / stats.SCALING_FACTOR_UNITS_PER_SECOND); + + // For now, we just print the total inference time for CI, can save more info + // in future if needed. + std::ofstream outfile("outputs/inference_speed.txt"); + if (outfile.is_open()) { + double num_tok = (stats.num_generated_tokens) / + (double)(stats.inference_end_ms - stats.inference_start_ms) * + stats.SCALING_FACTOR_UNITS_PER_SECOND; + outfile << num_tok; + outfile.close(); + } else { + ET_CHECK_MSG(false, "Error saving the inference speed file"); + } } std::string statsToJsonString(const Runner::Stats& stats) { From fea3684d16cb8fe98a4a6894b1a26b629adf2c72 Mon Sep 17 00:00:00 2001 From: Zingo Andersen Date: Wed, 19 Feb 2025 14:04:42 +0100 Subject: [PATCH 013/584] Arm: Create script for model testing and split run.sh example script (#8460) Create model test script and break up run.sh into smaller scripts that can be used from both places. This makes it possible to run things in run.sh in separate steps like this: 1. Build needed libs backends/arm/scripts/build_executorch.sh backends/arm/scripts/build_portable_kernels.sh --portable_kernels= backends/arm/scripts/build_quantized_ops_aot_lib.sh 2. Build python3 -m examples.arm.aot_arm_compiler --target=ethos-u85-128 --delegate --quantize --so_library=/libquantized_ops_aot_lib.so --model_name= 3. Build target executable backends/arm/scripts/build_executorch_runner.sh --pte= --target=ethos-u55-128 4. 
Test target executable in FVP backends/arm/scripts/run_fvp.sh --elf= --target=ethos-u85-128 Signed-off-by: Zingo Andersen --- .github/workflows/trunk.yml | 2 +- backends/arm/README.md | 4 +- backends/arm/scripts/build_executorch.sh | 123 +++++++ .../arm/scripts/build_executorch_runner.sh | 125 +++++++ .../arm/scripts/build_portable_kernels.sh | 74 ++++ .../scripts/build_quantized_ops_aot_lib.sh | 37 +- backends/arm/scripts/run_fvp.sh | 104 ++++++ backends/arm/test/runner_utils.py | 2 +- backends/arm/test/setup_testing.sh | 8 +- backends/arm/test/test_arm_baremetal.sh | 120 ++++-- backends/arm/test/test_model.py | 247 +++++++++++++ .../executorch-arm-delegate-tutorial.md | 31 +- examples/arm/aot_arm_compiler.py | 7 +- examples/arm/run.sh | 344 ++++-------------- 14 files changed, 893 insertions(+), 335 deletions(-) create mode 100755 backends/arm/scripts/build_executorch.sh create mode 100755 backends/arm/scripts/build_executorch_runner.sh create mode 100755 backends/arm/scripts/build_portable_kernels.sh create mode 100755 backends/arm/scripts/run_fvp.sh create mode 100755 backends/arm/test/test_model.py diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 04a6c96f3ec..dff2b400ee6 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -159,7 +159,7 @@ jobs: sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024 # Test ethos-u delegate examples with run.sh - backends/arm/test/test_arm_baremetal.sh test_run_ethosu_fvp + backends/arm/test/test_arm_baremetal.sh test_full_ethosu_fvp test-arm-reference-delegation: diff --git a/backends/arm/README.md b/backends/arm/README.md index 9a5a6f94085..04815bf23d2 100644 --- a/backends/arm/README.md +++ b/backends/arm/README.md @@ -55,10 +55,10 @@ To run the unit test suite with Corstone3x0 FVP simulator support use backends/arm/test/test_arm_baremetal.sh test_pytest_ethosu_fvp ``` -You can test to run some models with the run.sh flow +You can test to run some models with the full fvp test flow ``` -backends/arm/test/test_arm_baremetal.sh test_run_ethosu_fvp +backends/arm/test/test_arm_baremetal.sh test_full_ethosu_fvp ``` ## Unit tests diff --git a/backends/arm/scripts/build_executorch.sh b/backends/arm/scripts/build_executorch.sh new file mode 100755 index 00000000000..f868d264f48 --- /dev/null +++ b/backends/arm/scripts/build_executorch.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Optional parameter: +# --build_type= "Release" | "Debug" | "RelWithDebInfo" +# --etdump build with devtools-etdump support + +set -eu + +script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +et_root_dir=$(cd ${script_dir}/../../.. 
&& pwd) +et_root_dir=$(realpath ${et_root_dir}) +toolchain_cmake=${script_dir}/../../../examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake +toolchain_cmake=$(realpath ${toolchain_cmake}) + + + +et_build_root="${et_root_dir}/arm_test" +build_type="Release" +build_with_etdump=false + + +help() { + echo "Usage: $(basename $0) [options]" + echo "Options:" + echo " --et_build_root= Build output root folder to use, defaults to ${et_build_root}" + echo " --build_type= Build with Release, Debug or RelWithDebInfo, default is ${build_type}" + echo " --etdump Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log" + exit 0 +} + +for arg in "$@"; do + case $arg in + -h|--help) help ;; + --et_build_root=*) et_build_root="${arg#*=}";; + --build_type=*) build_type="${arg#*=}";; + --etdump) build_with_etdump=true ;; + *) + ;; + esac +done + +et_build_dir="${et_build_root}/cmake-out" +et_build_host_dir=${et_build_root}/cmake-out-host-tools + +set -x +cd "${et_root_dir}" + +build_with_etdump_flags="" +if [ "$build_with_etdump" = true ] ; then + ( set +x ; + echo "--------------------------------------------------------------------------------" ; + echo "Build ExecuTorch Libraries host flatcc bin ${build_type} into ${et_build_host_dir} - ${et_build_host_dir}/bin/flatcc" ; + echo "--------------------------------------------------------------------------------" ) + + + # Build host flatcc bin + # This is a way to work around that the flatcc executable get build for target (e.g. Arm) later + # and get replaced. flatcc is a tool used on the host for etdump and BundleIO handling. + # The way to solve this is to generate it once for the host, then copy it to ${et_build_host_dir}/bin + # and later point that out with -DFLATCC_EXECUTABLE=${et_build_host_dir}/bin/flatcc later. 
+ mkdir -p ${et_build_host_dir} + cmake \ + -DCMAKE_INSTALL_PREFIX=${et_build_host_dir} \ + -DCMAKE_BUILD_TYPE=${build_type} \ + -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \ + -DEXECUTORCH_ENABLE_LOGGING=ON \ + -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON \ + -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ + -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ + -DEXECUTORCH_BUILD_DEVTOOLS=ON \ + -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ + -DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=ON \ + -DFLATCC_ALLOW_WERROR=OFF \ + -DFLATC_EXECUTABLE="$(which flatc)" \ + -B"${et_build_host_dir}" \ + "${et_root_dir}" + + # Copy host flatcc excutable to it's saved when we build for target (Arm) later + mkdir -p ${et_build_host_dir}/bin + cp third-party/flatcc/bin/flatcc ${et_build_host_dir}/bin + + # Add DevTools flags use in the Target build below + build_with_etdump_flags="-DEXECUTORCH_BUILD_DEVTOOLS=ON \ + -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ + -DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF \ + -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=OFF \ + -DFLATCC_ALLOW_WERROR=OFF \ + -DFLATCC_EXECUTABLE=${et_build_host_dir}/bin/flatcc " + echo "build_with_etdump_flags=$build_with_etdump_flags" +fi + +( set +x ; + echo "--------------------------------------------------------------------------------" ; + echo "Build ExecuTorch target libs ${build_type} into '${et_build_dir}'" ; + echo "--------------------------------------------------------------------------------" ) + +# Build +cmake \ + -DCMAKE_INSTALL_PREFIX=${et_build_dir} \ + -DCMAKE_BUILD_TYPE=${build_type} \ + -DCMAKE_TOOLCHAIN_FILE="${toolchain_cmake}" \ + -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \ + -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON \ + -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ + -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ + -DEXECUTORCH_ENABLE_LOGGING=ON \ + ${build_with_etdump_flags} \ + -DFLATC_EXECUTABLE="$(which flatc)" \ + -B"${et_build_dir}" \ + "${et_root_dir}" + +echo "[$(basename $0)] Configured CMAKE" + +cmake --build ${et_build_dir} --parallel --target install --config ${build_type} -- + +set +x + +echo "[$(basename $0)] Generated static libraries for ExecuTorch:" +find ${et_build_dir} -name "*.a" -exec ls -al {} \; diff --git a/backends/arm/scripts/build_executorch_runner.sh b/backends/arm/scripts/build_executorch_runner.sh new file mode 100755 index 00000000000..afa8f27bdff --- /dev/null +++ b/backends/arm/scripts/build_executorch_runner.sh @@ -0,0 +1,125 @@ +#!/usr/bin/env bash +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +set -eu + +script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +et_root_dir=$(cd ${script_dir}/../../.. && pwd) +et_root_dir=$(realpath ${et_root_dir}) +toolchain_cmake=${et_root_dir}/examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake + +pte_file="" +target="ethos-u55-128" +build_type="Release" +system_config="" +build_with_etdump=false +extra_build_flags="" +output_folder_set=false +output_folder="." 
+et_build_root="${et_root_dir}/arm_test" +ethosu_tools_dir=${et_root_dir}/examples/arm/ethos-u-scratch + +help() { + echo "Usage: $(basename $0) [options]" + echo "Options:" + echo " --pte= pte file (genrated by the aot_arm_compier from the model to include in the elf" + echo " --target= Target to build and run for Default: ${target}" + echo " --build_type= Build with Release, Debug or RelWithDebInfo, default is ${build_type}" + echo " --system_config= System configuration to select from the Vela configuration file (see vela.ini). Default: Ethos_U55_High_End_Embedded for EthosU55 targets, Ethos_U85_SYS_DRAM_Mid for EthosU85 targets." + echo " NOTE: If given, this option must match the given target. This option also sets timing adapter values customized for specific hardware, see ./executor_runner/CMakeLists.txt." + echo " --etdump Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log" + echo " --extra_build_flags= Extra flags to pass to cmake like -DET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE=60000 Default: none " + echo " --output= Output folder Default: /_.pte" + echo " --et_build_root= Build output root folder to use, defaults to ${et_build_root}" + echo " --ethosu_tools_dir= Path to your Ethos-U tools dir if you not using default: ${ethosu_tools_dir}" + exit 0 +} + +for arg in "$@"; do + case $arg in + -h|--help) help ;; + --pte=*) pte_file="${arg#*=}";; + --target=*) target="${arg#*=}";; + --build_type=*) build_type="${arg#*=}";; + --system_config=*) system_config="${arg#*=}";; + --etdump) build_with_etdump=true ;; + --extra_build_flags=*) extra_build_flags="${arg#*=}";; + --output=*) output_folder="${arg#*=}" ; output_folder_set=true ;; + --et_build_root=*) et_build_root="${arg#*=}";; + --ethosu_tools_dir=*) ethosu_tools_dir="${arg#*=}";; + *) + ;; + esac +done + +pte_file=$(realpath ${pte_file}) +ethosu_tools_dir=$(realpath ${ethosu_tools_dir}) +ethos_u_root_dir="$ethosu_tools_dir/ethos-u" +ethosu_tools_dir=$(realpath ${ethos_u_root_dir}) + +et_build_dir=${et_build_root}/cmake-out +et_build_dir=$(realpath ${et_build_dir}) + +if [ "$output_folder_set" = false ] ; then + pte_folder=$(cd -- "$( dirname -- "${pte_file}" )" &> /dev/null && pwd) + pte_short_name=$(basename -- "${pte_file}" ".pte") + output_folder="$pte_folder/$pte_short_name" +fi + +if [[ ${system_config} == "" ]] +then + system_config="Ethos_U55_High_End_Embedded" + if [[ ${target} =~ "ethos-u85" ]] + then + system_config="Ethos_U85_SYS_DRAM_Mid" + fi +fi + +output_folder=$(realpath ${output_folder}) + +if [[ ${target} == *"ethos-u55"* ]]; then + target_cpu=cortex-m55 +else + target_cpu=cortex-m85 +fi +echo "--------------------------------------------------------------------------------" +echo "Build Arm Baremetal executor_runner for ${target} with ${pte_file} using ${system_config} to '${output_folder}/cmake-out'" +echo "--------------------------------------------------------------------------------" + +cd ${et_root_dir}/examples/arm/executor_runner + +build_with_etdump_flags="" +if [ "$build_with_etdump" = true ] ; then + echo "Building with etdump e.g. 
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON" + build_with_etdump_flags=" -DEXECUTORCH_ENABLE_EVENT_TRACER=ON " +fi + +mkdir -p "$output_folder" + +cmake \ + -DCMAKE_BUILD_TYPE=${build_type} \ + -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} \ + -DTARGET_CPU=${target_cpu} \ + -DET_DIR_PATH:PATH=${et_root_dir} \ + -DET_BUILD_DIR_PATH:PATH=${et_build_dir} \ + -DET_PTE_FILE_PATH:PATH="${pte_file}" \ + -DETHOS_SDK_PATH:PATH=${ethos_u_root_dir} \ + -DETHOSU_TARGET_NPU_CONFIG=${target} \ + ${build_with_etdump_flags} \ + -DPYTHON_EXECUTABLE=$(which python3) \ + -DSYSTEM_CONFIG=${system_config} \ + ${extra_build_flags} \ + -B ${output_folder}/cmake-out + +echo "[${BASH_SOURCE[0]}] Configured CMAKE" + +cmake --build ${output_folder}/cmake-out --parallel -- arm_executor_runner + +echo "[${BASH_SOURCE[0]}] Generated baremetal elf file:" +find ${output_folder}/cmake-out -name "arm_executor_runner" +echo "executable_text: $(find ${output_folder}/cmake-out -name arm_executor_runner -exec arm-none-eabi-size {} \; | grep -v filename | awk '{print $1}') bytes" +echo "executable_data: $(find ${output_folder}/cmake-out -name arm_executor_runner -exec arm-none-eabi-size {} \; | grep -v filename | awk '{print $2}') bytes" +echo "executable_bss: $(find ${output_folder}/cmake-out -name arm_executor_runner -exec arm-none-eabi-size {} \; | grep -v filename | awk '{print $3}') bytes" diff --git a/backends/arm/scripts/build_portable_kernels.sh b/backends/arm/scripts/build_portable_kernels.sh new file mode 100755 index 00000000000..afdccd79cfd --- /dev/null +++ b/backends/arm/scripts/build_portable_kernels.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Optional parameter: +# --build_type= "Release" | "Debug" | "RelWithDebInfo" +# --etdump build with devtools-etdump support + +set -eu + +script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +et_root_dir=$(cd ${script_dir}/../../.. && pwd) +et_root_dir=$(realpath ${et_root_dir}) +toolchain_cmake=${script_dir}/../../../examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake +toolchain_cmake=$(realpath ${toolchain_cmake}) + + +et_build_root="${et_root_dir}/arm_test" +build_type="Release" +portable_kernels="aten::_softmax.out" + +help() { + echo "Usage: $(basename $0) [options]" + echo "Options:" + echo " --et_build_root= Build output root folder to use, defaults to ${et_build_root}" + echo " --build_type= Build with Release, Debug or RelWithDebInfo, default is ${build_type}" + echo " --portable_kernels= Comma separated list of portable (non delagated) kernels to include Default: ${portable_kernels}" + exit 0 +} + +for arg in "$@"; do + case $arg in + -h|--help) help ;; + --et_build_root=*) et_build_root="${arg#*=}";; + --build_type=*) build_type="${arg#*=}";; + --portable_kernels=*) portable_kernels="${arg#*=}";; + *) + ;; + esac +done + +et_build_dir=${et_build_root}/cmake-out + +cd "${et_root_dir}" + +echo "--------------------------------------------------------------------------------" ; +echo "Build ExecuTorch Libraries ${build_type} portable kernels: ${portable_kernels} into '${et_build_dir}'" ; +echo "--------------------------------------------------------------------------------" + +if ! 
[[ $portable_kernels =~ ^((^|,)aten::[a-zA-Z0-9_]+\.[a-zA-Z0-9_]*out)*$ ]]; then + echo " ERROR: specified argument --portable_kernels=${portable_kernels}" + echo " is in the wrong format please use \"aten::.out,aten::.out,...\"" + echo " e.g. \"aten::_softmax.out,aten::add.out\"" + exit 1 +fi + +set -x + +cmake \ + -DCMAKE_INSTALL_PREFIX=${et_build_dir} \ + -DCMAKE_BUILD_TYPE=${build_type} \ + -DCMAKE_TOOLCHAIN_FILE="${toolchain_cmake}" \ + -DEXECUTORCH_SELECT_OPS_LIST=${portable_kernels} \ + -B"${et_build_dir}/examples/arm" \ + "${et_root_dir}/examples/arm" + +cmake --build "${et_build_dir}/examples/arm" --parallel --config ${build_type} -- + +set +x + +echo "[$(basename $0)] Generated static libraries for ExecuTorch:" +find "${et_build_dir}/examples/arm" -name "*.a" -exec ls -al {} \; diff --git a/backends/arm/scripts/build_quantized_ops_aot_lib.sh b/backends/arm/scripts/build_quantized_ops_aot_lib.sh index 3c70b48a5dc..ad6fad9c122 100755 --- a/backends/arm/scripts/build_quantized_ops_aot_lib.sh +++ b/backends/arm/scripts/build_quantized_ops_aot_lib.sh @@ -4,26 +4,51 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -# Needs to be run from exeuctorch root. # Optional parameter: 1: build_type= "Release" | "Debug" | "RelWithDebInfo" +set -eu +script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +et_root_dir=$(cd ${script_dir}/../../.. && pwd) +et_root_dir=$(realpath ${et_root_dir}) + build_type="Release" +et_build_root="${et_root_dir}" + +help() { + echo "Usage: $(basename $0) [options]" + echo "Options:" + echo " --et_build_root= Build output root folder to use, defaults to ${et_build_root}" + echo " --build_type= Build with Release, Debug or RelWithDebInfo, default is ${build_type}" + exit 0 +} + +for arg in "$@"; do + case $arg in + -h|--help) help ;; + --et_build_root=*) et_build_root="${arg#*=}";; + --build_type=*) build_type="${arg#*=}";; + *) + ;; + esac +done + +et_build_dir=${et_build_root}/cmake-out-aot-lib -build_type=${1:-$build_type} +cd "${et_root_dir}" echo "--------------------------------------------------------------------------------" -echo "Build .so library to register quant ops with AoT flow ${build_type} into '$(echo $(pwd))/cmake-out-aot-lib'" +echo "Build quantized_ops_aot_lib library to register quant ops with AoT flow ${build_type} into '${et_build_dir}'" echo "--------------------------------------------------------------------------------" # Since we only want to build the quantized_aot lib in the specified folder, # we want exactly the configuration set below and deleting the cache is OK. -rm -f cmake-out-aot-lib/CMakeCache.txt +rm -f ${et_build_dir}/CMakeCache.txt CXXFLAGS="-fno-exceptions -fno-rtti" cmake \ -DCMAKE_BUILD_TYPE=${build_type} \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED_AOT=ON \ - -Bcmake-out-aot-lib \ + -B${et_build_dir} \ . -cmake --build cmake-out-aot-lib --parallel -- quantized_ops_aot_lib +cmake --build ${et_build_dir} --parallel -- quantized_ops_aot_lib diff --git a/backends/arm/scripts/run_fvp.sh b/backends/arm/scripts/run_fvp.sh new file mode 100755 index 00000000000..568f07011f2 --- /dev/null +++ b/backends/arm/scripts/run_fvp.sh @@ -0,0 +1,104 @@ +#!/usr/bin/env bash +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +# Optional parameter: +# --build_type= "Release" | "Debug" | "RelWithDebInfo" +# --etdump build with devtools-etdump support + +set -eu + +script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +et_root_dir=$(cd ${script_dir}/../../.. && pwd) +et_root_dir=$(realpath ${et_root_dir}) +setup_path_script=${et_root_dir}/examples/arm/ethos-u-scratch/setup_path.sh +_setup_msg="please refer to ${et_root_dir}/examples/arm/setup.sh to properly install necessary tools." + + +elf_file="" +target="ethos-u55-128" + +help() { + echo "Usage: $(basename $0) [options]" + echo "Options:" + echo " --elf= elf file to run" + echo " --target= Target to build and run for Default: ${target}" + exit 0 +} + +for arg in "$@"; do + case $arg in + -h|--help) help ;; + --elf=*) elf_file="${arg#*=}";; + --target=*) target="${arg#*=}";; + *) + ;; + esac +done + +elf_file=$(realpath ${elf_file}) + +if [[ ${target} == *"ethos-u55"* ]]; then + fvp_model=FVP_Corstone_SSE-300_Ethos-U55 +else + fvp_model=FVP_Corstone_SSE-320 +fi + +# Source the tools +# This should be prepared by the setup.sh +[[ -f ${setup_path_script} ]] \ + || { echo "Missing ${setup_path_script}. ${_setup_msg}"; exit 1; } + +source ${setup_path_script} + +# basic checks before we get started +hash ${fvp_model} \ + || { echo "Could not find ${fvp_model} on PATH, ${_setup_msg}"; exit 1; } + + +[[ ! -f $elf_file ]] && { echo "[${BASH_SOURCE[0]}]: Unable to find executor_runner elf: ${elf_file}"; exit 1; } +num_macs=$(echo ${target} | cut -d - -f 3) + +echo "--------------------------------------------------------------------------------" +echo "Running ${elf_file} for ${target} run with FVP:${fvp_model} num_macs:${num_macs}" +echo "--------------------------------------------------------------------------------" + +log_file=$(mktemp) + +if [[ ${target} == *"ethos-u55"* ]]; then + ${fvp_model} \ + -C ethosu.num_macs=${num_macs} \ + -C mps3_board.visualisation.disable-visualisation=1 \ + -C mps3_board.telnetterminal0.start_telnet=0 \ + -C mps3_board.uart0.out_file='-' \ + -C mps3_board.uart0.shutdown_on_eot=1 \ + -a "${elf_file}" \ + --timelimit 220 2>&1 | tee ${log_file} || true # seconds + echo "[${BASH_SOURCE[0]}] Simulation complete, $?" +elif [[ ${target} == *"ethos-u85"* ]]; then + ${fvp_model} \ + -C mps4_board.subsystem.ethosu.num_macs=${num_macs} \ + -C mps4_board.visualisation.disable-visualisation=1 \ + -C vis_hdlcd.disable_visualisation=1 \ + -C mps4_board.telnetterminal0.start_telnet=0 \ + -C mps4_board.uart0.out_file='-' \ + -C mps4_board.uart0.shutdown_on_eot=1 \ + -a "${elf_file}" \ + --timelimit 220 2>&1 | tee ${log_file} || true # seconds + echo "[${BASH_SOURCE[0]}] Simulation complete, $?" +else + echo "Running ${elf_file} for ${target} is not supported" + exit 1 +fi + +echo "Checking for problems in log:" +! grep -E "^(F|E|\\[critical\\]|Hard fault.|Info: Simulation is stopping. Reason: CPU time has been exceeded.).*$" ${log_file} +if [ $? != 0 ]; then + echo "Found ERROR" + rm "${log_file}" + exit 1 +fi +echo "No problems found!" 
+rm "${log_file}" diff --git a/backends/arm/test/runner_utils.py b/backends/arm/test/runner_utils.py index 65be0b88f7b..2d182b4a410 100644 --- a/backends/arm/test/runner_utils.py +++ b/backends/arm/test/runner_utils.py @@ -525,7 +525,7 @@ def corstone320_installed() -> bool: def get_elf_path(target_board): elf_path = os.path.join( - "cmake-out", + "arm_test", f"arm_semihosting_executor_runner_{target_board}", "arm_executor_runner", ) diff --git a/backends/arm/test/setup_testing.sh b/backends/arm/test/setup_testing.sh index ebf9d799677..b9f8fc454ee 100755 --- a/backends/arm/test/setup_testing.sh +++ b/backends/arm/test/setup_testing.sh @@ -12,8 +12,8 @@ et_root_dir=$(cd ${script_dir}/../../.. && pwd) ethos_u_root_dir=${et_root_dir}/examples/arm/ethos-u-scratch/ethos-u toolchain_cmake=${et_root_dir}/examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake -et_build_dir=${et_root_dir}/cmake-out -build_root_test_dir=${et_build_dir}/arm_semihosting_executor_runner +et_build_dir=${et_root_dir}/arm_test/cmake-out +build_root_test_dir=${et_root_dir}/arm_test/arm_semihosting_executor_runner # Build Arm Baremetal executor_runner in semihosting mode. # Put in backends/arm/test/res to be used by unit tests. @@ -38,12 +38,12 @@ function build_semihosting_executorch_runner() { -DTARGET_CPU=${target_cpu} \ -DSEMIHOSTING=ON \ -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=${build_test_dir} \ - -B ${build_test_dir} \ -DETHOS_SDK_PATH:PATH=${ethos_u_root_dir} \ -DET_DIR_PATH:PATH=${et_root_dir} \ -DET_BUILD_DIR_PATH:PATH=${et_build_dir} \ -DPYTHON_EXECUTABLE=$(which python3) \ - -DSYSTEM_CONFIG=${system_config} + -DSYSTEM_CONFIG=${system_config} \ + -B ${build_test_dir} echo "[${FUNCNAME[0]}] Configured CMAKE" n=$(nproc) diff --git a/backends/arm/test/test_arm_baremetal.sh b/backends/arm/test/test_arm_baremetal.sh index 9f2fa4c17d0..6c2784501b0 100755 --- a/backends/arm/test/test_arm_baremetal.sh +++ b/backends/arm/test/test_arm_baremetal.sh @@ -17,46 +17,47 @@ pwd TEST_SUITE=$1 help() { - echo "Usage:" - echo " $0 " - echo " where can be any of:" - # This will list all lines in this file that is starting with test_ remove () { and print it as a list. - # e,g, "test_pytest() { # Test ops and other things" -> test_pytest # Test ops and other things - echo "all # run all tests" - grep "^test_" $0 | sed 's/([^)]*)[[:space:]]*{*//g' - exit + echo "Usage:" + echo " $0 " + echo " where can be any of:" + # This will list all lines in this file that is starting with test_ remove () { and print it as a list. + # e,g, "test_pytest() { # Test ops and other things" -> test_pytest # Test ops and other things + echo "all # run all tests" + grep "^test_" $0 | sed 's/([^)]*)[[:space:]]*{*//g' + exit } if [[ -z "${TEST_SUITE:-}" ]]; then - echo "Missing test suite name, exiting..." - help + echo "Missing test suite name, exiting..." + help else - echo "Run Arm baremetal test suite ${TEST_SUITE}" + echo "Run Arm baremetal test suite ${TEST_SUITE}" fi TEST_SUITE_NAME="$(basename "$0") ${TEST_SUITE}" all() { # Run all tests - # This will list all lines in this file that is starting with test_ remove () { and add this script name in - # front of it and execute it in a sub shell - # e.g. from this file: - # - # test_pytest() { # Test ops and other things - # bla bla bla - # } - # test_pytest_ethosu_fvp() { # Same as test_pytest but ... - # bla bla bla - # } - #... 
- # become a small script: - # ---- - # backends/arm/test/test_arm_baremetal.sh test_pytest # Test ops and other things - # backends/arm/test/test_arm_baremetal.sh test_pytest_ethosu_fvp # Same as test_pytest but ... - # ... - # ---- - # That is executed - echo "${TEST_SUITE_NAME}: Run all tests" - grep "^test_" backends/arm/test/test_arm_baremetal.sh | sed 's/([^)]*)[[:space:]]*{*//g' | sed "s|^|$0 |" | sh + # This will list all lines in this file that is starting with test_ remove () { and add this script name in + # front of it and execute it in a sub shell + # e.g. from this file: + # + # test_pytest() { # Test ops and other things + # bla bla bla + # } + # test_pytest_ethosu_fvp() { # Same as test_pytest but ... + # bla bla bla + # } + #... + # become a small script: + # ---- + # backends/arm/test/test_arm_baremetal.sh test_pytest # Test ops and other things + # backends/arm/test/test_arm_baremetal.sh test_pytest_ethosu_fvp # Same as test_pytest but ... + # ... + # ---- + # That is executed + echo "${TEST_SUITE_NAME}: Run all tests" + grep "^test_" backends/arm/test/test_arm_baremetal.sh | sed 's/([^)]*)[[:space:]]*{*//g' | sed "s|^|$0 |" | sh + echo "${TEST_SUITE_NAME}: PASS" } test_pytest() { # Test ops and other things @@ -67,6 +68,7 @@ test_pytest() { # Test ops and other things # Run arm baremetal pytest tests without FVP pytest --verbose --color=yes --numprocesses=auto backends/arm/test/ + echo "${TEST_SUITE_NAME}: PASS" } test_pytest_ethosu_fvp() { # Same as test_pytest but also sometime verify using Corstone FVP @@ -80,28 +82,68 @@ test_pytest_ethosu_fvp() { # Same as test_pytest but also sometime verify using # Run arm baremetal pytest tests with FVP pytest --verbose --color=yes --numprocesses=auto backends/arm/test/ --arm_run_corstoneFVP + echo "${TEST_SUITE_NAME}: PASS" } -test_run_ethosu_fvp() { # End to End model tests +test_run_ethosu_fvp() { # End to End model tests using run.sh echo "${TEST_SUITE_NAME}: Test ethos-u delegate examples with run.sh" source examples/arm/ethos-u-scratch/setup_path.sh # TOSA quantized echo "${TEST_SUITE_NAME}: Test ethos-u target TOSA" - examples/arm/run.sh --target=TOSA --model_name=mv2 - examples/arm/run.sh --target=TOSA --model_name=lstm - examples/arm/run.sh --target=TOSA --model_name=edsr + examples/arm/run.sh --target=TOSA --model_name=add + examples/arm/run.sh --target=TOSA --model_name=mul # Ethos-U55 echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U55" - examples/arm/run.sh --target=ethos-u55-128 --model_name=mv2 - examples/arm/run.sh --target=ethos-u55-128 --model_name=lstm + examples/arm/run.sh --target=ethos-u55-128 --model_name=add + examples/arm/run.sh --target=ethos-u55-128 --model_name=mul # Ethos-U85 echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U85" - examples/arm/run.sh --target=ethos-u85-128 --model_name=mv2 - examples/arm/run.sh --target=ethos-u85-128 --model_name=lstm + examples/arm/run.sh --target=ethos-u85-128 --model_name=add + examples/arm/run.sh --target=ethos-u85-128 --model_name=mul + echo "${TEST_SUITE_NAME}: PASS" } +test_models_ethosu_fvp() { # End to End model tests using model_test.py + echo "${TEST_SUITE_NAME}: Test ethos-u delegate models with test_model.py" + + source examples/arm/ethos-u-scratch/setup_path.sh + + # Build common libs once + python3 backends/arm/test/test_model.py --build_libs + + # TOSA quantized + echo "${TEST_SUITE_NAME}: Test ethos-u target TOSA" + python3 backends/arm/test/test_model.py --target=TOSA --model=mv2 + python3 backends/arm/test/test_model.py --target=TOSA 
--model=mv3 + python3 backends/arm/test/test_model.py --target=TOSA --model=lstm + python3 backends/arm/test/test_model.py --target=TOSA --model=edsr + + # Ethos-U55 + echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U55" + python3 backends/arm/test/test_model.py --target=ethos-u55-128 --model=mv2 + python3 backends/arm/test/test_model.py --target=ethos-u55-64 --model=mv3 + python3 backends/arm/test/test_model.py --target=ethos-u55-256 --model=lstm + + # Ethos-U85 + echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U85" + python3 backends/arm/test/test_model.py --target=ethos-u85-256 --model=mv2 + python3 backends/arm/test/test_model.py --target=ethos-u85-1024 --model=mv3 + python3 backends/arm/test/test_model.py --target=ethos-u85-128 --model=lstm + echo "${TEST_SUITE_NAME}: PASS" + } + +test_full_ethosu_fvp() { # All End to End model tests + echo "${TEST_SUITE_NAME}: Test ethos-u delegate models and examples on fvp" + + test_models_ethosu_fvp + test_run_ethosu_fvp + echo "${TEST_SUITE_NAME}: PASS" + } + + + ${TEST_SUITE} \ No newline at end of file diff --git a/backends/arm/test/test_model.py b/backends/arm/test/test_model.py new file mode 100755 index 00000000000..990b9e5f70b --- /dev/null +++ b/backends/arm/test/test_model.py @@ -0,0 +1,247 @@ +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import os +import platform +import subprocess +import sys + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--build_libs", + action="store_true", + required=False, + default=False, + help="Flag for building executorch libs needed for this testing", + ) + parser.add_argument( + "--model", + required=False, + default=None, + help="Model to use that aot_arm_compiler.py can handle, can be a builtin, examples/models or a filename.", + ) + parser.add_argument( + "--target", + required=False, + default=None, + help="Target name", + ) + parser.add_argument( + "--test_output", + required=False, + default="arm_test", + help="Output folder used for build and test defults to arm_test", + ) + parser.add_argument( + "--system_config", + required=False, + default=None, + help="Target specific system_config (See Vela compiler)", + ) + parser.add_argument( + "--memory_mode", + required=False, + default=None, + help="Target specific memory_mode (See Vela compiler)", + ) + parser.add_argument( + "--no_intermediate", + action="store_true", + required=False, + default=False, + help="Don't save temporary files during compilation", + ) + + args = parser.parse_args() + + if args.model and "ethos-u" in args.target and args.system_config is None: + if "u55" in args.target: + args.system_config = "Ethos_U55_High_End_Embedded" + elif "u85" in args.target: + args.system_config = "Ethos_U85_SYS_DRAM_Mid" + else: + raise RuntimeError(f"Invalid target name {args.target}") + + if args.model and "ethos-u" in args.target and args.memory_mode is None: + if "u55" in args.target: + args.memory_mode = "Shared_Sram" + elif "u85" in args.target: + args.memory_mode = "Sram_Only" + else: + raise RuntimeError(f"Invalid target name {args.target}") + + return args + + +def run_external_cmd(cmd: []): + print("CALL:", *cmd, sep=" ") + try: + subprocess.check_call(cmd) + except subprocess.CalledProcessError as err: + print("ERROR called: ", *cmd, sep=" ") + print(f"Failed with: {err.returncode}") + sys.exit(err.returncode) + + +def 
build_libs(et_build_root: str, script_path: str): + run_external_cmd( + [ + "bash", + os.path.join(script_path, "build_executorch.sh"), + f"--et_build_root={et_build_root}", + "--build_type=Release", + ] + ) + run_external_cmd( + [ + "bash", + os.path.join(script_path, "build_portable_kernels.sh"), + f"--et_build_root={et_build_root}", + "--build_type=Release", + "--portable_kernels=aten::_softmax.out", + ] + ) + run_external_cmd( + [ + "bash", + os.path.join(script_path, "build_quantized_ops_aot_lib.sh"), + f"--et_build_root={et_build_root}", + "--build_type=Release", + ] + ) + + +def build_pte( + et_build_root: str, + model_name: str, + target: str, + system_config: str, + memory_mode: str, + build_output: str, + no_intermediate: bool, +): + soext = {"Darwin": "dylib", "Linux": "so", "Windows": "dll"}.get( + platform.system(), None + ) + solibs_path = os.path.join( + et_build_root, + "cmake-out-aot-lib", + "kernels", + "quantized", + f"libquantized_ops_aot_lib.{soext}", + ) + solibs = f"--so_library={solibs_path}" + + intermediate = "" + if not no_intermediate: + intermediate = f"--intermediate={output}" + + run_external_cmd( + [ + "python3", + "-m", + "examples.arm.aot_arm_compiler", + "--delegate", + "--quantize", + intermediate, + f"--model_name={model_name}", + f"--target={target}", + f"--output={build_output}", + f"--system_config={system_config}", + f"--memory_mode={memory_mode}", + solibs, + ] + ) + + pte_file = os.path.join(output, f"{model_name}_arm_delegate_{args.target}.pte") + return pte_file + + +def build_ethosu_runtime( + et_build_root: str, + script_path: str, + pte_file: str, + target: str, + system_config: str, + elf_build_path: str, +): + run_external_cmd( + [ + "bash", + os.path.join(script_path, "build_executorch_runner.sh"), + f"--et_build_root={et_build_root}", + f"--pte={pte_file}", + f"--target={target}", + "--build_type=Release", + f"--system_config={system_config}", + f"--output={elf_build_path}", + ] + ) + + elf_file = os.path.join(elf_build_path, "cmake-out", "arm_executor_runner") + return elf_file + + +def run_elf_with_fvp(script_path: str, elf_file: str, target: str): + run_external_cmd( + [ + "bash", + os.path.join(script_path, "run_fvp.sh"), + f"--elf={elf_file}", + f"--target={target}", + ] + ) + + +if __name__ == "__main__": + + args = get_args() + script_path = os.path.join("backends", "arm", "scripts") + + if args.build_libs: + build_libs(args.test_output, script_path) + + if args.model: + model_name = args.model.split(" ")[0].split(";")[0] + if not model_name: + print("ERROR: Bad --model specified") + if not args.target: + print("ERROR: --model need --target to also be set") + + output = os.path.join( + args.test_output, f"{model_name}_arm_delegate_{args.target}" + ) + + pte_file = build_pte( + args.test_output, + model_name, + args.target, + args.system_config, + args.memory_mode, + output, + args.no_intermediate, + ) + print(f"PTE file created: {pte_file} ") + + if "ethos-u" in args.target: + elf_build_path = os.path.join( + output, f"{model_name}_arm_delegate_{args.target}" + ) + + elf_file = build_ethosu_runtime( + args.test_output, + script_path, + pte_file, + args.target, + args.system_config, + elf_build_path, + ) + print(f"ELF file created: {elf_file} ") + + run_elf_with_fvp(script_path, elf_file, args.target) + print(f"Model: {model_name} on {args.target} -> PASS") diff --git a/docs/source/executorch-arm-delegate-tutorial.md b/docs/source/executorch-arm-delegate-tutorial.md index ff6d4abbbac..feb8f0335fa 100644 --- 
a/docs/source/executorch-arm-delegate-tutorial.md +++ b/docs/source/executorch-arm-delegate-tutorial.md @@ -200,7 +200,7 @@ Following script will serve as a helper utility to help us generate the `.pte` f ```bash python3 -m examples.arm.aot_arm_compiler --model_name="softmax" -# This should produce ./softmax.pte +# This should produce ./softmax_arm_ethos-u55-128.pte ``` ### Delegated Workflow @@ -221,12 +221,14 @@ Similar to the non-delegate flow, the same script will server as a helper utilit ```bash python3 -m examples.arm.aot_arm_compiler --model_name="add" --delegate -# should produce ./add_arm_delegate.pte +# should produce ./add_arm_delegate_ethos-u55-128.pte ``` ### Delegated Quantized Workflow Before generating the `.pte` file for delegated quantized networks like MobileNetV2, we need to build the `quantized_ops_aot_lib` +You can just run the `backends/arm/scripts/build_quantized_ops_aot_lib.sh` script to build this for you or build it yourself like this. + ```bash cd @@ -245,7 +247,7 @@ cmake --build cmake-out-aot-lib --parallel -- quantized_ops_aot_lib After the `quantized_ops_aot_lib` build, we can run the following script to generate the `.pte` file ```bash python3 -m examples.arm.aot_arm_compiler --model_name="mv2" --delegate --quantize --so_library="$(find cmake-out-aot-lib -name libquantized_ops_aot_lib.so)" -# should produce ./mv2_arm_delegate.pte.pte +# should produce ./mv2_arm_delegate_ethos-u55-128.pte ```
@@ -262,6 +264,14 @@ Now let's try to run these `.pte` files on a Corstone-300 and Corstone-320 platf In this section, we will go over steps that you need to go through to build the runtime application. This then run on the target device. In the executorch repository we have a functioning script which does the exact same steps. It is located at `executorch/examples/arm/run.sh`. We will use that to build necessary pieces and finally run the previously generated PTE file on an FVP. +By default the `run.sh` will use `arm_test/` as a build and output folder and you will find the build artifacts under it. This can be controlled/overridden with the `--et_build_root` and the `--output` flags if needed. + +e.g. running `examples/arm/run.sh --model_name=add --target=ethos-u85-128` will produce a pte and elf file like this: + +```bash +arm_test/add/add_arm_delegate_ethos-u85-128.pte +arm_test/add/cmake-out/arm_executor_runner +``` Also before we get started, make sure that you have completed ExecuTorch cmake build setup, and the instructions to setup the development environment described [earlier](#set-up-the-developer-environment). The block diagram below demonstrates, at the high level, how the various build artifacts are generated and are linked together to generate the final bare-metal executable. @@ -286,23 +296,19 @@ To run a `.pte` file with the Arm backend delegate call instructions, we will ne - `libexecutorch_delegate_ethos_u.a` -These libraries are generated in `build_executorch` and `build_quantization_aot_lib` function of the `run.sh` script. +These libraries are generated by the `backends/arm/scripts/build_executorch.sh`, `backends/arm/scripts/build_portable_kernels.sh` and `backends/arm/scripts/build_quantized_ops_aot_lib.sh` scripts called from the `run.sh` script. -In this function, `EXECUTORCH_SELECT_OPS_LIST` will decide the number of portable operators included in the build and are available at runtime. It must match with `.pte` file's requirements, otherwise you will get `Missing Operator` error at runtime. +The `--portable_kernels` flag can be used to set the build flag `EXECUTORCH_SELECT_OPS_LIST` when running `backends/arm/scripts/build_portable_kernels.sh`, which decides the number of portable operators included in the build and available at runtime. It must match with `.pte` file's requirements, otherwise you will get `Missing Operator` error at runtime. For example, there in the command line above, to run SoftmaxModule, we only included the softmax CPU operator. Similarly, to run AddModule in a non-delegated manner you will need add op and so on. As you might have already realized, for the delegated operators, which will be executed by the Arm backend delegate, we do not need to include those operators in this list. This is only for *non-delegated* operators. -```{tip} -The `run.sh` script takes in `--portable_kernels` option, which provides a way to supply a comma seperated list of portable kernels to be included. -``` - ### Building the executor_runner Bare-Metal Application The SDK dir is the same one prepared [earlier](#setup-the-arm-ethos-u-software-development). And, we will be passing the `.pte` file (any one of them) generated above. Note, you have to generate a new `executor-runner` binary if you want to change the model or the `.pte` file. This constraint is from the constrained bare-metal runtime environment we have for Corstone-300/Corstone-320 platforms. -This is performed by the `build_executorch_runner` function in `run.sh`. 
+This is performed by the `backends/arm/scripts/build_executorch_runner.sh` script runned from `run.sh`. ```{tip} The `run.sh` script takes in `--target` option, which provides a way to provide a specific target, Corstone-300(ethos-u55-128) or Corstone-320(ethos-u85-128) @@ -310,7 +316,10 @@ The `run.sh` script takes in `--target` option, which provides a way to provide ## Running on Corstone FVP Platforms -Once the elf is prepared, regardless of the `.pte` file variant is used to generate the bare metal elf. The below command is used to run the [MV2Model](#mv2module) on Corstone-320 FVP +Once the elf is prepared, regardless of the `.pte` file variant is used to generate the bare metal elf. `run.sh` will run the FVP for you via the `backends/arm/scripts/run_fvp.sh` script but you can also run it directly. + + +The below command is used to run the [MV2Model](#mv2module) on Corstone-320 FVP ```bash ethos_u_build_dir=examples/arm/executor_runner/ diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py index 33d8bc5ebf2..ccd736f7fce 100644 --- a/examples/arm/aot_arm_compiler.py +++ b/examples/arm/aot_arm_compiler.py @@ -484,15 +484,15 @@ def get_args(): # noqa C901 ): raise RuntimeError(f"Model {args.model_name} cannot be delegated.") - if args.system_config is None: + if "ethos-u" in args.target and args.system_config is None: if "u55" in args.target: args.system_config = "Ethos_U55_High_End_Embedded" elif "u85" in args.target: - args.system_confg = "Ethos_U85_SYS_DRAM_Mid" + args.system_config = "Ethos_U85_SYS_DRAM_Mid" else: raise RuntimeError(f"Invalid target name {args.target}") - if args.memory_mode is None: + if "ethos-u" in args.target and args.memory_mode is None: if "u55" in args.target: args.memory_mode = "Shared_Sram" elif "u85" in args.target: @@ -591,6 +591,7 @@ def get_args(): # noqa C901 output_name = os.path.join(args.output, output_name) save_pte_program(exec_prog, output_name) + print(f"PTE file saved as {output_name}.pte") if args.evaluate: evaluate_model( diff --git a/examples/arm/run.sh b/examples/arm/run.sh index 1a50f59d454..ce92312b652 100755 --- a/examples/arm/run.sh +++ b/examples/arm/run.sh @@ -9,15 +9,13 @@ set -eu - - ######## ### Hardcoded constants ######## script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +et_root_dir=$(cd ${script_dir}/../.. 
&& pwd) +et_root_dir=$(realpath ${et_root_dir}) -# Default Ethos-u tool folder override with --scratch-dir= -root_dir=${script_dir}/ethos-u-scratch model_name="" aot_arm_compiler_flags="--delegate --quantize" @@ -31,23 +29,26 @@ extra_build_flags="" build_only=false system_config="" memory_mode="" +et_build_root="${et_root_dir}/arm_test" +ethos_u_scratch_dir=${script_dir}/ethos-u-scratch -help() { +function help() { echo "Usage: $(basename $0) [options]" echo "Options:" echo " --model_name= Model to run, can be a builtin, examples/models or a filename Default to all builtin models" echo " --aot_arm_compiler_flags= Only used if --model_name is used Default: ${aot_arm_compiler_flags}" echo " --portable_kernels= Comma separated list of portable (non delagated) kernels to include Default: ${portable_kernels}" echo " --target= Target to build and run for Default: ${target}" - echo " --output= Output folder Default: ${output_folder}" + echo " --output= Target build output folder Default: ${output_folder}" echo " --etdump Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log" - echo " --debug_build Build with debug flag, default is Release" - echo " --extra_build_flags Extra flags to pass to cmake like -DET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE=60000 Default: none " + echo " --build_type= Build with Release, Debug or RelWithDebInfo, default is ${build_type}" + echo " --extra_build_flags= Extra flags to pass to cmake like -DET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE=60000 Default: none " echo " --build_only Only build, don't run FVP" - echo " --scratch-dir= Path to your Ethos-U scrach dir if you not using default" echo " --system_config= System configuration to select from the Vela configuration file (see vela.ini). Default: Ethos_U55_High_End_Embedded for EthosU55 targets, Ethos_U85_SYS_DRAM_Mid for EthosU85 targets." echo " NOTE: If given, this option must match the given target. This option also sets timing adapter values customized for specific hardware, see ./executor_runner/CMakeLists.txt." echo " --memory_mode= Memory mode to select from the Vela configuration file (see vela.ini), e.g. Shared_Sram/Sram_Only. 
Default: 'Shared_Sram' for Ethos-U55 targets, 'Sram_Only' for Ethos-U85 targets" + echo " --et_build_root= Executorch build output root folder to use, defaults to ${et_build_root}" + echo " --scratch-dir= Path to your Ethos-U scrach dir if you not using default ${ethos_u_scratch_dir}" exit 0 } @@ -60,43 +61,26 @@ for arg in "$@"; do --target=*) target="${arg#*=}";; --output=*) output_folder="${arg#*=}" ; output_folder_set=true ;; --etdump) build_with_etdump=true ;; - --debug_build) build_type="Debug" ;; + --build_type=*) build_type="${arg#*=}";; --extra_build_flags=*) extra_build_flags="${arg#*=}";; --build_only) build_only=true ;; - --scratch-dir=*) root_dir="${arg#*=}";; --system_config=*) system_config="${arg#*=}";; --memory_mode=*) memory_mode="${arg#*=}";; + --et_build_root=*) et_build_root="${arg#*=}";; + --scratch-dir=*) ethos_u_scratch_dir="${arg#*=}";; *) ;; esac done -root_dir=$(realpath ${root_dir}) -output_folder=$(realpath ${output_folder}) -mkdir -p ${output_folder} -if [ "$output_folder_set" = true ] ; then - executor_runner_path=${output_folder} -else - executor_runner_path=${script_dir}/executor_runner -fi -executor_runner_path=$(realpath ${executor_runner_path}) - -mkdir -p ${root_dir}/ethos-u -ethos_u_root_dir="$(cd ${root_dir}/ethos-u && pwd)" -setup_path_script=${root_dir}/setup_path.sh +# Default Ethos-u tool folder override with --scratch-dir= +ethos_u_scratch_dir=$(realpath ${ethos_u_scratch_dir}) +setup_path_script=${ethos_u_scratch_dir}/setup_path.sh +toolchain_cmake=${script_dir}/ethos-u-setup/arm-none-eabi-gcc.cmake +_setup_msg="please refer to ${script_dir}/setup.sh to properly install necessary tools." -# Executorch -et_root_dir=$(cd ${script_dir}/../.. && pwd) -et_build_dir=${et_root_dir}/cmake-out # Set target based variables -fvp_model=FVP_Corstone_SSE-300_Ethos-U55 -if [[ ${target} =~ "ethos-u85" ]] -then - echo "target is ethos-u85 variant so switching to CS320 FVP" - fvp_model=FVP_Corstone_SSE-320 -fi - if [[ ${system_config} == "" ]] then system_config="Ethos_U55_High_End_Embedded" @@ -115,227 +99,6 @@ then fi fi -toolchain_cmake=${script_dir}/ethos-u-setup/arm-none-eabi-gcc.cmake -_setup_msg="please refer to ${script_dir}/ethos-u-setup/setup.sh to properly install necessary tools." - -if ! [[ $portable_kernels =~ ^((^|,)aten::[a-zA-Z0-9_]+\.[a-zA-Z0-9_]*out)*$ ]]; then - echo " ERROR: specified argument --portable_kernels=${portable_kernels}" - echo " is in the wrong format please use \"aten::.out,aten::.out,...\"" - echo " e.g. \"aten::_softmax.out,aten::add.out\"" - exit 1 -fi - -# Generate a pte file -# output from this function is the pte filename e.g. echo should be avoided or directed to stderr e.g. 
>&2 -function generate_pte_file() { - [[ $# -ne 2 ]] && { echo "[${FUNCNAME[0]}]" "Expecting model and model_compiler_flags flag, got, $*"; exit 1; } - local model=${1} - local model_short_name=$(basename -- "${model}" ".py") - local model_compiler_flags=${2} - - local model_filename=${model_short_name}_arm_${target}.pte - if [[ "${model_compiler_flags}" == *"--delegate"* ]]; then - # Name aligned with default aot_arm_compiler output - model_filename=${model_short_name}_arm_delegate_${target}.pte - fi - cd $et_root_dir - - local pte_file - pte_file=$(realpath ${output_folder}/${model_filename}) - rm -f "${pte_file}" - - SO_EXT=$(python3 -c 'import platform; print({"Darwin": "dylib", "Linux": "so", "Windows": "dll"}.get(platform.system(), None))') - # We are using the aot_lib from build_quantization_aot_lib below - SO_LIB=$(find cmake-out-aot-lib -name libquantized_ops_aot_lib.${SO_EXT}) - - local ARM_AOT_CMD="python3 -m examples.arm.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --output ${output_folder} --so_library=$SO_LIB --system_config=${system_config} --memory_mode=${memory_mode}" - echo "CALL ${ARM_AOT_CMD}" >&2 - ${ARM_AOT_CMD} 1>&2 - - [[ -f ${pte_file} ]] || { >&2 echo "Failed to generate a pte file - ${pte_file}"; exit 1; } - echo "${pte_file}" -} - -# build ExecuTorch Libraries -function build_executorch() { - set -x - - [[ -d "${et_build_dir}" ]] \ - && echo "[${FUNCNAME[0]}] Warn: using already existing build-dir for executorch: ${et_build_dir}!!" - mkdir -p "${et_build_dir}" - - cd "${et_root_dir}" - - build_with_etdump_flags="" - if [ "$build_with_etdump" = true ] ; then - ( set +x ; - echo "--------------------------------------------------------------------------------" ; - echo "Build ExecuTorch Libraries host flatcc bin ${build_type} into ${et_root_dir} - cmake-out-host-tools/bin/flatcc" ; - echo "--------------------------------------------------------------------------------" ) - - - # Build host flatcc bin - mkdir -p cmake-out-host-tools - cmake \ - -DCMAKE_INSTALL_PREFIX=${et_build_dir} \ - -DCMAKE_BUILD_TYPE=${build_type} \ - -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \ - -DEXECUTORCH_ENABLE_LOGGING=ON \ - -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON \ - -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ - -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ - -DEXECUTORCH_BUILD_DEVTOOLS=ON \ - -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ - -DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=ON \ - -DFLATCC_ALLOW_WERROR=OFF \ - -DFLATC_EXECUTABLE="$(which flatc)" \ - ${extra_build_flags} \ - -Bcmake-out-host-tools \ - "${et_root_dir}" - - mkdir -p cmake-out-host-tools/bin - cp third-party/flatcc/bin/flatcc cmake-out-host-tools/bin - - build_with_etdump_flags="-DEXECUTORCH_BUILD_DEVTOOLS=ON \ - -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ - -DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF \ - -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=OFF \ - -DFLATCC_ALLOW_WERROR=OFF \ - -DFLATCC_EXECUTABLE=${et_root_dir}/cmake-out-host-tools/bin/flatcc " - fi - - ( set +x ; - echo "--------------------------------------------------------------------------------" ; - echo "Build ExecuTorch Libraries target libs with --target install ${build_type} into '${et_root_dir}' - '${et_build_dir}'" ; - echo "--------------------------------------------------------------------------------" ) - - # Build - cmake \ - -DCMAKE_INSTALL_PREFIX=${et_build_dir} \ - -DCMAKE_BUILD_TYPE=${build_type} \ - -DCMAKE_TOOLCHAIN_FILE="${toolchain_cmake}" \ - -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \ - 
-DEXECUTORCH_BUILD_ARM_BAREMETAL=ON \ - -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ - -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ - -DEXECUTORCH_ENABLE_LOGGING=ON \ - ${build_with_etdump_flags} \ - -DFLATC_EXECUTABLE="$(which flatc)" \ - ${extra_build_flags} \ - -B${et_build_dir} \ - "${et_root_dir}" - - echo "[${FUNCNAME[0]}] Configured CMAKE" - - cmake --build ${et_build_dir} --parallel --target install --config ${build_type} -- - - ( set +x ; - echo "--------------------------------------------------------------------------------" ; - echo "Build ExecuTorch Libraries ${build_type} into '${et_root_dir}/examples/arm' - '${et_build_dir}/examples/arm'" ; - echo "--------------------------------------------------------------------------------" ) - - cmake \ - -DCMAKE_INSTALL_PREFIX=${et_build_dir} \ - -DCMAKE_BUILD_TYPE=${build_type} \ - -DCMAKE_TOOLCHAIN_FILE="${toolchain_cmake}" \ - -DEXECUTORCH_SELECT_OPS_LIST=${portable_kernels} \ - -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON \ - ${extra_build_flags} \ - -B"${et_build_dir}/examples/arm" \ - "${et_root_dir}/examples/arm" - - cmake --build "${et_build_dir}/examples/arm" --parallel --config ${build_type} -- - - set +x - - cd "${et_build_dir}" - echo "[${FUNCNAME[0]}] Generated static libraries for ExecuTorch:" - find . -name "*.a" -exec ls -al {} \; -} - -# build Arm Baremetal executor_runner -function build_executorch_runner() { - echo "[${FUNCNAME[0]}] Generating ExecuTorch libraries" - [[ $# -ne 1 ]] && { echo "[${FUNCNAME[0]}]" "Expecting a single pte file as argument got, $*"; exit 1; } - local pte=${1} - if [[ ${target} == *"ethos-u55"* ]]; then - local target_cpu=cortex-m55 - else - local target_cpu=cortex-m85 - fi - echo "--------------------------------------------------------------------------------" - echo "Build Arm Baremetal executor_runner for ${target} - '${executor_runner_path}/cmake-out'" - echo "--------------------------------------------------------------------------------" - - cd ${script_dir}/executor_runner - - build_with_etdump_flags="" - if [ "$build_with_etdump" = true ] ; then - build_with_etdump_flags=" -DEXECUTORCH_ENABLE_EVENT_TRACER=ON " - fi - - cmake \ - -DCMAKE_BUILD_TYPE=${build_type} \ - -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} \ - -DTARGET_CPU=${target_cpu} \ - -DET_DIR_PATH:PATH=${et_root_dir} \ - -DET_BUILD_DIR_PATH:PATH=${et_build_dir} \ - -DET_PTE_FILE_PATH:PATH="${pte}" \ - -DETHOS_SDK_PATH:PATH=${ethos_u_root_dir} \ - -DETHOSU_TARGET_NPU_CONFIG=${target} \ - ${build_with_etdump_flags} \ - -DPYTHON_EXECUTABLE=$(which python3) \ - -DSYSTEM_CONFIG=${system_config} \ - ${extra_build_flags} \ - -B ${executor_runner_path}/cmake-out - - echo "[${FUNCNAME[0]}] Configured CMAKE" - - cmake --build ${executor_runner_path}/cmake-out --parallel -- arm_executor_runner - echo "[${FUNCNAME[0]}] Generated baremetal elf file:" - find ${executor_runner_path}/cmake-out -name "arm_executor_runner" - echo "executable_text: $(find ${executor_runner_path}/cmake-out -name arm_executor_runner -exec arm-none-eabi-size {} \; | grep -v filename | awk '{print $1}') bytes" - echo "executable_data: $(find ${executor_runner_path}/cmake-out -name arm_executor_runner -exec arm-none-eabi-size {} \; | grep -v filename | awk '{print $2}') bytes" - echo "executable_bss: $(find ${executor_runner_path}/cmake-out -name arm_executor_runner -exec arm-none-eabi-size {} \; | grep -v filename | awk '{print $3}') bytes" -} - -# Execute the executor_runner on FVP Simulator -function run_fvp() { - [[ $# -ne 1 ]] && { echo "[${FUNCNAME[0]}]" "Expexted elf 
binary name, got $*"; exit 1; }
-    local elf_name=${1}
-    elf=$(find ${executor_runner_path} -name "${elf_name}")
-    [[ ! -f $elf ]] && { echo "[${FUNCNAME[0]}]: Unable to find executor_runner elf: ${elf}"; exit 1; }
-    num_macs=$(echo ${target} | cut -d - -f 3)
-
-    if [[ ${target} == *"ethos-u55"* ]]; then
-        echo "Running ${elf} for ${target} run with FVP:${fvp_model} num_macs:${num_macs}"
-        ${fvp_model} \
-            -C ethosu.num_macs=${num_macs} \
-            -C mps3_board.visualisation.disable-visualisation=1 \
-            -C mps3_board.telnetterminal0.start_telnet=0 \
-            -C mps3_board.uart0.out_file='-' \
-            -C mps3_board.uart0.shutdown_on_eot=1 \
-            -a "${elf}" \
-            --timelimit 220 || true # seconds
-        echo "[${FUNCNAME[0]}] Simulation complete, $?"
-    elif [[ ${target} == *"ethos-u85"* ]]; then
-        echo "Running ${elf} for ${target} run with FVP:${fvp_model} num_macs:${num_macs}"
-        ${fvp_model} \
-            -C mps4_board.subsystem.ethosu.num_macs=${num_macs} \
-            -C mps4_board.visualisation.disable-visualisation=1 \
-            -C vis_hdlcd.disable_visualisation=1 \
-            -C mps4_board.telnetterminal0.start_telnet=0 \
-            -C mps4_board.uart0.out_file='-' \
-            -C mps4_board.uart0.shutdown_on_eot=1 \
-            -a "${elf}" \
-            --timelimit 220 || true # seconds
-        echo "[${FUNCNAME[0]}] Simulation complete, $?"
-    else
-        echo "Running ${elf} for ${target} is not supported"
-        exit 1
-    fi
-}
-
 #######
 ### Main
 #######
@@ -343,12 +106,10 @@ function run_fvp() {
 
 # This should be prepared by the setup.sh
 [[ -f ${setup_path_script} ]] \
     || { echo "Missing ${setup_path_script}. ${_setup_msg}"; exit 1; }
-source ${root_dir}/setup_path.sh
 
-# basic checks before we get started
-hash ${fvp_model} \
-    || { echo "Could not find ${fvp_model} on PATH, ${_setup_msg}"; exit 1; }
+source ${setup_path_script}
 
+# basic checks before we get started
 hash arm-none-eabi-gcc \
     || { echo "Could not find arm baremetal toolchain on PATH, ${_setup_msg}"; exit 1; }
 
@@ -358,9 +119,24 @@ hash arm-none-eabi-gcc \
 [[ -f ${et_root_dir}/CMakeLists.txt ]] \
     || { echo "Executorch repo doesn't contain CMakeLists.txt file at root level"; exit 1; }
 
-# build executorch libraries
-build_executorch
-cd $et_root_dir && backends/arm/scripts/build_quantized_ops_aot_lib.sh $build_type
+# Build executorch libraries
+cd $et_root_dir
+if [ "$build_with_etdump" = true ] ; then
+    et_dump_flag="--etdump"
+else
+    et_dump_flag=""
+fi
+
+backends/arm/scripts/build_executorch.sh --et_build_root="${et_build_root}" --build_type=$build_type $et_dump_flag
+backends/arm/scripts/build_portable_kernels.sh --et_build_root="${et_build_root}" --build_type=$build_type --portable_kernels=$portable_kernels
+
+# Build the quantized_ops_aot_lib
+backends/arm/scripts/build_quantized_ops_aot_lib.sh --et_build_root="${et_build_root}" --build_type=$build_type
+
+SO_EXT=$(python3 -c 'import platform; print({"Darwin": "dylib", "Linux": "so", "Windows": "dll"}.get(platform.system(), None))')
+# We are using the aot_lib built by build_quantized_ops_aot_lib.sh above
+SO_LIB=$(find "${et_build_root}/cmake-out-aot-lib" -name libquantized_ops_aot_lib.${SO_EXT})
+
 
 if [[ -z "$model_name" ]]; then
     # the test models run, and whether to delegate
@@ -373,19 +149,51 @@ fi
 
 # loop over running the AoT flow and executing the model on device
 for i in "${!test_model[@]}"; do
+    model="${test_model[i]}"
+    model_compiler_flags="${model_compiler_flags[i]}"
+
     echo "--------------------------------------------------------------------------------"
-    printf "Running e2e flow for model '%s' with flags '%s'\n" "${test_model[i]}" "${model_compiler_flags[i]}"
+    printf 
"Running e2e flow for model '%s' with flags '%s'\n" "${model}" "${model_compiler_flags}" echo "--------------------------------------------------------------------------------" - pte=$(generate_pte_file "${test_model[i]}" "${model_compiler_flags[i]}") - stat --printf="Generated pte_data_size: %s bytes\npte_file:%n\n" ${pte} + + cd $et_root_dir + model_short_name=$(basename -- "${model}" ".py") + model_filename=${model_short_name}_arm_${target}.pte + + if [[ "${model_compiler_flags}" == *"--delegate"* ]]; then + # Name aligned with default aot_arm_compiler output + model_filename=${model_short_name}_arm_delegate_${target}.pte + fi + + if [ "$output_folder_set" = false ] ; then + output_folder=${et_build_root}/${model_short_name} + fi + + output_folder=$(realpath ${output_folder}) + mkdir -p ${output_folder} + pte_file=$(realpath -m ${output_folder}/${model_filename}) + + rm -f "${pte_file}" + + ARM_AOT_CMD="python3 -m examples.arm.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --intermediate=${output_folder} --output=${output_folder} --so_library=$SO_LIB --system_config=${system_config} --memory_mode=${memory_mode}" + echo "CALL ${ARM_AOT_CMD}" >&2 + ${ARM_AOT_CMD} 1>&2 + + [[ -f ${pte_file} ]] || { >&2 echo "Failed to generate a pte file - ${pte_file}"; exit 1; } + echo "pte_data_size: $(wc -c ${pte_file})" + echo "pte_file: ${pte_file}" + if [[ ${target} == *"TOSA"* ]]; then - echo "Build for ${target} skip generating .elf and running" + echo "Build for ${target} skip generating a .elf and running it" else + set -x # Rebuild the application as the pte is imported as a header/c array - build_executorch_runner "${pte}" + backends/arm/scripts/build_executorch_runner.sh "--pte=${pte_file}" --build_type=$build_type --target=$target --system_config=$system_config $et_dump_flag --extra_build_flags="$extra_build_flags" --ethosu_tools_dir="$ethos_u_scratch_dir" --output="${output_folder}" if [ "$build_only" = false ] ; then - run_fvp arm_executor_runner + # Execute the executor_runner on FVP Simulator + backends/arm/scripts/run_fvp.sh --elf=${output_folder}/cmake-out/arm_executor_runner --target=$target fi + set +x fi done From 4b1ae2108391599580cbc852b5c838c1e010b431 Mon Sep 17 00:00:00 2001 From: Yufeng Shi Date: Wed, 19 Feb 2025 13:11:06 +0000 Subject: [PATCH 014/584] Arm: Support ABS operator in Arm backend (#8459) Support ABS operator in Arm backend --- .../tosa_supported_operators.py | 1 + backends/arm/operators/__init__.py | 1 + backends/arm/operators/op_abs.py | 133 ++++++++++++++++++ .../arm/quantizer/quantization_annotator.py | 1 + backends/arm/test/ops/test_abs.py | 125 ++++++++++++++++ 5 files changed, 261 insertions(+) create mode 100644 backends/arm/operators/op_abs.py create mode 100644 backends/arm/test/ops/test_abs.py diff --git a/backends/arm/operator_support/tosa_supported_operators.py b/backends/arm/operator_support/tosa_supported_operators.py index 1fa626efce1..4bbcec57cba 100644 --- a/backends/arm/operator_support/tosa_supported_operators.py +++ b/backends/arm/operator_support/tosa_supported_operators.py @@ -91,6 +91,7 @@ class BaseTOSASupportList(OperatorSupportBase): def is_node_supported(self, submodules, node: fx.Node) -> bool: supported = node.op == "call_function" and node.target in [ + exir_ops.edge.aten.abs.default, exir_ops.edge.aten.add.Tensor, exir_ops.edge.aten.expand_copy.default, exir_ops.edge.aten.cat.default, diff --git a/backends/arm/operators/__init__.py b/backends/arm/operators/__init__.py index 735debe367f..98d96828ad5 
100644
--- a/backends/arm/operators/__init__.py
+++ b/backends/arm/operators/__init__.py
@@ -7,6 +7,7 @@
 from . import (  # noqa
     node_visitor,
+    op_abs,
     op_add,
     op_avg_pool2d,
     op_bmm,
diff --git a/backends/arm/operators/op_abs.py b/backends/arm/operators/op_abs.py
new file mode 100644
index 00000000000..886a96fd520
--- /dev/null
+++ b/backends/arm/operators/op_abs.py
@@ -0,0 +1,133 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+from typing import List
+
+import executorch.backends.arm.tosa_quant_utils as tqutils
+import executorch.backends.arm.tosa_utils as tutils
+
+import serializer.tosa_serializer as ts  # type: ignore
+from executorch.backends.arm.operators.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.arm.tosa_mapping import TosaArg
+from executorch.backends.arm.tosa_specification import TosaSpecification
+
+from serializer.tosa_serializer import TosaOp
+from torch.fx import Node
+
+
+@register_node_visitor
+class AbsVisitor_080_BI(NodeVisitor):
+    target = "aten.abs.default"
+
+    tosa_specs = [
+        TosaSpecification.create_from_string("TOSA-0.80+BI"),
+    ]
+
+    def __init__(self, *args):
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: Node,
+        tosa_graph: ts.TosaSerializer,
+        inputs: List[TosaArg],
+        output: TosaArg,
+    ) -> None:
+        # Specification (0.80) states that input and output types
+        # should all be the same
+        if not (inputs[0].dtype == output.dtype):
+            raise ValueError(
+                "All inputs and outputs need same dtype."
+                f"Got {inputs[0].dtype=}, {output.dtype=}"
+            )
+        # Handle int8 (quantized) and int32
+        if not (inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]):
+            raise ValueError(
+                "All inputs need to be INT8 or INT32." f"Got {inputs[0].dtype=}"
+            )
+
+        if inputs[0].dtype == ts.DType.INT8:
+            rescaled_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
+                tosa_graph, inputs, node
+            )
+        else:
+            # input[0].dtype == ts.DType.INT32
+            # Non-quantized input, natively supported by TOSA ABS
+            rescaled_inputs = inputs
+
+        if output.dtype == ts.DType.INT8:
+            broadcasted_shape = tutils.tosa_shape(output.shape, output.dim_order)
+            abs_output = tosa_graph.addIntermediate(broadcasted_shape, ts.DType.INT32)
+        else:
+            # output.dtype == ts.DType.INT32
+            abs_output = output
+
+        # Do the INT32 Abs
+        tosa_graph.addOperator(
+            TosaOp.Op().ABS,
+            [
+                rescaled_inputs[0].name,
+            ],
+            [abs_output.name],
+            None,
+        )
+
+        if output.dtype == ts.DType.INT8:
+            # Scale output back to 8 bit
+            # pyre-ignore
+            tqutils.insert_rescale_op_to_int8(tosa_graph, abs_output, scale_back, node)  # type: ignore[possibly-undefined]
+
+
+@register_node_visitor
+class AbsVisitor_080_MI(AbsVisitor_080_BI):
+    # inheriting 'target' from BI class
+
+    tosa_specs = [
+        TosaSpecification.create_from_string("TOSA-0.80+MI"),
+    ]
+
+    def __init__(self, *args):
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: Node,
+        tosa_graph: ts.TosaSerializer,
+        inputs: List[TosaArg],
+        output: TosaArg,
+    ) -> None:
+        # Specification (0.80) states that input and output types
+        # should all be the same
+        if not (inputs[0].dtype == output.dtype):
+            raise ValueError(
+                "All inputs and output need same dtype."
+ f"Got {inputs[0].dtype=}, {output.dtype=}" + ) + + if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]: + # Call the inherited define_node for handling integers + super().define_node(node, tosa_graph, inputs, output) + else: + # FP32 Abs lowering + + if not (inputs[0].dtype == ts.DType.FP32): + raise ValueError( + "All inputs need to be FP32." f"Got {inputs[0].dtype=}" + ) + + if not (output.dtype == ts.DType.FP32): + raise ValueError("All outputs need to be FP32." f"Got {output.dtype=}") + + # MI lowering + tosa_graph.addOperator( + TosaOp.Op().ABS, + [inputs[0].name], + [output.name], + None, + ) diff --git a/backends/arm/quantizer/quantization_annotator.py b/backends/arm/quantizer/quantization_annotator.py index f1cef971782..9b2ada035f9 100644 --- a/backends/arm/quantizer/quantization_annotator.py +++ b/backends/arm/quantizer/quantization_annotator.py @@ -125,6 +125,7 @@ def _match_pattern( _one_to_one = [ + torch.ops.aten.abs.default, torch.ops.aten.exp.default, torch.ops.aten.log.default, torch.ops.aten.reciprocal.default, diff --git a/backends/arm/test/ops/test_abs.py b/backends/arm/test/ops/test_abs.py new file mode 100644 index 00000000000..481c7d5ed0d --- /dev/null +++ b/backends/arm/test/ops/test_abs.py @@ -0,0 +1,125 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright 2025 Arm Limited and/or its affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import unittest + +from typing import Tuple + +import pytest + +import torch +from executorch.backends.arm.test import common, conftest +from executorch.backends.arm.test.tester.arm_tester import ArmTester +from executorch.exir.backend.compile_spec_schema import CompileSpec +from parameterized import parameterized + + +class TestAbs(unittest.TestCase): + class Abs(torch.nn.Module): + test_parameters = [ + (torch.zeros(5),), + (torch.full((5,), -1, dtype=torch.float32),), + (torch.ones(5) * -1,), + (torch.randn(8),), + (torch.randn(2, 3, 4),), + (torch.randn(1, 2, 3, 4),), + (torch.normal(mean=0, std=10, size=(2, 3, 4)),), + ] + + def forward(self, x): + return torch.abs(x) + + def _test_abs_tosa_MI_pipeline( + self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] + ): + ( + ArmTester( + module, + example_inputs=test_data, + compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), + ) + .export() + .check_count({"torch.ops.aten.abs.default": 1}) + .check_not(["torch.ops.quantized_decomposed"]) + .to_edge() + .partition() + .check_not(["torch.ops.aten.abs.default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=test_data) + ) + + def _test_abs_tosa_BI_pipeline( + self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] + ): + ( + ArmTester( + module, + example_inputs=test_data, + compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), + ) + .quantize() + .export() + .check_count({"torch.ops.aten.abs.default": 1}) + .check(["torch.ops.quantized_decomposed"]) + .to_edge() + .partition() + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=test_data, qtol=1) + ) + + def _test_abs_ethosu_BI_pipeline( + self, + compile_spec: list[CompileSpec], + module: torch.nn.Module, + test_data: Tuple[torch.Tensor], + ): + tester = ( + ArmTester( + module, + example_inputs=test_data, + compile_spec=compile_spec, + ) + .quantize() + 
.export() + .check_count({"torch.ops.aten.abs.default": 1}) + .check(["torch.ops.quantized_decomposed"]) + .to_edge() + .partition() + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .serialize() + ) + if conftest.is_option_enabled("corstone_fvp"): + tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) + + @parameterized.expand(Abs.test_parameters) + def test_abs_tosa_MI(self, test_data: torch.Tensor): + test_data = (test_data,) + self._test_abs_tosa_MI_pipeline(self.Abs(), test_data) + + @parameterized.expand(Abs.test_parameters) + def test_abs_tosa_BI(self, test_data: torch.Tensor): + test_data = (test_data,) + self._test_abs_tosa_BI_pipeline(self.Abs(), test_data) + + @parameterized.expand(Abs.test_parameters) + @pytest.mark.corstone_fvp + def test_abs_u55_BI(self, test_data: torch.Tensor): + test_data = (test_data,) + self._test_abs_ethosu_BI_pipeline( + common.get_u55_compile_spec(), self.Abs(), test_data + ) + + @parameterized.expand(Abs.test_parameters) + @pytest.mark.corstone_fvp + def test_abs_u85_BI(self, test_data: torch.Tensor): + test_data = (test_data,) + self._test_abs_ethosu_BI_pipeline( + common.get_u85_compile_spec(), self.Abs(), test_data + ) From 43efc37e32a1e9395a7f976d83e7e7e31b564ad2 Mon Sep 17 00:00:00 2001 From: Tom Allsop <72802373+tom-arm@users.noreply.github.com> Date: Wed, 19 Feb 2025 13:16:26 +0000 Subject: [PATCH 015/584] Arm: Add pass that converts operators to clamp to the Arm backend (#8538) Add pass that converts operators to clamp to the Arm backend * Add ConvertToClampPass that converts relu and hardtanh to clamp * Remove op_relu and op_hardtanh visitors from backend Signed-off-by: Tom Allsop --- backends/arm/_passes/arm_pass_manager.py | 4 + backends/arm/_passes/convert_to_clamp.py | 36 +++++++++ backends/arm/operators/__init__.py | 2 - backends/arm/operators/op_hardtanh.py | 66 --------------- backends/arm/operators/op_relu.py | 59 -------------- .../arm/test/passes/test_convert_to_clamp.py | 80 +++++++++++++++++++ 6 files changed, 120 insertions(+), 127 deletions(-) create mode 100644 backends/arm/_passes/convert_to_clamp.py delete mode 100644 backends/arm/operators/op_hardtanh.py delete mode 100644 backends/arm/operators/op_relu.py create mode 100644 backends/arm/test/passes/test_convert_to_clamp.py diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py index 28d70591e5e..331d45e9124 100644 --- a/backends/arm/_passes/arm_pass_manager.py +++ b/backends/arm/_passes/arm_pass_manager.py @@ -27,6 +27,7 @@ from executorch.backends.arm._passes.convert_squeezes_to_view import ( # type: ignore[import-not-found] ConvertSqueezesToViewPass, ) +from executorch.backends.arm._passes.convert_to_clamp import ConvertToClampPass from executorch.backends.arm._passes.decompose_batchnorm_pass import ( DecomposeBatchNormPass, ) @@ -104,6 +105,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul self.add_pass(DecomposeLinearPass()) self.add_pass(ConvertMeanDimToAveragePoolPass()) self.add_pass(ConvertFullLikeToFullPass()) + self.add_pass(ConvertToClampPass()) self.add_pass(ReplaceScalarWithTensorArgPass()) self.add_pass(AnnotateDecomposedMatmulPass()) @@ -144,6 +146,8 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul self.add_pass(DecomposeDivPass()) self.add_pass(DecomposeSoftmaxesPass()) self.add_pass(ConvertFullLikeToFullPass()) + self.add_pass(ConvertToClampPass()) + 
self.add_pass(AnnotateDecomposedMatmulPass()) self.add_pass(QuantizeOperatorArguments()) self.add_pass(FoldAndAnnotateQParamsPass()) # type: ignore[call-arg] diff --git a/backends/arm/_passes/convert_to_clamp.py b/backends/arm/_passes/convert_to_clamp.py new file mode 100644 index 00000000000..8f2c9b16f9a --- /dev/null +++ b/backends/arm/_passes/convert_to_clamp.py @@ -0,0 +1,36 @@ +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Tuple + +from executorch.exir.dialects._ops import ops as exir_ops +from executorch.exir.pass_base import ExportPass + +edge_operators = { + exir_ops.edge.aten.hardtanh.default, + exir_ops.edge.aten.relu.default, +} + + +def get_clamp_params(op, args) -> Tuple[float | None, float | None]: + if op == exir_ops.edge.aten.hardtanh.default: + return args[1], args[2] + elif op == exir_ops.edge.aten.relu.default: + return 0.0, None + else: + raise ValueError(f"Getting clamp parameters for op {op} is not implemented.") + + +class ConvertToClampPass(ExportPass): + def call_operator(self, op, args, kwargs, meta): + if op not in edge_operators: + return super().call_operator(op, args, kwargs, meta) + + return super().call_operator( + exir_ops.edge.aten.clamp.default, + (args[0], *get_clamp_params(op, args)), + {}, + meta, + ) diff --git a/backends/arm/operators/__init__.py b/backends/arm/operators/__init__.py index 98d96828ad5..b737c7fb2c6 100644 --- a/backends/arm/operators/__init__.py +++ b/backends/arm/operators/__init__.py @@ -21,7 +21,6 @@ op_ge, op_get_item, op_gt, - op_hardtanh, op_le, op_log, op_lt, @@ -31,7 +30,6 @@ op_mul, op_permute, op_reciprocal, - op_relu, op_repeat, op_rescale, op_rshift, diff --git a/backends/arm/operators/op_hardtanh.py b/backends/arm/operators/op_hardtanh.py deleted file mode 100644 index fc0ee552a9f..00000000000 --- a/backends/arm/operators/op_hardtanh.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright 2023-2025 Arm Limited and/or its affiliates. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# pyre-unsafe -from typing import List - -import serializer.tosa_serializer as ts # type: ignore -import torch - -# pyre-fixme[21]: 'Could not find a module corresponding to import `executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass`.' 
-from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import ( - get_input_qparams, -) -from executorch.backends.arm.operators.node_visitor import ( - NodeVisitor, - register_node_visitor, -) -from executorch.backends.arm.tosa_mapping import TosaArg - -from serializer.tosa_serializer import TosaOp - - -@register_node_visitor -class HardTanhVisitor(NodeVisitor): - target = "aten.hardtanh.default" - - def __init__(self, *args): - super().__init__(*args) - - def define_node( - self, - node: torch.fx.Node, - tosa_graph: ts.TosaSerializer, - inputs: List[TosaArg], - output: TosaArg, - ) -> None: - attr = ts.TosaSerializerAttribute() - - if inputs[0].dtype == ts.DType.INT8: - # Get quant parameters - input_qparams = get_input_qparams(node) # pyre-ignore[16] - qargs = input_qparams[0] - # Convert to quantized representation - clamp_min_qs = qargs.quantize_value(inputs[1].number).item() - clamp_max_qs = qargs.quantize_value(inputs[2].number).item() - # Set fp values to 0.0 since they are not used - clamp_min_fp = 0.0 - clamp_max_fp = 0.0 - else: - clamp_min_fp = inputs[1].number - clamp_max_fp = inputs[2].number - # Set qs values to 0 since they are not used - clamp_min_qs = 0 - clamp_max_qs = 0 - - attr.ClampAttribute( - tosa_graph.builder, - clamp_min_qs, - clamp_max_qs, - clamp_min_fp, - clamp_max_fp, - ) - - tosa_graph.addOperator(TosaOp.Op().CLAMP, [inputs[0].name], [output.name], attr) diff --git a/backends/arm/operators/op_relu.py b/backends/arm/operators/op_relu.py deleted file mode 100644 index c37e4b3e75d..00000000000 --- a/backends/arm/operators/op_relu.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2024-2025 Arm Limited and/or its affiliates. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# pyre-unsafe - -import serializer.tosa_serializer as ts # type: ignore -import torch.fx - -# pyre-fixme[21]: 'Could not find a module corresponding to import `executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass`.' 
-from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
-    get_output_qparams,
-)
-from executorch.backends.arm.operators.node_visitor import (
-    NodeVisitor,
-    register_node_visitor,
-)
-from executorch.backends.arm.tosa_mapping import TosaArg
-from serializer.tosa_serializer import TosaOp
-
-
-@register_node_visitor
-class ReluVisitor(NodeVisitor):
-    target = "aten.relu.default"
-
-    def __init__(self, *args):
-        super().__init__(*args)
-
-    def define_node(
-        self,
-        node: torch.fx.Node,
-        tosa_graph: ts.TosaSerializer,
-        inputs: list[TosaArg],
-        output: TosaArg,
-    ) -> None:
-        attr = ts.TosaSerializerAttribute()
-
-        clamp_min_fp = 0.0
-        clamp_max_fp = 0.0
-        clamp_min_qs = 0
-        clamp_max_qs = 0
-        if inputs[0].dtype == ts.DType.INT8:
-            out_qargs = get_output_qparams(node)  # pyre-ignore[16]
-            clamp_min_qs = out_qargs[0].quantize_value(0).item()
-            clamp_max_qs = out_qargs[0].quantize_value(float("inf")).item()
-        else:
-            clamp_min_fp = 0
-            clamp_max_fp = float("inf")
-
-        attr.ClampAttribute(
-            tosa_graph.builder,
-            clamp_min_qs,
-            clamp_max_qs,
-            clamp_min_fp,
-            clamp_max_fp,
-        )
-
-        tosa_graph.addOperator(TosaOp.Op().CLAMP, [inputs[0].name], [output.name], attr)
diff --git a/backends/arm/test/passes/test_convert_to_clamp.py b/backends/arm/test/passes/test_convert_to_clamp.py
new file mode 100644
index 00000000000..0b106b7bc82
--- /dev/null
+++ b/backends/arm/test/passes/test_convert_to_clamp.py
@@ -0,0 +1,80 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from executorch.backends.arm._passes.convert_to_clamp import ConvertToClampPass
+
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.arm_tester import ArmTester
+
+from executorch.backends.xnnpack.test.tester.tester import RunPasses
+
+
+class HardTanh(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+        self.hardtanh = torch.nn.Hardtanh()
+
+    def forward(self, x):
+        return self.hardtanh(x)
+
+    def get_inputs(self):
+        return (torch.rand(1, 64, 64, 3),)
+
+
+class ReLU(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+        self.relu = torch.nn.ReLU()
+
+    def forward(self, x):
+        return self.relu(x)
+
+    def get_inputs(self):
+        return (torch.rand(1, 64, 64, 3),)
+
+
+class TestConvertToClampPass(unittest.TestCase):
+    """
+    Tests the ConvertToClampPass which converts hardtanh.default and relu.default to clamp.default
+    """
+
+    def test_tosa_MI_hardtanh(self):
+        module = HardTanh()
+        test_pass_stage = RunPasses([ConvertToClampPass])
+        (
+            ArmTester(
+                module,
+                example_inputs=module.get_inputs(),
+                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),
+            )
+            .export()
+            .to_edge()
+            .check(["executorch_exir_dialects_edge__ops_aten_hardtanh_default"])
+            .run_passes(test_pass_stage)
+            .check(["executorch_exir_dialects_edge__ops_aten_clamp_default"])
+            .check_not(["executorch_exir_dialects_edge__ops_aten_hardtanh_default"])
+        )
+
+    def test_tosa_MI_relu(self):
+        module = ReLU()
+        test_pass_stage = RunPasses([ConvertToClampPass])
+        (
+            ArmTester(
+                module,
+                example_inputs=module.get_inputs(),
+                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),
+            )
+            .export()
+            .to_edge()
+            .check(["executorch_exir_dialects_edge__ops_aten_relu_default"])
+            .run_passes(test_pass_stage)
+            .check(["executorch_exir_dialects_edge__ops_aten_clamp_default"])
.check_not(["executorch_exir_dialects_edge__ops_aten_relu_default"])
+        )

From 4f90ce4cdac5604d2b416a9a44aeeeb58ff14b98 Mon Sep 17 00:00:00 2001
From: Yufeng Shi
Date: Wed, 19 Feb 2025 14:41:05 +0000
Subject: [PATCH 016/584] Arm: Add FLOOR operator (#8563)

Implement a unary operator factory for creating one-input NodeVisitors.

Change-Id: I59ba0407b763e9e0cb79f214b7679465eda94825
---
 backends/arm/_passes/insert_table_ops.py      |  1 +
 .../tosa_supported_operators.py               |  1 +
 backends/arm/operators/__init__.py            |  1 +
 backends/arm/operators/ops_unary.py           | 57 +++++++++++++
 .../arm/quantizer/quantization_annotator.py   |  1 +
 backends/arm/test/ops/test_floor.py           | 82 +++++++++++++++++++
 6 files changed, 143 insertions(+)
 create mode 100644 backends/arm/operators/ops_unary.py
 create mode 100644 backends/arm/test/ops/test_floor.py

diff --git a/backends/arm/_passes/insert_table_ops.py b/backends/arm/_passes/insert_table_ops.py
index ada4d646c06..77de46fcd29 100644
--- a/backends/arm/_passes/insert_table_ops.py
+++ b/backends/arm/_passes/insert_table_ops.py
@@ -39,6 +39,7 @@ class InsertTableOpsPass(ExportPass):
     table_ops: Dict[EdgeOpOverload, Callable[[torch.Tensor], torch.Tensor]] = {
         exir_ops.edge.aten.exp.default: torch.exp,
+        exir_ops.edge.aten.floor.default: torch.floor,
         exir_ops.edge.aten.log.default: torch.log,
         exir_ops.edge.aten.reciprocal.default: torch.reciprocal,
         exir_ops.edge.aten.rsqrt.default: torch.rsqrt,
diff --git a/backends/arm/operator_support/tosa_supported_operators.py b/backends/arm/operator_support/tosa_supported_operators.py
index 4bbcec57cba..e0ded25493a 100644
--- a/backends/arm/operator_support/tosa_supported_operators.py
+++ b/backends/arm/operator_support/tosa_supported_operators.py
@@ -107,6 +107,7 @@ def is_node_supported(self, submodules, node: fx.Node) -> bool:
             exir_ops.edge.aten.log.default,
             exir_ops.edge.aten.linear.default,
             exir_ops.edge.aten.split_with_sizes_copy.default,
+            exir_ops.edge.aten.floor.default,
             exir_ops.edge.aten.full.default,
             exir_ops.edge.aten.full_like.default,
             exir_ops.edge.aten.ge.Tensor,
diff --git a/backends/arm/operators/__init__.py b/backends/arm/operators/__init__.py
index b737c7fb2c6..e98d7e76938 100644
--- a/backends/arm/operators/__init__.py
+++ b/backends/arm/operators/__init__.py
@@ -46,4 +46,5 @@
     op_upsample_nearest2d,
     op_view,
     ops_binary,
+    ops_unary,
 )
diff --git a/backends/arm/operators/ops_unary.py b/backends/arm/operators/ops_unary.py
new file mode 100644
index 00000000000..31397b9a3b1
--- /dev/null
+++ b/backends/arm/operators/ops_unary.py
@@ -0,0 +1,57 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+from typing import List
+
+import serializer.tosa_serializer as ts  # type: ignore
+import torch.fx
+from executorch.backends.arm.operators.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+
+from executorch.backends.arm.tosa_mapping import TosaArg
+from executorch.backends.arm.tosa_specification import TosaSpecification
+from serializer.tosa_serializer import TosaOp
+
+
+def unary_operator_factory(unary_target: str, tosa_op):
+    "Creates and registers NodeVisitors for operations that have one input and map directly into a TOSA op."
+ + class UnaryOperator_080_MI(NodeVisitor): + target = unary_target + + tosa_specs = [TosaSpecification.create_from_string("TOSA-0.80+MI")] + + def __init__(self, *args): + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + tosa_graph: ts.TosaSerializer, + inputs: List[TosaArg], + output: TosaArg, + ) -> None: + + if not (inputs[0].dtype == output.dtype): + raise ValueError( + "All inputs and output need same dtype." + f"Got {inputs[0].dtype=}, {output.dtype=}" + ) + + if not (inputs[0].dtype == ts.DType.FP32): + raise ValueError( + "All inputs need to be FP32." f"Got {inputs[0].dtype=}" + ) + + # MI lowering + tosa_graph.addOperator(tosa_op, [inputs[0].name], [output.name]) + + register_node_visitor(UnaryOperator_080_MI) + + +unary_operator_factory("aten.floor.default", TosaOp.Op().FLOOR) diff --git a/backends/arm/quantizer/quantization_annotator.py b/backends/arm/quantizer/quantization_annotator.py index 9b2ada035f9..cc12923911a 100644 --- a/backends/arm/quantizer/quantization_annotator.py +++ b/backends/arm/quantizer/quantization_annotator.py @@ -127,6 +127,7 @@ def _match_pattern( _one_to_one = [ torch.ops.aten.abs.default, torch.ops.aten.exp.default, + torch.ops.aten.floor.default, torch.ops.aten.log.default, torch.ops.aten.reciprocal.default, torch.ops.aten.rsqrt.default, diff --git a/backends/arm/test/ops/test_floor.py b/backends/arm/test/ops/test_floor.py new file mode 100644 index 00000000000..c19dc8605b8 --- /dev/null +++ b/backends/arm/test/ops/test_floor.py @@ -0,0 +1,82 @@ +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + + +from typing import Tuple + +import torch +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + + +aten_op = "torch.ops.aten.floor.default" +exir_op = "executorch_exir_dialects_edge__ops_aten_floor_default" + +input_t1 = Tuple[torch.Tensor] # Input x + + +class Floor(torch.nn.Module): + def forward(self, x: torch.Tensor): + return torch.floor(x) + + test_data: dict[str, input_t1] = { + "zeros": (torch.zeros(1, 10, 10, 10),), + "ones": (torch.ones(10, 10, 10),), + "rand": ((torch.rand(10, 10) - 0.5),), + "randn_pos": ((torch.randn(1, 4, 4, 4) + 10),), + "randn_neg": ((torch.randn(1, 4, 4, 4) - 10),), + "ramp": (torch.arange(-16, 16, 0.2),), + } + + +@common.parametrize("test_data", Floor.test_data) +def test_floor_tosa_MI(test_data: input_t1): + pipeline = TosaPipelineMI[input_t1](Floor(), test_data, aten_op, exir_op) + pipeline.run() + + +@common.parametrize("test_data", Floor.test_data) +def test_floor_tosa_BI(test_data: input_t1): + pipeline = TosaPipelineBI[input_t1](Floor(), test_data, aten_op, exir_op) + pipeline.run() + + +@common.parametrize("test_data", Floor.test_data) +def test_floor_u55_BI(test_data: input_t1): + pipeline = EthosU55PipelineBI[input_t1]( + Floor(), test_data, aten_op, exir_op, run_on_fvp=False + ) + pipeline.run() + + +@common.parametrize("test_data", Floor.test_data) +def test_floor_u85_BI(test_data: input_t1): + pipeline = EthosU85PipelineBI[input_t1]( + Floor(), test_data, aten_op, exir_op, run_on_fvp=False + ) + pipeline.run() + + +@common.parametrize("test_data", Floor.test_data) +@common.SkipIfNoCorstone300 +def test_floor_u55_BI_on_fvp(test_data: input_t1): + pipeline = EthosU55PipelineBI[input_t1]( + Floor(), 
test_data, aten_op, exir_op, run_on_fvp=True
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", Floor.test_data)
+@common.SkipIfNoCorstone320
+def test_floor_u85_BI_on_fvp(test_data: input_t1):
+    pipeline = EthosU85PipelineBI[input_t1](
+        Floor(), test_data, aten_op, exir_op, run_on_fvp=True
+    )
+    pipeline.run()

From e1aabb635141f9850303fc0a1847ce21f48e499f Mon Sep 17 00:00:00 2001
From: Erik Lundell
Date: Wed, 19 Feb 2025 16:52:25 +0100
Subject: [PATCH 017/584] Arm: Fix various small issues (#8566)

- Softmax incorrectly set to can_delegate=False in aot_arm_compiler
- Incorrect % with rank instead of dim length. Fix test for this.
- Remove .dump_artifact() calls

Signed-off-by: Erik Lundell
---
 backends/arm/_passes/decompose_select.py |  3 ++-
 backends/arm/test/ops/test_cat.py        |  1 -
 backends/arm/test/ops/test_select.py     |  4 +---
 backends/arm/test/ops/test_to_copy.py    |  4 +---
 examples/arm/aot_arm_compiler.py         |  2 +-
 5 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/backends/arm/_passes/decompose_select.py b/backends/arm/_passes/decompose_select.py
index 5e04668df9a..9a25b7c28ae 100644
--- a/backends/arm/_passes/decompose_select.py
+++ b/backends/arm/_passes/decompose_select.py
@@ -35,8 +35,9 @@ def call(self, graph_module: torch.fx.GraphModule):
 
             input_node, dim, index = node.args
             rank = len(input_node.meta["val"].size())
+            shape = input_node.meta["val"].shape
             dim = dim % rank if dim < 0 else dim
-            index = index % rank if index < 0 else index
+            index = index % shape[dim] if index < 0 else index
 
             with graph_module.graph.inserting_before(node):
                 slice_node = create_node(
diff --git a/backends/arm/test/ops/test_cat.py b/backends/arm/test/ops/test_cat.py
index a1613d1d04b..63423b9e993 100644
--- a/backends/arm/test/ops/test_cat.py
+++ b/backends/arm/test/ops/test_cat.py
@@ -111,7 +111,6 @@ def _test_cat_ethosu_BI_pipeline(
             .check(["torch.ops.quantized_decomposed"])
             .to_edge()
             .partition()
-            .dump_artifact()
             .check_not(["executorch_exir_dialects_edge__ops_aten_cat_default"])
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
diff --git a/backends/arm/test/ops/test_select.py b/backends/arm/test/ops/test_select.py
index b474da573f0..fbeb4ebf9e7 100644
--- a/backends/arm/test/ops/test_select.py
+++ b/backends/arm/test/ops/test_select.py
@@ -19,7 +19,7 @@
 test_data_suite: list[tuple[test_data_t]] = [
     # (test_data, dim, index)
     ((torch.zeros(5, 3, 20), -1, 0),),
-    ((torch.zeros(5, 3, 20), 0, -1),),
+    ((torch.rand(5, 3, 20), 0, -1),),
     ((torch.zeros(5, 3, 20), 0, 4),),
     ((torch.ones(10, 10, 10), 0, 2),),
     ((torch.rand(5, 3, 20, 2), 0, 2),),
@@ -61,9 +61,7 @@ def _test_select_tosa_MI_pipeline(
             .check([export_target])
             .check_not(["torch.ops.quantized_decomposed"])
             .to_edge()
-            .dump_artifact()
             .partition()
-            .dump_artifact()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
             .run_method_and_compare_outputs(inputs=test_data)
diff --git a/backends/arm/test/ops/test_to_copy.py b/backends/arm/test/ops/test_to_copy.py
index 6992ac2f8e6..db3e93fbdc9 100644
--- a/backends/arm/test/ops/test_to_copy.py
+++ b/backends/arm/test/ops/test_to_copy.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
# # This source code is licensed under the BSD-style license found in the @@ -55,9 +55,7 @@ def _test_to_copy_tosa_MI_pipeline( compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), ) .export() - .dump_artifact() .to_edge() - .dump_artifact() .partition() .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) .to_executorch() diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py index ccd736f7fce..f7f2105b99c 100644 --- a/examples/arm/aot_arm_compiler.py +++ b/examples/arm/aot_arm_compiler.py @@ -185,7 +185,7 @@ def forward(self, x): return z example_input = (torch.ones(2, 2),) - can_delegate = False + can_delegate = True class MultipleOutputsModule(torch.nn.Module): From e2485c96150bf30f23240a46acc5c9552a0a9d55 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Wed, 19 Feb 2025 09:30:59 -0800 Subject: [PATCH 018/584] Extract unittest scripts and add Buck mode (#8493) Fixes #8419 --- .ci/scripts/unittest-buck2.sh | 14 ++++++++ .ci/scripts/unittest-linux-cmake.sh | 13 ++++++++ .ci/scripts/unittest-linux.sh | 40 ++++++++++++++++++++++ .ci/scripts/unittest-macos-cmake.sh | 13 ++++++++ .ci/scripts/unittest-macos.sh | 42 +++++++++++++++++++++++ .github/workflows/_unittest.yml | 52 ++++------------------------- .github/workflows/pull.yml | 10 ++++++ 7 files changed, 138 insertions(+), 46 deletions(-) create mode 100755 .ci/scripts/unittest-buck2.sh create mode 100755 .ci/scripts/unittest-linux-cmake.sh create mode 100755 .ci/scripts/unittest-linux.sh create mode 100755 .ci/scripts/unittest-macos-cmake.sh create mode 100755 .ci/scripts/unittest-macos.sh diff --git a/.ci/scripts/unittest-buck2.sh b/.ci/scripts/unittest-buck2.sh new file mode 100755 index 00000000000..2e386570504 --- /dev/null +++ b/.ci/scripts/unittest-buck2.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +set -eux + +# TODO: expand this to //... +buck2 query //runtime/... + +# TODO: expand the covered scope of Buck targets. +buck2 build //runtime/core/portable_type/... +buck2 test //runtime/core/portable_type/... diff --git a/.ci/scripts/unittest-linux-cmake.sh b/.ci/scripts/unittest-linux-cmake.sh new file mode 100755 index 00000000000..7b61256eb51 --- /dev/null +++ b/.ci/scripts/unittest-linux-cmake.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +set -eux + +# Run pytest with coverage +pytest -n auto --cov=./ --cov-report=xml +# Run gtest +LLVM_PROFDATA=llvm-profdata-12 LLVM_COV=llvm-cov-12 \ +test/run_oss_cpp_tests.sh diff --git a/.ci/scripts/unittest-linux.sh b/.ci/scripts/unittest-linux.sh new file mode 100755 index 00000000000..5902e3efd21 --- /dev/null +++ b/.ci/scripts/unittest-linux.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +set -eux + +BUILD_TOOL=$1 +if [[ $BUILD_TOOL =~ ^(cmake|buck2)$ ]]; then + echo "Running unittests for ${BUILD_TOOL} ..." +else + echo "Missing build tool (require buck2 or cmake), exiting..." 
+ exit 1 +fi + +# The generic Linux job chooses to use base env, not the one setup by the image +eval "$(conda shell.bash hook)" +CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") +conda activate "${CONDA_ENV}" + +# Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate +source .ci/scripts/setup-vulkan-linux-deps.sh + +PYTHON_EXECUTABLE=python \ +EXECUTORCH_BUILD_PYBIND=ON \ +CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ +.ci/scripts/setup-linux.sh "$BUILD_TOOL" + +# Install llama3_2_vision dependencies. +PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh + +if [[ "$BUILD_TOOL" == "cmake" ]]; then + .ci/scripts/unittest-linux-cmake.sh +elif [[ "$BUILD_TOOL" == "buck2" ]]; then + .ci/scripts/unittest-buck2.sh +else + echo "Unknown build tool $BUILD_TOOL" + exit 1 +fi diff --git a/.ci/scripts/unittest-macos-cmake.sh b/.ci/scripts/unittest-macos-cmake.sh new file mode 100755 index 00000000000..cdb40c40244 --- /dev/null +++ b/.ci/scripts/unittest-macos-cmake.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +set -eux + +# Run pytest with coverage +${CONDA_RUN} pytest -n auto --cov=./ --cov-report=xml +# Run gtest +LLVM_PROFDATA="xcrun llvm-profdata" LLVM_COV="xcrun llvm-cov" \ +${CONDA_RUN} test/run_oss_cpp_tests.sh diff --git a/.ci/scripts/unittest-macos.sh b/.ci/scripts/unittest-macos.sh new file mode 100755 index 00000000000..907472b96bb --- /dev/null +++ b/.ci/scripts/unittest-macos.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +set -eux + +BUILD_TOOL=$1 +if [[ $BUILD_TOOL =~ ^(cmake|buck2)$ ]]; then + echo "Running unittests for ${BUILD_TOOL} ..." +else + echo "Missing build tool (require buck2 or cmake), exiting..." + exit 1 +fi + +bash .ci/scripts/setup-conda.sh +eval "$(conda shell.bash hook)" + +# Create temp directory for sccache shims +export TMP_DIR=$(mktemp -d) +export PATH="${TMP_DIR}:$PATH" +trap 'rm -rfv ${TMP_DIR}' EXIT + +# Setup MacOS dependencies as there is no Docker support on MacOS atm +PYTHON_EXECUTABLE=python \ +EXECUTORCH_BUILD_PYBIND=ON \ +CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ +${CONDA_RUN} --no-capture-output \ +.ci/scripts/setup-macos.sh cmake + +# Install llama3_2_vision dependencies. +PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh + +if [[ "$BUILD_TOOL" == "cmake" ]]; then + .ci/scripts/unittest-macos-cmake.sh +elif [[ "$BUILD_TOOL" == "buck2" ]]; then + .ci/scripts/unittest-buck2.sh +else + echo "Unknown build tool $BUILD_TOOL" + exit 1 +fi diff --git a/.github/workflows/_unittest.yml b/.github/workflows/_unittest.yml index 414f86494b0..15079920717 100644 --- a/.github/workflows/_unittest.yml +++ b/.github/workflows/_unittest.yml @@ -7,6 +7,10 @@ on: required: true type: string description: Name of the docker image to use. + build-tool: + required: true + type: string + description: Build tool to use, cmake or buck2. 
python-version: required: false type: string @@ -26,28 +30,7 @@ jobs: timeout: 90 script: | set -eux - - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate - source .ci/scripts/setup-vulkan-linux-deps.sh - - # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python \ - EXECUTORCH_BUILD_PYBIND=ON \ - CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ - .ci/scripts/setup-linux.sh cmake - - # Install llama3_2_vision dependencies. - PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh - - # Run pytest with coverage - pytest -n auto --cov=./ --cov-report=xml - # Run gtest - LLVM_PROFDATA=llvm-profdata-12 LLVM_COV=llvm-cov-12 \ - test/run_oss_cpp_tests.sh + .ci/scripts/unittest-linux.sh "${{ inputs.build-tool }}" macos: uses: pytorch/test-infra/.github/workflows/macos_job.yml@main @@ -58,27 +41,4 @@ jobs: ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} script: | set -eux - - bash .ci/scripts/setup-conda.sh - - # Create temp directory for sccache shims - export TMP_DIR=$(mktemp -d) - export PATH="${TMP_DIR}:$PATH" - trap 'rm -rfv ${TMP_DIR}' EXIT - - # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python \ - EXECUTORCH_BUILD_PYBIND=ON \ - CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ - ${CONDA_RUN} --no-capture-output \ - .ci/scripts/setup-macos.sh cmake - - # Install llama3_2_vision dependencies. - PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - ./examples/models/llama3_2_vision/install_requirements.sh - - # Run pytest with coverage - ${CONDA_RUN} pytest -n auto --cov=./ --cov-report=xml - # Run gtest - LLVM_PROFDATA="xcrun llvm-profdata" LLVM_COV="xcrun llvm-cov" \ - ${CONDA_RUN} test/run_oss_cpp_tests.sh + .ci/scripts/unittest-macos.sh "${{ inputs.build-tool }}" diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index fac23197891..697601e3b27 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -367,6 +367,16 @@ jobs: id-token: write contents: read with: + build-tool: cmake + docker-image: executorch-ubuntu-22.04-clang12 + + unittest-buck: + uses: ./.github/workflows/_unittest.yml + permissions: + id-token: write + contents: read + with: + build-tool: buck2 docker-image: executorch-ubuntu-22.04-clang12 unittest-arm: From 0d3cbe814d9c2237d6880ba8a103bc84376f3a96 Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Wed, 19 Feb 2025 12:08:54 -0600 Subject: [PATCH 019/584] Add c10::irange to ExecuTorch (#8572) Pull Request resolved: https://github.com/pytorch/executorch/pull/8554 irange is a header-only utility that is both more readable than the usual way to write for loops and also solves -Wsign-compare issues. I've previously vetted that it generates the same assembly as a regular for loop. 
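
A minimal usage sketch (the `sum_sizes` function and its arguments below are
illustrative placeholders, not APIs from this repo; the real migration is the
tensor_impl.cpp hunk in this patch):

    #include <cstddef>
    #include <c10/util/irange.h>

    size_t sum_sizes(const int* sizes, size_t dim) {
      size_t total = 0;
      // A classic `for (int i = 0; i < dim; ++i)` loop compares signed `i`
      // against unsigned `dim`, which is exactly what -Wsign-compare flags.
      // c10::irange(dim) iterates over [0, dim) with `i` deduced from dim's
      // type, so the comparison is well-typed and the loop reads cleanly.
      for (const auto i : c10::irange(dim)) {
        total += sizes[i];
      }
      return total;
    }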
ghstack-source-id: 267194238 @exported-using-ghexport Differential Revision: [D69817195](https://our.internmc.facebook.com/intern/diff/D69817195/) Co-authored-by: Github Executorch --- runtime/core/portable_type/c10/README.md | 14 +- .../core/portable_type/c10/c10/targets.bzl | 1 + .../core/portable_type/c10/c10/util/irange.h | 123 ++++++++++++++++++ runtime/core/portable_type/targets.bzl | 3 + runtime/core/portable_type/tensor_impl.cpp | 4 +- 5 files changed, 140 insertions(+), 5 deletions(-) create mode 100644 runtime/core/portable_type/c10/c10/util/irange.h diff --git a/runtime/core/portable_type/c10/README.md b/runtime/core/portable_type/c10/README.md index df14d22a4cf..104a6717ba7 100644 --- a/runtime/core/portable_type/c10/README.md +++ b/runtime/core/portable_type/c10/README.md @@ -1,7 +1,13 @@ -We added an extra c10 directory so that runtime/core/portable_type/c10 +This directory contains header files from `c10` in PyTorch core that +need to be used in ExecuTorch core. They are copied here rather than +being found through the torch pip package to keep the core build +hermetic for embedded use cases. The headers should be exact copies +from PyTorch core; if they are out of sync, please send a PR! + +We added an extra c10 directory so that `runtime/core/portable_type/c10` can be the directory to put on your include path, rather than -runtime/core/portable_type, because using runtime/core/portable_type +`runtime/core/portable_type`, because using `runtime/core/portable_type` would cause all headers in that directory to be includeable with `#include `. In particular, that includes -runtime/core/portable_type/complex.h, which would shadow the C99 -complex.h standard header. +`runtime/core/portable_type/complex.h`, which would shadow the C99 +`complex.h` standard header. diff --git a/runtime/core/portable_type/c10/c10/targets.bzl b/runtime/core/portable_type/c10/c10/targets.bzl index 1e60b70a4b8..64436278e79 100644 --- a/runtime/core/portable_type/c10/c10/targets.bzl +++ b/runtime/core/portable_type/c10/c10/targets.bzl @@ -26,6 +26,7 @@ def define_common_targets(): "util/TypeSafeSignMath.h", "util/bit_cast.h", "util/floating_point_utils.h", + "util/irange.h", ], exported_preprocessor_flags = [ # NOTE: If we define C10_EMBEDDED to prevent Half and diff --git a/runtime/core/portable_type/c10/c10/util/irange.h b/runtime/core/portable_type/c10/c10/util/irange.h new file mode 100644 index 00000000000..2719a82075c --- /dev/null +++ b/runtime/core/portable_type/c10/c10/util/irange.h @@ -0,0 +1,123 @@ +// Copyright 2004-present Facebook. All Rights Reserved. + +#pragma once + +#include + +#include +#include +#include +#include + +namespace c10 { + +namespace detail { + +template < + typename I, + bool one_sided = false, + std::enable_if_t, int> = 0> +struct integer_iterator { + using iterator_category = std::input_iterator_tag; + using value_type = I; + using difference_type = std::ptrdiff_t; + using pointer = I*; + using reference = I&; + + explicit integer_iterator(I value) : value(value) {} + + I operator*() const { + return value; + } + + I const* operator->() const { + return &value; + } + + integer_iterator& operator++() { + ++value; + return *this; + } + + integer_iterator operator++(int) { + const auto copy = *this; + ++*this; + return copy; + } + + bool operator==(const integer_iterator& other) const { + if constexpr (one_sided) { + // Range-for loops' end test is `begin != end`, not `begin < + // end`. 
To handle `c10::irange(n)` where n < 0 (which should be + // empty), we just make `begin != end` fail whenever `end` is + // negative. + return is_negative(other.value) || value == other.value; + } else { + return value == other.value; + } + // Suppress "warning: missing return statement at end of non-void function" + // which Nvidia's Robert Crovella confirms is an NVCC compiler error + // here https://stackoverflow.com/a/64561686/752843 on 2020-10-27 + // `__builtin_unreachable();` would be best here, but it's not + // available with all compilers. So we instead return an arbitrary + // value trusting that this line will, in fact, never be reached. + return false; // Horrible hack + } + + bool operator!=(const integer_iterator& other) const { + return !(*this == other); + } + + protected: + I value; +}; + +} // namespace detail + +template < + typename I, + bool one_sided = false, + std::enable_if_t, bool> = true> +struct integer_range { + public: + integer_range(I begin, I end) : begin_(begin), end_(end) {} + using iterator = detail::integer_iterator; + iterator begin() const { + return begin_; + } + iterator end() const { + return end_; + } + + private: + iterator begin_; + iterator end_; +}; + +/// Creates an integer range for the half-open interval [begin, end) +/// If end<=begin, then the range is empty. +/// The range has the type of the `end` integer; `begin` integer is +/// cast to this type. +template < + typename Integer1, + typename Integer2, + std::enable_if_t, bool> = true, + std::enable_if_t, bool> = true> +integer_range irange(Integer1 begin, Integer2 end) { + // If end<=begin then the range is empty; we can achieve this effect by + // choosing the larger of {begin, end} as the loop terminator + return { + static_cast(begin), + std::max(static_cast(begin), end)}; +} + +/// Creates an integer range for the half-open interval [0, end) +/// If end<=begin, then the range is empty +template < + typename Integer, + std::enable_if_t, bool> = true> +integer_range irange(Integer end) { + return {Integer(), end}; +} + +} // namespace c10 diff --git a/runtime/core/portable_type/targets.bzl b/runtime/core/portable_type/targets.bzl index 43efeca208c..6178f2c0f9a 100644 --- a/runtime/core/portable_type/targets.bzl +++ b/runtime/core/portable_type/targets.bzl @@ -28,6 +28,9 @@ def define_common_targets(): "//executorch/runtime/core/exec_aten/...", "//executorch/runtime/core/portable_type/test/...", ], + deps = [ + "//executorch/runtime/core/portable_type/c10/c10:c10", + ], exported_deps = [ ":scalar_type", "//executorch/runtime/core:core", diff --git a/runtime/core/portable_type/tensor_impl.cpp b/runtime/core/portable_type/tensor_impl.cpp index b978e23cbd6..6366a8eac28 100644 --- a/runtime/core/portable_type/tensor_impl.cpp +++ b/runtime/core/portable_type/tensor_impl.cpp @@ -11,6 +11,8 @@ #include #include +#include + #include #include #include @@ -30,7 +32,7 @@ ssize_t compute_numel(const TensorImpl::SizesType* sizes, ssize_t dim) { dim == 0 || sizes != nullptr, "Sizes must be provided for non-scalar tensors"); ssize_t numel = 1; // Zero-dimensional tensors (scalars) have numel == 1. 
- for (ssize_t i = 0; i < dim; ++i) { + for (const auto i : c10::irange(dim)) { ET_CHECK_MSG( sizes[i] >= 0, "Size must be non-negative, got %d at dimension %zd", From 00c14438f45af05fb7fec0eac43944b17d174887 Mon Sep 17 00:00:00 2001 From: Jack <32371937+jackzhxng@users.noreply.github.com> Date: Wed, 19 Feb 2025 10:43:55 -0800 Subject: [PATCH 020/584] Update bug-report.yml (#8498) --- .github/ISSUE_TEMPLATE/bug-report.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 86363e7da9d..010f7c1132e 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -21,7 +21,7 @@ body: A clear and concise description of what the bug is. ```python - # Sample code to reproduce the problem + # Sample code to reproduce the problem. If applicable, also include your model export command. ``` ``` From b6ffe1a1daddc233d6661c9d658a9e81fc79b28c Mon Sep 17 00:00:00 2001 From: Dave Bort Date: Wed, 19 Feb 2025 12:18:59 -0800 Subject: [PATCH 021/584] Fix lint issue in backends/cadence/aot/fuse_ops.py (#8575) Fix lint issue in backends/cadence/aot/fuse_ops.py introduced in #8506 --- backends/cadence/aot/fuse_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/cadence/aot/fuse_ops.py b/backends/cadence/aot/fuse_ops.py index 47e6b8b5d03..f8a4b114e29 100644 --- a/backends/cadence/aot/fuse_ops.py +++ b/backends/cadence/aot/fuse_ops.py @@ -17,7 +17,7 @@ from typing import cast, Sequence # Import these for the cadence function signatures. -import executorch.backends.cadence.aot.ops_registrations # noqa: F401 +import executorch.backends.cadence.aot.ops_registrations # noqa: F401 import torch import torch.fx From e5dc18a9c9030c331c79002e4195397e80f6e044 Mon Sep 17 00:00:00 2001 From: Dario Seyb Date: Wed, 19 Feb 2025 13:36:43 -0800 Subject: [PATCH 022/584] Fix unsqueeze optimize pass Differential Revision: D69812661 Pull Request resolved: https://github.com/pytorch/executorch/pull/8564 --- backends/transforms/view_copy_to_squeeze_unsqueeze.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/backends/transforms/view_copy_to_squeeze_unsqueeze.py b/backends/transforms/view_copy_to_squeeze_unsqueeze.py index f4a0670072c..08ed70b2fa8 100644 --- a/backends/transforms/view_copy_to_squeeze_unsqueeze.py +++ b/backends/transforms/view_copy_to_squeeze_unsqueeze.py @@ -75,7 +75,11 @@ def find_unsqueeze_dim( j = 0 idx = -1 while j < len(view_shape): - if input_shape[i] != view_shape[j]: + # account for added dim being last dim in view_shape + if i == j and j == len(input_shape): + if view_shape[j] != 1: + return None + elif input_shape[i] != view_shape[j]: if view_shape[j] == 1: idx = j i -= 1 From 69ac165bd0ebc66c7711c65322f9423ffb844ad4 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 19 Feb 2025 14:17:37 -0800 Subject: [PATCH 023/584] Add Code Owners file Differential Revision: D69626036 Pull Request resolved: https://github.com/pytorch/executorch/pull/8545 --- CODEOWNERS | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 CODEOWNERS diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 00000000000..7a9d2a88f88 --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,85 @@ +# IMPORTANT: +# This file is ONLY used to subscribe for notifications for PRs +# related to a specific file path. Approvals from people in this +# file are not required for merges. 
+ +/backends/apple @shoumikhin @cccclai +/backends/apple/mps @cccclai @DenisVieriu97 +/backends/arm @digantdesai +/backends/cadence @tarun292 +/backends/example @iseeyuan @JacobSzwejbka @larryliu0820 +/backends/mediatek @cccclai @neuropilot-captain +/backends/qualcomm @cccclai @chunit-quic @haowhsu-quic @shewu-quic @winskuo-quic +/backends/test @cccclai +/backends/transforms @kimishpatel +/backends/vulkan @SS-JIA +/backends/xnnpack @digantdesai @mcr229 + +/build @GregoryComer @dbort @kirklandsign + +/codegen @larryliu0820 @lucylq + +/devtools @tarun292 @Gasoonjia + +/docs @mergennachin + +/examples/apple @shoumikhin +/examples/apple/coreml @cccclai @metascroy @cymbalrush @YifanShenSZ +/examples/arm @digantdesai +/examples/cadence @tarun292 +/examples/demo-apps @shoumikhin @kirklandsign +/examples/devtools @tarun292 +/examples/llm_manual @larryliu0820 +/examples/llm_pte_finetuning @JacobSzwejbka +/examples/mediatek @cccclai +/examples/models @lucylq +/examples/portable @larryliu0820 @manuelcandales +/examples/qualcomm @cccclai +/examples/selective_build @lucylq @larryliu0820 @JacobSzwejbka +/examples/xnnpack @digantdesai @mcr229 + +/exir/backend @cccclai @kimishpatel @JacobSzwejbka @tarun292 +/exir @JacobSzwejbka @tarun292 @larryliu0820 + + +/extension/android @kirklandsign +/extension/android_test @kirklandsign +/extension/apple @shoumikhin +/extension/aten_util @JacobSzwejbka +/extension/benchmark @tarun292 +/extension/data_loader @JacobSzwejbka @lucylq @dbort +/extension/evalue_util @GregoryComer @dbort +/extension/export_util @kimishpatel +/extension/flat_tensor @lucylq +/extension/gguf_util @larryliu0820 +/extension/kernel_util @kimishpatel @manuelcandales +/extension/llm @jackzhxng @iseeyuan @larryliu0820 +/extension/memory_allocator @JacobSzwejbka @dbort +/extension/module @shoumikhin +/extension/parallel @kimishpatel +/extension/pybindings @JacobSzwejbka @larryliu0820 +/extension/pytree @JacobSzwejbka +/extension/runner_util @dbort +/extension/tensor @shoumikhin +/extension/testing_util @dbort +/extension/threadpool @kimishpatel +/extension/training @JacobSzwejbka + +/kernels @manuelcandales + +/profiler @tarun292 @Gasoonjia + +/runtime @dbort @JacobSzwejbka @lucylq +/runtime/backend @cccclai + +/schema @dbort @JacobSzwejbka @lucylq + +/scripts @GregoryComer + +/shim @larryliu0820 @GregoryComer + +/third-party @GregoryComer + +/test @larryliu0820 @kirklandsign + +/util @tarun292 From 4333d658ed27c4d68abc1bbe421657c2ce64f8f8 Mon Sep 17 00:00:00 2001 From: JP <46308822+zonglinpeng@users.noreply.github.com> Date: Wed, 19 Feb 2025 15:12:55 -0800 Subject: [PATCH 024/584] [cadence][hifi] Fixed all hifi ops and cmake Differential Revision: D69812342 Pull Request resolved: https://github.com/pytorch/executorch/pull/8551 --- backends/cadence/CMakeLists.txt | 11 ++++++++--- backends/cadence/aot/functions_hifi.yaml | 5 ----- backends/cadence/fusion_g3/operators/CMakeLists.txt | 3 ++- backends/cadence/hifi/kernels/CMakeLists.txt | 3 ++- backends/cadence/hifi/operators/CMakeLists.txt | 5 +++-- backends/cadence/hifi/operators/op_clamp.cpp | 2 +- backends/cadence/hifi/operators/op_mean.cpp | 2 +- .../cadence/hifi/operators/op_quantized_relu_out.cpp | 6 +++--- backends/cadence/hifi/operators/op_softmax.cpp | 2 +- backends/cadence/hifi/operators/op_where.cpp | 9 +++++++++ backends/cadence/reference/kernels/CMakeLists.txt | 3 ++- backends/cadence/reference/operators/CMakeLists.txt | 3 ++- 12 files changed, 34 insertions(+), 20 deletions(-) diff --git a/backends/cadence/CMakeLists.txt 
b/backends/cadence/CMakeLists.txt
index e2ac3de5cab..65dd5430588 100644
--- a/backends/cadence/CMakeLists.txt
+++ b/backends/cadence/CMakeLists.txt
@@ -22,7 +22,10 @@ endif()
 include(${EXECUTORCH_ROOT}/build/Utils.cmake)
 
 # Let files say "include <executorch/path/to/header.h>".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+set(_common_include_directories ${EXECUTORCH_ROOT}/..
+    ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
+
+add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)
 
 if(EXECUTORCH_CADENCE_CPU_RUNNER)
   include(${EXECUTORCH_ROOT}/build/Codegen.cmake)
@@ -74,10 +77,12 @@ endif()
 
 if(EXECUTORCH_NNLIB_OPT)
   set(TARGET_DIR hifi)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib
+                   ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
 elseif(EXECUTORCH_FUSION_G3_OPT)
   set(TARGET_DIR fusion_g3)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib
+                   ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
 else()
   set(TARGET_DIR reference)
 endif()
diff --git a/backends/cadence/aot/functions_hifi.yaml b/backends/cadence/aot/functions_hifi.yaml
index 7a98d704d87..f74d2c7a324 100644
--- a/backends/cadence/aot/functions_hifi.yaml
+++ b/backends/cadence/aot/functions_hifi.yaml
@@ -219,11 +219,6 @@
 - arg_meta: null
   kernel_name: cadence::impl::HiFi::quantized_relu_per_tensor_out
 
-- func: cadence::quantized_linear.per_tensor_out(Tensor src, Tensor weight, Tensor bias, SymInt src_zero_point, SymInt weight_zero_point, SymInt out_multiplier, SymInt out_shift, SymInt out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
-  kernels:
-    - arg_meta: null
-      kernel_name: cadence::impl::HiFi::quantized_linear_per_tensor_out
-
 - func: cadence::quantized_fully_connected.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
diff --git a/backends/cadence/fusion_g3/operators/CMakeLists.txt b/backends/cadence/fusion_g3/operators/CMakeLists.txt
index cac16bddc50..f39614ee4f3 100644
--- a/backends/cadence/fusion_g3/operators/CMakeLists.txt
+++ b/backends/cadence/fusion_g3/operators/CMakeLists.txt
@@ -64,7 +64,8 @@ target_link_libraries(aten_ops_cadence PUBLIC executorch)
 target_link_libraries(aten_ops_cadence PRIVATE xa_nnlib)
 
 # Let files say "include <executorch/path/to/header.h>".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+set(_common_include_directories ${EXECUTORCH_ROOT}/..
+${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
 
 target_include_directories(
   aten_ops_cadence PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}
diff --git a/backends/cadence/hifi/kernels/CMakeLists.txt b/backends/cadence/hifi/kernels/CMakeLists.txt
index 9bbd386c75c..270835dbb74 100644
--- a/backends/cadence/hifi/kernels/CMakeLists.txt
+++ b/backends/cadence/hifi/kernels/CMakeLists.txt
@@ -25,7 +25,8 @@ add_library(
   ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_transpose_32.c
 )
 # Let files say "include <executorch/path/to/header.h>".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+set(_common_include_directories ${EXECUTORCH_ROOT}/..
+${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
 
 target_include_directories(
   cadence_kernels
diff --git a/backends/cadence/hifi/operators/CMakeLists.txt b/backends/cadence/hifi/operators/CMakeLists.txt
index d6820c0700d..86b85bbfb6c 100644
--- a/backends/cadence/hifi/operators/CMakeLists.txt
+++ b/backends/cadence/hifi/operators/CMakeLists.txt
@@ -67,7 +67,8 @@ target_link_libraries(aten_ops_cadence PUBLIC executorch)
 target_link_libraries(aten_ops_cadence PRIVATE cadence_kernels)
 
 # Let files say "include <executorch/path/to/header.h>".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+set(_common_include_directories ${EXECUTORCH_ROOT}/..
+${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
 
 target_include_directories(
   aten_ops_cadence PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}
@@ -77,7 +78,7 @@ target_include_directories(
 # Custom ops that are needed to run the test model.
 add_library(
   custom_ops "op_quantized_linear_out.cpp" "op_quantized_layer_norm.cpp"
-  "op_quantize_per_tensor.cpp" "op_quantized_relu_out.cpp" "op_dequantize_per_tensor.cpp"
+  "op_quantize_per_tensor.cpp" "op_quantized_relu_out.cpp" "op_dequantize_per_tensor.cpp" "op_quantized_fully_connected_out"
 )
 target_include_directories(
   custom_ops PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}
diff --git a/backends/cadence/hifi/operators/op_clamp.cpp b/backends/cadence/hifi/operators/op_clamp.cpp
index 05c8659cbcb..785e6f015d5 100644
--- a/backends/cadence/hifi/operators/op_clamp.cpp
+++ b/backends/cadence/hifi/operators/op_clamp.cpp
@@ -328,7 +328,7 @@ Tensor& clamp_tensor_out(
     const executorch::aten::optional<Tensor>& min_opt,
     const executorch::aten::optional<Tensor>& max_opt,
     Tensor& out) {
-  clamp_Tensor_out(ctx, in, min_opt, max_opt, out);
+  return clamp_Tensor_out(ctx, in, min_opt, max_opt, out);
 }
 
 } // namespace native
diff --git a/backends/cadence/hifi/operators/op_mean.cpp b/backends/cadence/hifi/operators/op_mean.cpp
index 59cf8581583..4b93e55047b 100644
--- a/backends/cadence/hifi/operators/op_mean.cpp
+++ b/backends/cadence/hifi/operators/op_mean.cpp
@@ -175,7 +175,7 @@ Tensor& mean_dim_out(
     bool keepdim,
     optional<ScalarType> dtype,
     Tensor& out) {
-  mean_out(ctx, in, dim_list, keepdim, dtype, out);
+  return mean_out(ctx, in, dim_list, keepdim, dtype, out);
 }
 
 } // namespace native
diff --git a/backends/cadence/hifi/operators/op_quantized_relu_out.cpp b/backends/cadence/hifi/operators/op_quantized_relu_out.cpp
index b8baa946b98..9b65751da71 100644
--- a/backends/cadence/hifi/operators/op_quantized_relu_out.cpp
+++ b/backends/cadence/hifi/operators/op_quantized_relu_out.cpp
@@ -100,10 +100,10 @@ void quantized_relu_per_tensor_out(
 void quantized_relu_out(
     KernelRuntimeContext& ctx,
     const Tensor& input,
-    const int64_t in_zero_point,
+    const Tensor& in_zero_point,
     const int64_t out_zero_point,
-    const int64_t out_multiplier,
-    const int64_t out_shift,
+    const Tensor& out_multiplier,
+    const Tensor& out_shift,
     Tensor& output) {
   quantized_relu_per_tensor_out(
       ctx,
diff --git a/backends/cadence/hifi/operators/op_softmax.cpp b/backends/cadence/hifi/operators/op_softmax.cpp
index 852479ed935..25d3ad7d389 100644
--- a/backends/cadence/hifi/operators/op_softmax.cpp
+++ b/backends/cadence/hifi/operators/op_softmax.cpp
@@ -200,7 +200,7 @@ Tensor& softmax_out(
     int64_t dim,
     bool half_to_float,
     Tensor& out) {
-  _softmax_out(ctx, in, dim, half_to_float, out);
+  return _softmax_out(ctx, in, dim, half_to_float, out);
 }
 
 } // namespace native
diff --git a/backends/cadence/hifi/operators/op_where.cpp b/backends/cadence/hifi/operators/op_where.cpp
index ac7559691ae..94c1684fe09 100644
--- a/backends/cadence/hifi/operators/op_where.cpp
+++ b/backends/cadence/hifi/operators/op_where.cpp
@@ -183,6 +183,15 @@ Tensor& where_self_out(
   return out;
 }
 
+Tensor& where_out(
+    RuntimeContext& ctx,
+    const Tensor& cond,
+    const Tensor& a,
+    const Tensor& b,
+    Tensor& out) {
+  // Note: forwarding to where_self_out; calling where_out here would recurse
+  // into itself and never terminate.
+  return where_self_out(ctx, cond, a, b, out);
+}
+
 } // namespace native
 } // namespace HiFi
 } // namespace impl
diff --git a/backends/cadence/reference/kernels/CMakeLists.txt b/backends/cadence/reference/kernels/CMakeLists.txt
index 07394cbe834..3fe0fe2101f 100644
--- a/backends/cadence/reference/kernels/CMakeLists.txt
+++ b/backends/cadence/reference/kernels/CMakeLists.txt
@@ -8,7 +8,8 @@
 add_library(cadence_kernels kernels.cpp)
 
 # Let files say "include <executorch/path/to/header.h>".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+set(_common_include_directories ${EXECUTORCH_ROOT}/..
+${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
 
 target_include_directories(cadence_kernels PUBLIC .
   ${_common_include_directories}
diff --git a/backends/cadence/reference/operators/CMakeLists.txt b/backends/cadence/reference/operators/CMakeLists.txt
index a2d51af2c0c..ce926d86018 100644
--- a/backends/cadence/reference/operators/CMakeLists.txt
+++ b/backends/cadence/reference/operators/CMakeLists.txt
@@ -71,7 +71,8 @@ target_link_libraries(aten_ops_cadence PUBLIC executorch)
 target_link_libraries(aten_ops_cadence PRIVATE cadence_kernels)
 
 # Let files say "include <executorch/path/to/header.h>".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+set(_common_include_directories ${EXECUTORCH_ROOT}/..
+${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
 
 target_include_directories(
   aten_ops_cadence PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}
From e2f86243570103b4ee398a983ac86464b12bccc7 Mon Sep 17 00:00:00 2001
From: Shen Chen Xu
Date: Wed, 19 Feb 2025 15:49:49 -0800
Subject: [PATCH 025/584] Support HuggingFace RoPE in static attention

Differential Revision: D69857290

Pull Request resolved: https://github.com/pytorch/executorch/pull/8569
---
 examples/models/llama/static_attention.py    | 38 +++++++++++++------
 .../llama/tests/test_static_attention.py     | 29 ++++++++++++++
 2 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/examples/models/llama/static_attention.py b/examples/models/llama/static_attention.py
index 8b341a3aafd..1d9b08d2f65 100644
--- a/examples/models/llama/static_attention.py
+++ b/examples/models/llama/static_attention.py
@@ -114,15 +114,30 @@ def update(
 
         return all_data, (out_k_cache, out_v_cache)
 
-def _apply_rotary_embedding(
-    x: torch.Tensor, freqs_cos: torch.Tensor, freqs_sin: torch.Tensor
-) -> torch.Tensor:
-    x_r, x_i = x[..., ::2], x[..., 1::2]
-    x_out_r = x_r * freqs_cos - x_i * freqs_sin
-    x_out_i = x_r * freqs_sin + x_i * freqs_cos
+class _Rope(nn.Module):
+    def __init__(self, use_hf_rope):
+        super().__init__()
+        self.use_hf_rope = use_hf_rope
+
+    def forward(
+        self, x: torch.Tensor, freqs_cos: torch.Tensor, freqs_sin: torch.Tensor
+    ) -> torch.Tensor:
+        if self.use_hf_rope:
+            if len(freqs_cos.shape) == 2:
+                freqs_cos = freqs_cos.unsqueeze(0)
+            if len(freqs_sin.shape) == 2:
+                freqs_sin = freqs_sin.unsqueeze(0)
+            x1 = x[..., : x.shape[-1] // 2]
+            x2 = x[..., x.shape[-1] // 2 :]
+            x_rotated = torch.cat((-x2, x1), dim=-1)
+            return x * freqs_cos + x_rotated * freqs_sin
+        else:
+            x_r, x_i = x[..., ::2], x[..., 1::2]
+            x_out_r = x_r * freqs_cos - x_i * freqs_sin
+            x_out_i = x_r * freqs_sin + x_i * freqs_cos
 
-    x_out = torch.cat([x_out_r, x_out_i], dim=-1)
-    return x_out
+            x_out = torch.cat([x_out_r, x_out_i], dim=-1)
+            return x_out
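# A minimal illustration (hypothetical, not part of this patch): the two
# branches of _Rope.forward above compute the same rotation in different
# channel layouts. Assuming only plain torch, with made-up sizes:
import torch

d, n = 8, 4
x = torch.rand(1, n, d)
cos, sin = torch.rand(n, d // 2), torch.rand(n, d // 2)

# Interleaved convention: channel 2i is the real part, channel 2i+1 the
# imaginary part of each rotated pair.
xr, xi = x[..., ::2], x[..., 1::2]
interleaved = torch.cat([xr * cos - xi * sin, xr * sin + xi * cos], dim=-1)

# HuggingFace convention: the same values after moving even channels into the
# first half and odd channels into the second, with the cos/sin tables tiled
# to the full head dimension.
y = torch.cat([xr, xi], dim=-1)
y1, y2 = y[..., : d // 2], y[..., d // 2 :]
hf = y * cos.repeat(1, 2) + torch.cat((-y2, y1), dim=-1) * sin.repeat(1, 2)

# Identical outputs, so switching conventions only requires a fixed channel
# permutation of the checkpoint weights, not a different rotation.
assert torch.allclose(interleaved, hf)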
@register_attention("static") @@ -172,6 +187,7 @@ def __init__(self, config: ModelArgs, layer_id: int, rope: Rope): [StaticVCache(layer_id, i) for i in range(self.n_kv_heads)] ) self.wo = nn.Linear(self.n_heads * self.head_dim, self.dim, bias=False) + self.rope = _Rope(rope.params.use_hf_rope) def forward( self, @@ -191,8 +207,8 @@ def forward( new_qs = [self.wqs[i](x) for i in range(self.n_heads)] new_ks = [self.wks[i](x) for i in range(self.n_kv_heads)] new_vs = [self.wvs[i](x) for i in range(self.n_kv_heads)] - new_qs = [_apply_rotary_embedding(q, freqs_cos, freqs_sin) for q in new_qs] - new_ks = [_apply_rotary_embedding(k, freqs_cos, freqs_sin) for k in new_ks] + new_qs = [self.rope(q, freqs_cos, freqs_sin) for q in new_qs] + new_ks = [self.rope(k, freqs_cos, freqs_sin) for k in new_ks] all_ks = [] all_vs = [] @@ -211,7 +227,7 @@ def forward( kv_idx = i // self.n_heads_per_kv_group attn = new_qs[i] @ all_ks[kv_idx].transpose(-2, -1) attn = attn * self.inv_scale - attn = attn + mask # pyre-ignore + attn = attn + mask attn = F.softmax(attn, dim=-1) heads.append(attn @ all_vs[kv_idx]) diff --git a/examples/models/llama/tests/test_static_attention.py b/examples/models/llama/tests/test_static_attention.py index 401ba604cda..bf586ec4a6c 100644 --- a/examples/models/llama/tests/test_static_attention.py +++ b/examples/models/llama/tests/test_static_attention.py @@ -43,6 +43,35 @@ def test_without_cache(self): ) self.assertTrue(torch.isclose(y, expected, rtol=1e-3).all()) + def test_hf_rope_without_cache(self): + config = ModelArgs( + dim=64, + n_heads=4, + n_kv_heads=2, + max_seq_len=8, + use_hf_rope=True, + ) + layer_id = 0 + rope = Rope(config) + attn_mha = AttentionMHA(config, layer_id, rope).eval() + static_attn = StaticAttention(config, layer_id, rope).eval() + static_attn.load_weights_from_attention_mha(attn_mha) + + x = torch.rand(1, config.max_seq_len, config.dim) + freqs_cos, freqs_sin = rope.get_freqs(None, config.max_seq_len) + expected, _ = attn_mha(x, freqs_cos, freqs_sin) + mask = torch.triu( + torch.full((1, config.max_seq_len, config.max_seq_len), float("-inf")), + diagonal=1, + ) + y, _ = static_attn( + x, + freqs_cos.unsqueeze(0), + freqs_sin.unsqueeze(0), + mask=mask, + ) + self.assertTrue(torch.isclose(y, expected, rtol=1e-3).all()) + def test_with_cache(self): config = ModelArgs( dim=64, From 72432bacf75bf550b0e83e606b4e7b09271b106f Mon Sep 17 00:00:00 2001 From: Dave Bort Date: Wed, 19 Feb 2025 16:18:27 -0800 Subject: [PATCH 026/584] Arm: support files with prefix "ops_" Differential Revision: D69878804 Pull Request resolved: https://github.com/pytorch/executorch/pull/8585 --- backends/arm/operators/TARGETS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/arm/operators/TARGETS b/backends/arm/operators/TARGETS index 1f91aa37b75..cb08adb0354 100644 --- a/backends/arm/operators/TARGETS +++ b/backends/arm/operators/TARGETS @@ -13,7 +13,7 @@ python_library( python_library( name = "ops", - srcs = glob(["op_*.py"]), + srcs = glob(["op_*.py", "ops_*.py"]), typing = True, deps = [ "fbsource//third-party/serialization_lib/python/tosa:tosa", From 3e188fe119a7fd94197afe722d7135a56308954d Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 19 Feb 2025 17:18:03 -0800 Subject: [PATCH 027/584] add .ptd support to extension/module Differential Revision: D69478424 Pull Request resolved: https://github.com/pytorch/executorch/pull/8421 --- CMakeLists.txt | 5 + extension/flat_tensor/targets.bzl | 4 +- extension/module/CMakeLists.txt | 4 +- 
extension/module/module.cpp | 105 ++++++++++++++++----- extension/module/module.h | 27 +++++- extension/module/targets.bzl | 1 + extension/module/test/module_test.cpp | 25 ++++- extension/module/test/resources/README.md | 14 ++- extension/module/test/resources/linear.ptd | Bin 0 -> 336 bytes extension/module/test/resources/linear.pte | Bin 0 -> 1208 bytes 10 files changed, 149 insertions(+), 36 deletions(-) create mode 100644 extension/module/test/resources/linear.ptd create mode 100644 extension/module/test/resources/linear.pte diff --git a/CMakeLists.txt b/CMakeLists.txt index be0921a0b5a..01ad728c425 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -258,6 +258,11 @@ if(EXECUTORCH_BUILD_EXTENSION_TRAINING) set(EXECUTORCH_BUILD_EXTENSION_MODULE ON) endif() +if(EXECUTORCH_BUILD_EXTENSION_MODULE) + set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON) + set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON) +endif() + if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT) set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON) set(EXECUTORCH_BUILD_KERNELS_CUSTOM ON) diff --git a/extension/flat_tensor/targets.bzl b/extension/flat_tensor/targets.bzl index ed2adefc581..6f627492f24 100644 --- a/extension/flat_tensor/targets.bzl +++ b/extension/flat_tensor/targets.bzl @@ -9,13 +9,15 @@ def define_common_targets(): exported_headers = ["flat_tensor_data_map.h"], deps = [ "//executorch/extension/flat_tensor/serialize:generated_headers", - "//executorch/extension/flat_tensor/serialize:flat_tensor_header", "//executorch/runtime/core:core", "//executorch/runtime/core:evalue", "//executorch/runtime/core:named_data_map", "//executorch/runtime/core/exec_aten:lib", "//executorch/runtime/core/exec_aten/util:tensor_util", ], + exported_deps = [ + "//executorch/extension/flat_tensor/serialize:flat_tensor_header", + ], visibility = [ "//executorch/...", ], diff --git a/extension/module/CMakeLists.txt b/extension/module/CMakeLists.txt index 70441265c61..d144ce95356 100644 --- a/extension/module/CMakeLists.txt +++ b/extension/module/CMakeLists.txt @@ -27,7 +27,7 @@ if(CMAKE_TOOLCHAIN_IOS else() add_library(extension_module SHARED ${_extension_module__srcs}) endif() -target_link_libraries(extension_module PRIVATE executorch extension_data_loader) +target_link_libraries(extension_module PRIVATE executorch extension_data_loader extension_flat_tensor) target_include_directories(extension_module PUBLIC ${EXECUTORCH_ROOT}/..) target_compile_options( extension_module PUBLIC -Wno-deprecated-declarations -fPIC @@ -37,7 +37,7 @@ target_compile_options( # after cleaning up CMake targets. add_library(extension_module_static STATIC ${_extension_module__srcs}) target_link_libraries( - extension_module_static PRIVATE executorch extension_data_loader + extension_module_static PRIVATE executorch extension_data_loader extension_flat_tensor ) target_include_directories(extension_module_static PUBLIC ${EXECUTORCH_ROOT}/..) 
target_compile_options( diff --git a/extension/module/module.cpp b/extension/module/module.cpp index 99cc7e38bd6..aa750e2691e 100644 --- a/extension/module/module.cpp +++ b/extension/module/module.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -36,15 +37,59 @@ namespace executorch { namespace extension { +namespace { +runtime::Result> load_file( + const std::string& file_path, + Module::LoadMode mode) { + std::unique_ptr res = nullptr; + switch (mode) { + case Module::LoadMode::File: + res = ET_UNWRAP_UNIQUE(FileDataLoader::from(file_path.c_str())); + break; + case Module::LoadMode::Mmap: + res = ET_UNWRAP_UNIQUE(MmapDataLoader::from( + file_path.c_str(), MmapDataLoader::MlockConfig::NoMlock)); + break; + case Module::LoadMode::MmapUseMlock: + res = ET_UNWRAP_UNIQUE(MmapDataLoader::from(file_path.c_str())); + break; + case Module::LoadMode::MmapUseMlockIgnoreErrors: + res = ET_UNWRAP_UNIQUE(MmapDataLoader::from( + file_path.c_str(), + MmapDataLoader::MlockConfig::UseMlockIgnoreErrors)); + break; + } + return res; +} +} // namespace + +Module::Module( + const std::string& file_path, + const LoadMode load_mode, + std::unique_ptr event_tracer) + : file_path_(file_path), + load_mode_(load_mode), + memory_allocator_(std::make_unique()), + temp_allocator_(std::make_unique()), + event_tracer_(std::move(event_tracer)), + data_map_loader_(nullptr), + data_map_(nullptr) { + runtime::runtime_init(); +} + Module::Module( const std::string& file_path, + const std::string& data_map_path, const LoadMode load_mode, std::unique_ptr event_tracer) : file_path_(file_path), + data_map_path_(data_map_path), load_mode_(load_mode), memory_allocator_(std::make_unique()), temp_allocator_(std::make_unique()), - event_tracer_(std::move(event_tracer)) { + event_tracer_(std::move(event_tracer)), + data_map_loader_(nullptr), + data_map_(nullptr) { runtime::runtime_init(); } @@ -52,7 +97,8 @@ Module::Module( std::unique_ptr data_loader, std::unique_ptr memory_allocator, std::unique_ptr temp_allocator, - std::unique_ptr event_tracer) + std::unique_ptr event_tracer, + std::unique_ptr data_map_loader) : data_loader_(std::move(data_loader)), memory_allocator_( memory_allocator ? std::move(memory_allocator) @@ -60,7 +106,9 @@ Module::Module( temp_allocator_( temp_allocator ? std::move(temp_allocator) : std::make_unique()), - event_tracer_(std::move(event_tracer)) { + event_tracer_(std::move(event_tracer)), + data_map_loader_(std::move(data_map_loader)), + data_map_(nullptr) { runtime::runtime_init(); } @@ -68,7 +116,8 @@ Module::Module( std::shared_ptr program, std::unique_ptr memory_allocator, std::unique_ptr temp_allocator, - std::unique_ptr event_tracer) + std::unique_ptr event_tracer, + std::unique_ptr data_map_loader) : program_(std::move(program)), memory_allocator_( memory_allocator ? std::move(memory_allocator) @@ -76,33 +125,37 @@ Module::Module( temp_allocator_( temp_allocator ? 
std::move(temp_allocator) : std::make_unique()), - event_tracer_(std::move(event_tracer)) { + event_tracer_(std::move(event_tracer)), + data_map_loader_(std::move(data_map_loader)), + data_map_(nullptr) { runtime::runtime_init(); } runtime::Error Module::load(const runtime::Program::Verification verification) { if (!is_loaded()) { + // Load the program if (!data_loader_) { - switch (load_mode_) { - case LoadMode::File: - data_loader_ = - ET_UNWRAP_UNIQUE(FileDataLoader::from(file_path_.c_str())); - break; - case LoadMode::Mmap: - data_loader_ = ET_UNWRAP_UNIQUE(MmapDataLoader::from( - file_path_.c_str(), MmapDataLoader::MlockConfig::NoMlock)); - break; - case LoadMode::MmapUseMlock: - data_loader_ = - ET_UNWRAP_UNIQUE(MmapDataLoader::from(file_path_.c_str())); - break; - case LoadMode::MmapUseMlockIgnoreErrors: - data_loader_ = ET_UNWRAP_UNIQUE(MmapDataLoader::from( - file_path_.c_str(), - MmapDataLoader::MlockConfig::UseMlockIgnoreErrors)); - break; + auto res = load_file(file_path_, load_mode_); + if (!res.ok()) { + return res.error(); } - }; + data_loader_ = std::move(res.get()); + } + // If a .ptd path was given load it. + if (data_map_path_ != "") { + auto res = load_file(data_map_path_, load_mode_); + if (!res.ok()) { + return res.error(); + } + data_map_loader_ = std::move(res.get()); + } + // If we have a .ptd loader, then load the map. + if (data_map_loader_) { + data_map_ = + ET_UNWRAP_UNIQUE(FlatTensorDataMap::load(data_map_loader_.get())); + } + // else: either the map itself was provided or we have no data map, either + // way no work to do. auto program = ET_UNWRAP_UNIQUE( runtime::Program::load(data_loader_.get(), verification)); program_ = std::shared_ptr( @@ -130,6 +183,7 @@ runtime::Error Module::load_method( ET_CHECK_OK_OR_RETURN_ERROR(load()); MethodHolder method_holder; + const auto method_metadata = ET_UNWRAP(program_->method_meta(method_name.c_str())); const auto planned_buffersCount = @@ -155,7 +209,8 @@ runtime::Error Module::load_method( method_holder.method = ET_UNWRAP_UNIQUE(program_->load_method( method_name.c_str(), method_holder.memory_manager.get(), - event_tracer ? event_tracer : this->event_tracer())); + event_tracer ? event_tracer : this->event_tracer(), + data_map_.get())); method_holder.inputs.resize(method_holder.method->inputs_size()); methods_.emplace(method_name, std::move(method_holder)); } diff --git a/extension/module/module.h b/extension/module/module.h index 45ed38a7ff2..dc7c930d7c6 100644 --- a/extension/module/module.h +++ b/extension/module/module.h @@ -51,6 +51,21 @@ class Module { const LoadMode load_mode = LoadMode::MmapUseMlock, std::unique_ptr event_tracer = nullptr); + /** + * Constructs an instance by loading a program from a file with specified + * memory locking behavior. + * + * @param[in] file_path The path to the ExecuTorch program file to load. + * @param[in] data_map_path The path to a .ptd file + * @param[in] load_mode The loading mode to use. + * @param[in] event_tracer A EventTracer used for tracking and logging events. + */ + explicit Module( + const std::string& file_path, + const std::string& data_map_path, + const LoadMode load_mode = LoadMode::MmapUseMlock, + std::unique_ptr event_tracer = nullptr); + /** * Constructs an instance with the provided data loader and memory allocator. * @@ -59,12 +74,14 @@ class Module { * @param[in] temp_allocator A MemoryAllocator to use when allocating * temporary data during kernel or delegate execution. * @param[in] event_tracer A EventTracer used for tracking and logging events. 
+ * @param[in] data_map_loader A DataLoader used for loading external weights. */ explicit Module( std::unique_ptr data_loader, std::unique_ptr memory_allocator = nullptr, std::unique_ptr temp_allocator = nullptr, - std::unique_ptr event_tracer = nullptr); + std::unique_ptr event_tracer = nullptr, + std::unique_ptr data_map_loader = nullptr); /** * Constructs an instance using an existing shared program. @@ -75,12 +92,14 @@ class Module { * @param[in] temp_allocator A MemoryAllocator to use when allocating * temporary data. * @param[in] event_tracer A EventTracer used for tracking and logging events. + * @param[in] data_map_loader A DataLoader used for loading external weights. */ explicit Module( std::shared_ptr program, std::unique_ptr memory_allocator = nullptr, std::unique_ptr temp_allocator = nullptr, - std::unique_ptr event_tracer = nullptr); + std::unique_ptr event_tracer = nullptr, + std::unique_ptr data_map_loader = nullptr); Module(const Module&) = delete; Module& operator=(const Module&) = delete; @@ -433,14 +452,16 @@ class Module { std::vector inputs; }; - private: std::string file_path_; + std::string data_map_path_; LoadMode load_mode_{LoadMode::MmapUseMlock}; std::shared_ptr program_; std::unique_ptr data_loader_; std::unique_ptr memory_allocator_; std::unique_ptr temp_allocator_; std::unique_ptr event_tracer_; + std::unique_ptr data_map_loader_; + std::unique_ptr data_map_; protected: std::unordered_map methods_; diff --git a/extension/module/targets.bzl b/extension/module/targets.bzl index 61251047dc8..4cbfa0ca0f5 100644 --- a/extension/module/targets.bzl +++ b/extension/module/targets.bzl @@ -25,6 +25,7 @@ def define_common_targets(): "//executorch/extension/memory_allocator:malloc_memory_allocator", "//executorch/extension/data_loader:file_data_loader", "//executorch/extension/data_loader:mmap_data_loader", + "//executorch/extension/flat_tensor:flat_tensor_data_map", ], exported_deps = [ "//executorch/runtime/executor:program" + aten_suffix, diff --git a/extension/module/test/module_test.cpp b/extension/module/test/module_test.cpp index 2dbb0fea936..ac7d4db13a9 100644 --- a/extension/module/test/module_test.cpp +++ b/extension/module/test/module_test.cpp @@ -22,14 +22,20 @@ using namespace ::executorch::runtime; class ModuleTest : public ::testing::Test { protected: static void SetUpTestSuite() { - model_path_ = std::getenv("RESOURCES_PATH") + std::string("/add.pte"); + std::string resources_path; + if (const char* env = std::getenv("RESOURCES_PATH")) { + resources_path = env; + } + model_path_ = resources_path + "/add.pte"; + linear_path_ = resources_path + "/linear.pte"; + linear_data_path_ = resources_path + "/linear.ptd"; } - static std::string model_path_; + static inline std::string model_path_; + static inline std::string linear_path_; + static inline std::string linear_data_path_; }; -std::string ModuleTest::model_path_; - TEST_F(ModuleTest, TestLoad) { Module module(model_path_); @@ -435,3 +441,14 @@ TEST_F(ModuleTest, TestSetOutputInvalidType) { EXPECT_NE(module.set_output(EValue()), Error::Ok); } + +TEST_F(ModuleTest, TestPTD) { + Module module(linear_path_, linear_data_path_); + + ASSERT_EQ(module.load_method("forward"), Error::Ok); + + auto tensor1 = + make_tensor_ptr({3, 3}, {2.f, 3.f, 4.f, 2.f, 3.f, 4.f, 2.f, 3.f, 4.f}); + + ASSERT_EQ(module.forward(tensor1).error(), Error::Ok); +} diff --git a/extension/module/test/resources/README.md b/extension/module/test/resources/README.md index e2b54633fae..ecbdd41c107 100644 --- 
a/extension/module/test/resources/README.md +++ b/extension/module/test/resources/README.md @@ -1,11 +1,23 @@ ## Resources -### model.pte +### add.pte, linear.pte, linear.ptd - Internally generated after D62209852, 2024-09-06 with: ``` buck2 run fbcode//executorch/examples/portable/scripts:export -- --model_name="add" ``` + + and + + ``` + buck2 run fbcode//executorch/examples/portable/scripts:export -- --model_name="linear" -examples + ``` - In OSS, the same file can be generated after [#5145](https://github.com/pytorch/executorch/pull/5145), 2024-09-06 with: ``` python -m examples.portable.scripts.export --model_name="add" ``` + + and + + ``` + python -m examples.portable.scripts.export --model_name="linear" -e + ``` diff --git a/extension/module/test/resources/linear.ptd b/extension/module/test/resources/linear.ptd new file mode 100644 index 0000000000000000000000000000000000000000..edab857bb3f24db7f53e812ae6949588d6e62fb8 GIT binary patch literal 336 zcmZ=^U|?_yF)(!VFfh~rvJHS31Rj7%1_l8}2*Ux&hfzEX0zf7U0|x^S5CYW!u?kp( zfdQliBnFa$s9|7m0J1=U35XMbSOka#7-WEI*nz4*x<$YwkiiBrM*@gJx?%2tIsY5S`O}oEk+$B!xvFDJ~Es5epHE6bk-;AXe&WP7N3%F%kqz3rn%Gu&}VS zu(Y)F7l=Q=A7P=M@9pfyD>C8T?#<4;nVp%-yojtfYKvH8TJm_N#FrfQvw+Adumn_q zc_0Nm{>Cv^4{*Xgbq_yroYc`rWKMzvZyrnqDM1TfIDL(Ztbt$|r~uc{2S5QJ=Lq2B z#%=7%Gv-FSt?xKy&fJ4iI_Eu^z#8_Vla-aQ*=)Dva)`~$z~(gSw$FRf&VJ|gLhnaD zk#W>Q>oShFE8r4vxz+7{OYiPc|A{!bim}OJ<(|1RFCQ2Ks3&f$Uk-cUoHCaBl8cf7 z=u0>uXOf$_RS@I_2fI83KCrw3tmy_20OWi_53!6eCfRe)G6vl!C3{ZW?O5+(4)Cb! zS(;~?ID@~(KD*k#TYJ+t5W0Ia_qwr{tc&a0xa6W8S4vtJ^L)0sXiZYa=303M`LjRC z{OS0lHfc_^$P6Z$7MyNYZ5kR%#w*}C3o!OQVzCBQU|9aXOu<{b1d{|eImu0N+>$Ba z&2fKO?p^4Hao^c|S<5_r|Jmvkl1cB9@%T-$m&}!U->R>fJm&-h3wy2bY|vBM z>in?+|98-MbBTB`G@}0qtWh2BjOo4yoz*P93?nA)B(^qgKWZHw_3%FIo~#lloc(k& l!`w;$Yt>jR){51jQ7hJhQn|8L4a#MxP1g=O-G10@${&jA( literal 0 HcmV?d00001 From 75d4abc84d8341b84320cd0b0119edc70c410de2 Mon Sep 17 00:00:00 2001 From: mcremon-meta <134334895+mcremon-meta@users.noreply.github.com> Date: Wed, 19 Feb 2025 18:51:55 -0800 Subject: [PATCH 028/584] Enable quantized add Differential Revision: D69441041 Pull Request resolved: https://github.com/pytorch/executorch/pull/8584 --- backends/cadence/aot/ops_registrations.py | 44 +++++++++++++++ backends/cadence/aot/quantizer/fusion_pass.py | 53 ++++++++++++++++++- backends/cadence/aot/quantizer/patterns.py | 33 ++++++++++++ backends/cadence/aot/quantizer/quantizer.py | 15 +++++- backends/cadence/aot/replace_ops.py | 4 ++ 5 files changed, 146 insertions(+), 3 deletions(-) diff --git a/backends/cadence/aot/ops_registrations.py b/backends/cadence/aot/ops_registrations.py index a8dd1315846..1e328cf4e53 100644 --- a/backends/cadence/aot/ops_registrations.py +++ b/backends/cadence/aot/ops_registrations.py @@ -99,6 +99,10 @@ "quantized_add(Tensor X, Tensor X_scale, Tensor X_zero_point, Tensor Y, Tensor Y_scale, " "Tensor Y_zero_point, float out_scale, int out_zero_point) -> (Tensor Z)" ) +lib.define( + "quantized_add.per_tensor(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, " + "int Y_zero_point, float out_scale, int out_zero_point) -> (Tensor Z)" +) lib.define( "quantized_mul(Tensor X, Tensor X_scale, Tensor X_zero_point, Tensor Y, Tensor Y_scale, " "Tensor Y_zero_point, float out_scale, int out_zero_point) -> (Tensor Z)" @@ -175,6 +179,10 @@ "quantized_add.out(Tensor X, Tensor X_scale, Tensor X_zero_point, Tensor Y, Tensor Y_scale, " "Tensor Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) 
out) -> Tensor(a!)" ) +lib.define( + "quantized_add.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, " + "int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)" +) lib.define( "quantized_mul.out(Tensor X, Tensor X_scale, Tensor X_zero_point, Tensor Y, Tensor Y_scale, " "Tensor Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)" @@ -290,6 +298,42 @@ def dequantize_per_tensor_meta( return input.new_empty(input.size(), dtype=torch.float) +@register_fake("cadence::quantized_add") +def quantized_add_meta( + X: torch.Tensor, + X_scale: torch.Tensor, + X_zero_point: torch.Tensor, + Y: torch.Tensor, + Y_scale: torch.Tensor, + Y_zero_point: torch.Tensor, + out_scale: float, + out_zero_point: int, +) -> torch.Tensor: + out_size = X.size() + if list(X.size()) == [1]: + out_size = Y.size() + + return X.new_empty(out_size, dtype=X.dtype) + + +@register_fake("cadence::quantized_add.per_tensor") +def quantized_add_per_tensor_meta( + X: torch.Tensor, + X_scale: float, + X_zero_point: int, + Y: torch.Tensor, + Y_scale: float, + Y_zero_point: int, + out_scale: float, + out_zero_point: int, +) -> torch.Tensor: + out_size = X.size() + if list(X.size()) == [1]: + out_size = Y.size() + + return X.new_empty(out_size, dtype=X.dtype) + + @register_fake("cadence::quantized_linear") def quantized_linear_meta( src: torch.Tensor, diff --git a/backends/cadence/aot/quantizer/fusion_pass.py b/backends/cadence/aot/quantizer/fusion_pass.py index 7c05e9b8678..51d019f155e 100644 --- a/backends/cadence/aot/quantizer/fusion_pass.py +++ b/backends/cadence/aot/quantizer/fusion_pass.py @@ -11,6 +11,7 @@ import torch from executorch.backends.cadence.aot.quantizer.patterns import ( AddmmPattern, + AddPattern, BmmPattern, Conv1dPattern, Conv2dPattern, @@ -41,6 +42,47 @@ ReluPatterns = (ReluPattern0, ReluPattern1) +def get_args_and_kwargs_add( + graph_module: GraphModule, + inputs_inputs: List[fx.Node], + dequants_inputs: List[fx.Node], + quant_node: fx.Node, +) -> Tuple[Tuple[ArgsType, ...], Dict[str, ArgsType]]: + X_scale_ = graph_module.graph.call_function( + torch.ops.aten.full.default, + ([1], dequants_inputs[0].args[1]), + {"dtype": torch.float}, + ) + X_zero_point_ = graph_module.graph.call_function( + torch.ops.aten.full.default, + ([1], dequants_inputs[0].args[2]), + {"dtype": torch.int32}, + ) + Y_scale_ = graph_module.graph.call_function( + torch.ops.aten.full.default, + ([1], dequants_inputs[1].args[1]), + {"dtype": torch.float}, + ) + Y_zero_point_ = graph_module.graph.call_function( + torch.ops.aten.full.default, + ([1], dequants_inputs[1].args[2]), + {"dtype": torch.int32}, + ) + args = ( + inputs_inputs[0], + X_scale_, + X_zero_point_, + inputs_inputs[1], + Y_scale_, + Y_zero_point_, + quant_node.args[1], + quant_node.args[2], + ) + + kwargs = {} + return args, kwargs + + # Helper function to get the args and kwargs for the linear replacement op def get_args_and_kwargs_linear( graph_module: GraphModule, @@ -339,7 +381,7 @@ def call(self, graph_module: fx.GraphModule) -> PassResult: # noqa: C901 ) for fused_partition in fused_partitions: anchors = pattern.get_anchors(graph_module, fused_partition) - if not anchors: + if not anchors or anchors.empty: continue if any(self.is_fused(p.nodes) for p in fused_partition): continue @@ -385,7 +427,14 @@ def call(self, graph_module: fx.GraphModule) -> PassResult: # noqa: C901 inputs_inputs + weights_inputs + other_inputs + bias_inputs ) kwargs = {} - if isinstance(pattern, 
(Conv1dPattern, Conv2dPattern)): + if isinstance(pattern, AddPattern): + args, kwargs = get_args_and_kwargs_add( + graph_module, + inputs_inputs, + dequants_inputs, + quant_node, + ) + elif isinstance(pattern, (Conv1dPattern, Conv2dPattern)): args, kwargs = get_args_and_kwargs_conv( graph_module, inputs_inputs, diff --git a/backends/cadence/aot/quantizer/patterns.py b/backends/cadence/aot/quantizer/patterns.py index 0dee8ebcd1d..0e907812b10 100644 --- a/backends/cadence/aot/quantizer/patterns.py +++ b/backends/cadence/aot/quantizer/patterns.py @@ -43,6 +43,7 @@ class PartitionAnchors: output: List[Union[Tuple[fx.Node], Tuple[fx.Node, SharedQuantizationSpec]]] = field( default_factory=list ) + empty: bool = False class QuantizationPattern(ABC): @@ -101,6 +102,38 @@ def replacement_op(self) -> OpOverload: return torch.ops.cadence.quantized_linear +class AddPattern(QuantizationPattern): + def partition_types(self) -> List[OpOverload]: + return [torch.ops.aten.add.Tensor] + + def get_anchors( + self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule] + ) -> PartitionAnchors: + # pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge... + add_node = fused_partition[0].nodes[-1] + + # Bail if: + # - the add node is not a tensor add + # - the add node has kwargs (e.g. alpha) + is_tensor_add = isinstance(add_node.args[0], fx.Node) and isinstance( + add_node.args[1], fx.Node + ) + if not is_tensor_add or len(add_node.kwargs) > 0: + return PartitionAnchors( + empty=True, + ) + + return PartitionAnchors( + inputs=[(add_node, 0), (add_node, 1)], + weights=[], + biases=[], + output=[(add_node,)], + ) + + def replacement_op(self) -> OpOverload: + return torch.ops.cadence.quantized_add.default + + class BmmPattern(QuantizationPattern): def partition_types(self) -> List[OpOverload]: return [torch.ops.aten.bmm.default] diff --git a/backends/cadence/aot/quantizer/quantizer.py b/backends/cadence/aot/quantizer/quantizer.py index 585f38241a2..42cc1a1df14 100644 --- a/backends/cadence/aot/quantizer/quantizer.py +++ b/backends/cadence/aot/quantizer/quantizer.py @@ -12,6 +12,7 @@ import torch from executorch.backends.cadence.aot.quantizer.patterns import ( AddmmPattern, + AddPattern, BmmPattern, Conv1dPattern, Conv2dPattern, @@ -109,7 +110,7 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: continue anchors = self.pattern.get_anchors(model, fused_partition) - if not anchors: + if not anchors or anchors.empty: continue if is_annotated( [ @@ -211,3 +212,15 @@ def __init__( self, ) -> None: super().__init__([]) + + +class CadenceWakeWordQuantizer(CadenceQuantizer): + """ + Quantizer for WakeWord, including add + """ + + def __init__(self, quantizers: Optional[list[Quantizer]] = None) -> None: + if quantizers is None: + quantizers = get_cadence_default_quantizers() + quantizers.append(CadenceAtenQuantizer(AddPattern(), qconfig_A8uW8u)) + super().__init__(quantizers) diff --git a/backends/cadence/aot/replace_ops.py b/backends/cadence/aot/replace_ops.py index d2fbc0eda80..120f69008c1 100644 --- a/backends/cadence/aot/replace_ops.py +++ b/backends/cadence/aot/replace_ops.py @@ -1839,6 +1839,10 @@ class ReplaceSingleElementTensorArgumentsFromFullOpWithScalarPass(ExportPass): replaced_scalar_args: dict[ EdgeOpOverloadPacket, tuple[EdgeOpOverload, Sequence[int]] ] = { + exir_ops.edge.cadence.quantized_add: ( + exir_ops.edge.cadence.quantized_add.per_tensor, + [1, 2, 4, 5], + ), exir_ops.edge.cadence.quantized_conv: ( exir_ops.edge.cadence.quantized_conv.per_tensor, [8, 
9, 12, 13],
From 68eb62f905d569d52950d45d07acf4a126b0bda7 Mon Sep 17 00:00:00 2001
From: Tom Allsop <72802373+tom-arm@users.noreply.github.com>
Date: Thu, 20 Feb 2025 13:42:22 +0000
Subject: [PATCH 029/584] Arm backend: Add aten.relu_.default to
 quantization_annotator (#8567)

* The inplace version of relu (aten.relu_.default) was missing

Signed-off-by: Tom Allsop
---
 backends/arm/quantizer/quantization_annotator.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/backends/arm/quantizer/quantization_annotator.py b/backends/arm/quantizer/quantization_annotator.py
index cc12923911a..09eb3e2a12c 100644
--- a/backends/arm/quantizer/quantization_annotator.py
+++ b/backends/arm/quantizer/quantization_annotator.py
@@ -183,6 +183,7 @@ def _match_pattern(
         torch.ops.aten.hardtanh.default,
         torch.ops.aten.hardtanh_.default,
         torch.ops.aten.relu.default,
+        torch.ops.aten.relu_.default,
         torch.ops.aten.mean.default,
         torch.ops.aten.mean.dim,
         torch.ops.aten.permute.default,
From c35df8bfe6b0dad1b823b443e70733ee072fd950 Mon Sep 17 00:00:00 2001
From: Oscar Andersson <87121123+oscarandersson8218@users.noreply.github.com>
Date: Thu, 20 Feb 2025 14:53:11 +0100
Subject: [PATCH 030/584] Arm backend: Add additional tosa_supported_op checks
 for BI (#8593)

Add additional tosa_supported_op checks for BI

If a TosaSpecification without floating point support is used,
additional checks will be made during partitioning to make sure that we
don't partition operators that:
- are not quantized properly, i.e. do not have a dq-q pair surrounding
  them.
- should have been decomposed prior to quantization, e.g. div should be
  decomposed to a mul and recip before quantization.

Signed-off-by: Oscar Andersson
Co-authored-by: Erik Lundell
---
 .../_passes/fuse_quantized_activation_pass.py |  10 +-
 .../tosa_supported_operators.py               | 173 +++++++++++++++++-
 ...test_partition_decomposed_quantized_ops.py |  65 +++++++
 3 files changed, 242 insertions(+), 6 deletions(-)
 create mode 100644 backends/arm/test/misc/test_partition_decomposed_quantized_ops.py

diff --git a/backends/arm/_passes/fuse_quantized_activation_pass.py b/backends/arm/_passes/fuse_quantized_activation_pass.py
index 3ac9f5cbb98..13c69bf92f1 100644
--- a/backends/arm/_passes/fuse_quantized_activation_pass.py
+++ b/backends/arm/_passes/fuse_quantized_activation_pass.py
@@ -13,7 +13,8 @@ class FuseQuantizedActivationPass(ExportPass):
-    def _is_fuseable_quantized_activation(self, node: Node):
+    @staticmethod
+    def _is_fuseable_quantized_activation(node: Node):
         """Fuse activations that have a 0 lower bound and quantized with a qmin zero-point"""
         is_fuseable = node.target == exir_ops.edge.aten.relu.default
         if node.target == exir_ops.edge.aten.hardtanh.default:
@@ -29,7 +30,8 @@ def _is_fuseable_quantized_activation(self, node: Node):
         else:
             return False
 
-    def _is_fuseable_input(self, node: Node):
+    @staticmethod
+    def _is_fuseable_input(node: Node):
         return (
             node.target
             in (
@@ -45,11 +47,11 @@ def call(self, graph_module: torch.fx.GraphModule):
             if node.op != "call_function":
                 continue
 
-            if not self._is_fuseable_quantized_activation(node):
+            if not FuseQuantizedActivationPass._is_fuseable_quantized_activation(node):
                 continue
 
             input_node = node.args[0]
-            if not self._is_fuseable_input(input_node):
+            if not FuseQuantizedActivationPass._is_fuseable_input(input_node):
                 continue
 
             node.replace_all_uses_with(input_node)
diff --git a/backends/arm/operator_support/tosa_supported_operators.py b/backends/arm/operator_support/tosa_supported_operators.py
index e0ded25493a..6fe70aa696c 100644
--- a/backends/arm/operator_support/tosa_supported_operators.py
+++ b/backends/arm/operator_support/tosa_supported_operators.py
@@ -5,13 +5,22 @@
 # pyre-unsafe
 
+import itertools
 import operator
+import typing
 from typing import final, Optional, Sequence, Type
 
+import torch
+
 import torch.fx as fx
+from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor
+from executorch.backends.arm._passes.fuse_quantized_activation_pass import (
+    FuseQuantizedActivationPass,
+)
 from executorch.backends.arm.tosa_specification import TosaSpecification
 from executorch.exir.dialects._ops import ops as exir_ops
 from torch.fx.passes.operator_support import any_chain, chain, OperatorSupportBase
+from torch.fx.passes.utils.source_matcher_utils import get_source_partitions
 
 
 class SupportedTOSAOperatorCheck(OperatorSupportBase):
@@ -27,7 +36,9 @@ def __init__(self, tosa_spec: TosaSpecification):
     targets: list[str] = []
 
     @final
-    def is_node_supported(self, submodules, node: fx.Node) -> bool:
+    def is_node_supported(
+        self, submodules: typing.Mapping[str, torch.nn.Module], node: fx.Node
+    ) -> bool:
         if node.target not in self.targets:
             return False
         return self.is_node_tosa_supported(node, self.tosa_spec)
@@ -75,6 +86,10 @@ def tosa_support_factory(
     tosa_spec: TosaSpecification,
     additional_checks: Optional[Sequence[OperatorSupportBase]] = None,
 ) -> OperatorSupportBase:
+    negative_checks: list[OperatorSupportBase] = []
+    if not tosa_spec.support_float():
+        negative_checks.append(NeedsDecompositionCheck())
+        negative_checks.append(CheckProperQuantization())
     return chain(
         any_chain(
             BaseTOSASupportList(),
@@ -83,13 +98,16 @@ def tosa_support_factory(
                 for check in get_registered_tosa_support_checks(tosa_spec)
             ),
         ),
+        *negative_checks,
         *additional_checks if additional_checks else [],
     )
 
 
 class BaseTOSASupportList(OperatorSupportBase):
 
-    def is_node_supported(self, submodules, node: fx.Node) -> bool:
+    def is_node_supported(
+        self, submodules: typing.Mapping[str, torch.nn.Module], node: fx.Node
+    ) -> bool:
         supported = node.op == "call_function" and node.target in [
             exir_ops.edge.aten.abs.default,
             exir_ops.edge.aten.add.Tensor,
@@ -150,3 +168,154 @@ def is_node_supported(self, submodules, node: fx.Node) -> bool:
         ]
 
         return supported
+
+
+class NeedsDecompositionCheck(OperatorSupportBase):
+    """
+    Targeted operators need to be decomposed prior to quantization in order to get a pair of q-dq nodes surrounding
+    the operator, and to get optimal quantization parameters for each operator. This check will reject operators
+    that need to be decomposed.
+    """
+
+    def is_node_supported(
+        self, submodules: typing.Mapping[str, torch.nn.Module], node: fx.Node
+    ) -> bool:
+
+        if node.op != "call_function":
+            return True
+        if node.target == exir_ops.edge.aten.mean.dim:
+            dim = node.args[1]
+            return dim == [-1, -2]
+        needs_decomp = node.target in [
+            exir_ops.edge.aten.div.Tensor,
+            exir_ops.edge.aten._native_batch_norm_legit_no_training.default,
+            exir_ops.edge.aten.native_layer_norm.default,
+            exir_ops.edge.aten.mean.dim,
+            exir_ops.edge.aten._softmax.default,
+            exir_ops.edge.aten._log_softmax.default,
+            exir_ops.edge.aten.var.correction,
+            exir_ops.edge.aten.var.dim,
+        ]
+        return not needs_decomp
+
+
+class CheckProperQuantization(OperatorSupportBase):
+    """
+    For targeted nodes, check that they have been quantized as expected. In most cases this means that a pair of
+    quantize and dequantize nodes surrounds the node. This is necessary for table operators and operators that need
+    to rescale activations.
+ """ + + dq_op = exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default + q_op = exir_ops.edge.quantized_decomposed.quantize_per_tensor.default + + def _is_matmul_node_supported( + self, submodules: typing.Mapping[str, torch.nn.Module], node: fx.Node + ): + """ + Find the matmul source partition containing this node and check that all its inputs and outputs are quantized. + """ + for graph_module in submodules.values(): + graph_module = typing.cast(fx.GraphModule, graph_module) + matmul_partitions = get_source_partitions( + graph_module.graph, + [ + torch.matmul, + ], + None, + ) + matmul_partitions = list( + itertools.chain.from_iterable(matmul_partitions.values()) + ) + matched_partition = None + for partition in matmul_partitions: + if node in partition.nodes: + matched_partition = partition + if matched_partition is not None: + input_quantized = all( + input_node.target == self.dq_op + for input_node in matched_partition.input_nodes + ) + if not input_quantized: + return False + output_quantized = all( + output_node_user.target == self.q_op + for output_node_user in matched_partition.output_nodes[0].users + ) + if not output_quantized: + return False + else: + return False + + return True + + def is_node_supported( + self, submodules: typing.Mapping[str, torch.nn.Module], node: fx.Node + ) -> bool: + output_quantized = False + input_quantized = False + if node.target not in ( + exir_ops.edge.aten.add.Tensor, + exir_ops.edge.aten.avg_pool2d.default, + exir_ops.edge.aten.bmm.default, + exir_ops.edge.aten.convolution.default, + exir_ops.edge.aten.exp.default, + exir_ops.edge.aten.hardtanh.default, + exir_ops.edge.aten.linear.default, + exir_ops.edge.aten.log.default, + exir_ops.edge.aten.max_pool2d_with_indices.default, + exir_ops.edge.aten.mm.default, + exir_ops.edge.aten.mul.Tensor, + exir_ops.edge.aten.reciprocal.default, + exir_ops.edge.aten.relu.default, + exir_ops.edge.aten.rsqrt.default, + exir_ops.edge.aten.sigmoid.default, + exir_ops.edge.aten.sub.Tensor, + exir_ops.edge.aten.tanh.default, + exir_ops.edge.aten.upsample_nearest2d.vec, + ): + return True + elif node.target in ( + exir_ops.edge.aten.bmm.default, + exir_ops.edge.aten.mm.default, + ): + source_fn_stack: tuple[typing.Any] = node.meta.get("source_fn_stack", []) + if len(source_fn_stack) > 0: + if source_fn_stack[-1][1] in (torch.matmul,): + return self._is_matmul_node_supported(submodules, node) + + elif node.target in (exir_ops.edge.aten.max_pool2d_with_indices.default,): + users = node.users + output_quantized = all( + user.target == operator.getitem + and all(user_user.target == self.q_op for user_user in user.users) + for user in users + ) + elif FuseQuantizedActivationPass._is_fuseable_input(node): + users = node.users + output_quantized = all( + FuseQuantizedActivationPass._is_fuseable_quantized_activation(user) + for user in users + ) + elif FuseQuantizedActivationPass._is_fuseable_quantized_activation(node): + input_node = node.all_input_nodes[0] + input_quantized = FuseQuantizedActivationPass._is_fuseable_input(input_node) + + input_quantized = input_quantized or all( + (input_node.target == self.dq_op) + or (not get_first_fake_tensor(input_node).dtype.is_floating_point) + for input_node in node.all_input_nodes + ) + + if not input_quantized: + return False + + output_quantized = output_quantized or all( + (output_node.target == self.q_op) + or (not get_first_fake_tensor(output_node).dtype.is_floating_point) + for output_node in node.users + ) + + if not output_quantized: + return False + return True 
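The graph shape that CheckProperQuantization accepts is the usual PT2-export pattern: every targeted op consumes dequantize nodes and feeds a quantize node. A rough sketch of that dq -> op -> q pattern, with made-up quantization parameters (the quantized_decomposed ops below are the export-flow ones; the real graphs use their exir edge-dialect counterparts):

import torch
import torch.ao.quantization.fx._decomposed  # noqa: F401  # registers quantized_decomposed ops

q = torch.ops.quantized_decomposed.quantize_per_tensor.default
dq = torch.ops.quantized_decomposed.dequantize_per_tensor.default

def properly_quantized_add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    scale, zp = 0.05, 0  # hypothetical scale/zero-point
    x_q = q(x, scale, zp, -128, 127, torch.int8)
    y_q = q(y, scale, zp, -128, 127, torch.int8)
    # Both add inputs come from dequantize nodes and the output feeds a
    # quantize node, so this add would pass the new check; a bare float add
    # would now be rejected when targeting an integer-only (BI) TOSA profile.
    z = dq(x_q, scale, zp, -128, 127, torch.int8) + dq(y_q, scale, zp, -128, 127, torch.int8)
    return q(z, scale, zp, -128, 127, torch.int8)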
diff --git a/backends/arm/test/misc/test_partition_decomposed_quantized_ops.py b/backends/arm/test/misc/test_partition_decomposed_quantized_ops.py
new file mode 100644
index 00000000000..4bcae4930a2
--- /dev/null
+++ b/backends/arm/test/misc/test_partition_decomposed_quantized_ops.py
@@ -0,0 +1,65 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Test that tosa_supported_operators rejects operators that are not
+# quantized properly. This is typically a consequence of a torch op
+# such as Softplus that is decomposed into many other ops without
+# surrounding q/dq nodes.
+
+from typing import Tuple
+
+import torch
+from executorch.backends.arm.test import common
+
+from executorch.backends.arm.test.tester.test_pipeline import (
+    TosaPipelineBI,
+    TosaPipelineMI,
+)
+
+input_t1 = Tuple[torch.Tensor]
+aten_op: list[str] = ["torch.ops.aten.add.Tensor", "torch.ops.aten.softplus.default"]
+exir_op: list[str] = [
+    "executorch_exir_dialects_edge__ops_aten_add_Tensor",
+    "executorch_exir_dialects_edge__ops_aten_mul_Tensor",
+    "executorch_exir_dialects_edge__ops_aten_exp_default",
+    "executorch_exir_dialects_edge__ops_aten_div_Tensor",
+]
+
+
+test_data: dict[input_t1] = {
+    "3d_rand": (torch.rand(1, 5, 5),),
+}
+
+
+class Module(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.softplus = torch.nn.Softplus()
+
+    def forward(self, x: torch.Tensor):
+        return self.softplus(x + x)
+
+
+@common.parametrize("test_data", test_data)
+def test_softplus_tosa_MI(test_data: input_t1):
+    pipeline = TosaPipelineMI[input_t1](
+        Module(), test_data=test_data, aten_op=aten_op, exir_op=exir_op
+    )
+    # remove check_count.exir as there will be more than one delegate
+    pipeline.pop_stage("check_count.exir")
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data)
+def test_softplus_tosa_BI(test_data: input_t1):
+    pipeline = TosaPipelineBI[input_t1](
+        Module(), test_data=test_data, aten_op=aten_op, exir_op=exir_op
+    )
+    pipeline.pop_stage("check_not.exir")
+    # check that all ops in exir_op except add are rejected
+    pipeline.add_stage_after(
+        "partition", pipeline.tester.check, exir_op[1:], suffix="exir_post_partition"
+    )
+    pipeline.run()
From 2fff01add80ed0a0707c587d49f35f25278e69c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Lindstr=C3=B6m?= <33344797+martinlsm@users.noreply.github.com>
Date: Thu, 20 Feb 2025 16:21:16 +0100
Subject: [PATCH 031/584] Arm backend: Add Arm model test for Wav2letter
 (#8594)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add Arm model test for Wav2letter

Signed-off-by: Martin Lindström
Co-authored-by: Måns Nilsson
Co-authored-by: Fredrik Knutsson
---
 backends/arm/test/models/test_w2l_arm.py | 150 +++++++++++++++++++++++
 1 file changed, 150 insertions(+)
 create mode 100644 backends/arm/test/models/test_w2l_arm.py

diff --git a/backends/arm/test/models/test_w2l_arm.py b/backends/arm/test/models/test_w2l_arm.py
new file mode 100644
index 00000000000..184216e0ef8
--- /dev/null
+++ b/backends/arm/test/models/test_w2l_arm.py
@@ -0,0 +1,150 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+ +import logging +import unittest +from typing import Tuple + +import pytest + +import torch +from executorch.backends.arm.test import common, conftest +from executorch.backends.arm.test.tester.arm_tester import ArmTester + +from executorch.exir.backend.compile_spec_schema import CompileSpec +from torchaudio import models + + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +def get_test_inputs(batch_size, num_features, input_frames): + return (torch.randn(batch_size, num_features, input_frames),) + + +class TestW2L(unittest.TestCase): + """Tests Wav2Letter.""" + + batch_size = 10 + input_frames = 400 + num_features = 1 + + w2l = models.Wav2Letter(num_features=num_features).eval() + model_example_inputs = get_test_inputs(batch_size, num_features, input_frames) + + all_operators = { + "executorch_exir_dialects_edge__ops_aten_convolution_default", + "executorch_exir_dialects_edge__ops_aten__log_softmax_default", + "executorch_exir_dialects_edge__ops_aten_relu_default", + } + + operators_after_quantization = all_operators - { + "executorch_exir_dialects_edge__ops_aten__log_softmax_default", + } + + @pytest.mark.slow # about 3min on std laptop + def test_w2l_tosa_MI(self): + ( + ArmTester( + self.w2l, + example_inputs=self.model_example_inputs, + compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), + ) + .export() + .dump_operator_distribution() + .to_edge_transform_and_lower() + .dump_operator_distribution() + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs( + inputs=get_test_inputs( + self.batch_size, self.num_features, self.input_frames + ) + ) + ) + + @pytest.mark.slow # about 1min on std laptop + def test_w2l_tosa_BI(self): + ( + ArmTester( + self.w2l, + example_inputs=self.model_example_inputs, + compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), + ) + .quantize() + .export() + .dump_operator_distribution() + .to_edge_transform_and_lower() + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs( + atol=0.1, + qtol=1, + inputs=get_test_inputs( + self.batch_size, self.num_features, self.input_frames + ), + ) + ) + + def _test_w2l_ethos_BI_pipeline( + self, + module: torch.nn.Module, + test_data: Tuple[torch.Tensor], + compile_spec: CompileSpec, + ): + tester = ( + ArmTester(module, example_inputs=test_data, compile_spec=compile_spec) + .quantize() + .export() + .to_edge() + .check(list(self.operators_after_quantization)) + .partition() + .to_executorch() + .serialize() + ) + return tester + + # TODO: expected fail as TOSA.Transpose is not supported by Ethos-U55 + @pytest.mark.slow + @pytest.mark.corstone_fvp + @conftest.expectedFailureOnFVP + def test_w2l_u55_BI(self): + tester = self._test_w2l_ethos_BI_pipeline( + self.w2l, + self.model_example_inputs, + common.get_u55_compile_spec(), + ) + + if conftest.is_option_enabled("corstone_fvp"): + tester.run_method_and_compare_outputs( + atol=1.0, + qtol=1, + inputs=get_test_inputs( + self.batch_size, self.num_features, self.input_frames + ), + ) + + @pytest.mark.slow + @pytest.mark.corstone_fvp + @unittest.skip("Blocked by MLBEDSW-10420") + @conftest.expectedFailureOnFVP # TODO: MLBEDSW-10093 + def test_w2l_u85_BI(self): + tester = self._test_w2l_ethos_BI_pipeline( + self.w2l, + self.model_example_inputs, + common.get_u85_compile_spec(), + ) + + if conftest.is_option_enabled("corstone_fvp"): + tester.run_method_and_compare_outputs( + atol=1.0, + qtol=1, + 
inputs=get_test_inputs( + self.batch_size, self.num_features, self.input_frames + ), + ) From 139be81965188079e2f788bbaf647f5070d5b9c7 Mon Sep 17 00:00:00 2001 From: George Hong Date: Thu, 20 Feb 2025 09:36:40 -0800 Subject: [PATCH 032/584] Update .ptd serialization target visibility to include clients (#8582) Update .ptd serialization target visibility to include clients (#8582) Summary: Make .ptd saving utilities visible outside of targets within *executorch*. Reviewed By: jackzhxng Differential Revision: D69871232 --- extension/flat_tensor/serialize/targets.bzl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/extension/flat_tensor/serialize/targets.bzl b/extension/flat_tensor/serialize/targets.bzl index 78054af30e9..717418ec7e6 100644 --- a/extension/flat_tensor/serialize/targets.bzl +++ b/extension/flat_tensor/serialize/targets.bzl @@ -39,7 +39,9 @@ def define_common_targets(): name = "flat_tensor_header", srcs = ["flat_tensor_header.cpp"], exported_headers = ["flat_tensor_header.h"], - visibility = ["//executorch/..."], + visibility = [ + "//executorch/...", + ], exported_deps = ["//executorch/runtime/core:core"], ) @@ -54,6 +56,7 @@ def define_common_targets(): exported_headers = ["serialize.h"], visibility = [ "//executorch/...", + "@EXECUTORCH_CLIENTS", ], exported_external_deps = ["flatbuffers-api"], ) From 463119eead765398a9a85caf44c97913a8fdb558 Mon Sep 17 00:00:00 2001 From: Shen Chen Xu Date: Thu, 20 Feb 2025 10:02:18 -0800 Subject: [PATCH 033/584] Add smart mask style KVCache and mask Differential Revision: D69595959 Pull Request resolved: https://github.com/pytorch/executorch/pull/8463 --- examples/models/llama/static_attention.py | 67 ++++++- .../llama/tests/test_static_attention.py | 178 ++++++++++-------- 2 files changed, 154 insertions(+), 91 deletions(-) diff --git a/examples/models/llama/static_attention.py b/examples/models/llama/static_attention.py index 1d9b08d2f65..3a5f88ad3f3 100644 --- a/examples/models/llama/static_attention.py +++ b/examples/models/llama/static_attention.py @@ -47,19 +47,29 @@ def calculate_cache_key(layer_id: int, head_id: int) -> str: return f"l{layer_id},h{head_id}" @staticmethod - def apply_update(cache, update, transpose=False): + def apply_update(cache, update, pos, style, transpose=False): """ After inference, update the cache state for next iteration. The runtime needs to implement the same operation. 
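+
+        A sketch of the two styles implemented below:
+        - "shift_pointer": roll the cache back by the update length and
+          write the new entries at the end.
+        - "smart_mask": keep the cache in place and overwrite update_len
+          slots starting at `pos`.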
""" - if transpose: - update_len = update.size(-1) - updated = torch.roll(cache, -update_len, -1) - updated[:, :, -update_len:] = update - else: - update_len = update.size(-2) - updated = torch.roll(cache, -update_len, -2) - updated[:, -update_len:, :] = update + if style == "shift_pointer": + if transpose: + update_len = update.size(-1) + updated = torch.roll(cache, -update_len, -1) + updated[:, :, -update_len:] = update + else: + update_len = update.size(-2) + updated = torch.roll(cache, -update_len, -2) + updated[:, -update_len:, :] = update + + if style == "smart_mask": + updated = torch.clone(cache) + if transpose: + update_len = update.size(-1) + updated[:, :, pos : pos + update_len] = update + else: + update_len = update.size(-2) + updated[:, pos : pos + update_len, :] = update return updated @@ -114,6 +124,44 @@ def update( return all_data, (out_k_cache, out_v_cache) +class StaticAttentionMask: + def __init__(self, input_len, cache_len, style): + self.input_len = input_len + self.cache_len = cache_len + assert style in ("shift_pointer", "smart_mask") + self.style = style + self.unmasked_len = 0 + self.tensor = torch.zeros(1, input_len, input_len + cache_len) + self.reset() + + def reset(self): + self.unmasked_len = 0 + self.tensor[:, :, : self.cache_len] = float("-inf") + + def unmask(self, new_unmasked_len): + if new_unmasked_len <= 0: + return + + if self.style == "shift_pointer": + self.tensor[ + :, + :, + self.cache_len + - self.unmasked_len + - new_unmasked_len : self.cache_len + - self.unmasked_len, + ] = 0 + + if self.style == "smart_mask": + self.tensor[ + :, + :, + self.unmasked_len : self.unmasked_len + new_unmasked_len, + ] = 0 + + self.unmasked_len += new_unmasked_len + + class _Rope(nn.Module): def __init__(self, use_hf_rope): super().__init__() @@ -135,7 +183,6 @@ def forward( x_r, x_i = x[..., ::2], x[..., 1::2] x_out_r = x_r * freqs_cos - x_i * freqs_sin x_out_i = x_r * freqs_sin + x_i * freqs_cos - x_out = torch.cat([x_out_r, x_out_i], dim=-1) return x_out diff --git a/examples/models/llama/tests/test_static_attention.py b/examples/models/llama/tests/test_static_attention.py index bf586ec4a6c..45364b1d5ec 100644 --- a/examples/models/llama/tests/test_static_attention.py +++ b/examples/models/llama/tests/test_static_attention.py @@ -7,6 +7,7 @@ from executorch.examples.models.llama.rope import Rope from executorch.examples.models.llama.static_attention import ( StaticAttention, + StaticAttentionMask, StaticKVCache, ) @@ -92,48 +93,54 @@ def test_with_cache(self): n_chunks = 3 chunk_len = config.max_seq_len // n_chunks cache_len = config.max_seq_len - chunk_len - mask = torch.zeros(1, chunk_len, cache_len + chunk_len) - mask[:, :, :cache_len] = float("-inf") - mask[:, :, cache_len:] = torch.triu( - torch.full((1, chunk_len, chunk_len), float("-inf")), - diagonal=1, - ) - k_caches = { - StaticKVCache.calculate_cache_key(layer_id, i): torch.zeros( - 1, cache_len, config.head_dim - ) - for i in range(config.n_kv_heads) - } - v_caches = { - StaticKVCache.calculate_cache_key(layer_id, i): torch.zeros( - 1, cache_len, config.head_dim - ) - for i in range(config.n_kv_heads) - } - ys = [] - for i in range(n_chunks): - y_i, attn_update = static_attn( - x[:, i * chunk_len : (i + 1) * chunk_len, :], - freqs_cos[i * chunk_len : (i + 1) * chunk_len], - freqs_sin[i * chunk_len : (i + 1) * chunk_len], - mask=mask, - in_cache_state=(k_caches, v_caches), - out_cache_state=({}, {}), + + def test_with_style(style): + mask = StaticAttentionMask(chunk_len, cache_len, style=style) + 
mask.tensor[:, :, cache_len:] = torch.triu( + torch.full((1, chunk_len, chunk_len), float("-inf")), + diagonal=1, ) - ys.append(y_i) - mask[:, :, cache_len - chunk_len * (i + 1) : cache_len] = 0 - k_cache_updates, v_cache_updates = attn_update["out_cache_state"] - for cache_id, update in k_cache_updates.items(): - k_caches[cache_id] = StaticKVCache.apply_update( - k_caches[cache_id], update + k_caches = { + StaticKVCache.calculate_cache_key(layer_id, i): torch.zeros( + 1, cache_len, config.head_dim ) - for cache_id, update in v_cache_updates.items(): - v_caches[cache_id] = StaticKVCache.apply_update( - v_caches[cache_id], update + for i in range(config.n_kv_heads) + } + v_caches = { + StaticKVCache.calculate_cache_key(layer_id, i): torch.zeros( + 1, cache_len, config.head_dim ) - - y = torch.cat(ys, dim=1) - self.assertTrue(torch.isclose(y, expected, rtol=1e-3).all()) + for i in range(config.n_kv_heads) + } + ys = [] + for i in range(n_chunks): + y_i, attn_update = static_attn( + x[:, i * chunk_len : (i + 1) * chunk_len, :], + freqs_cos[i * chunk_len : (i + 1) * chunk_len], + freqs_sin[i * chunk_len : (i + 1) * chunk_len], + mask=mask.tensor, + in_cache_state=(k_caches, v_caches), + out_cache_state=({}, {}), + ) + ys.append(y_i) + mask.unmask(chunk_len) + k_cache_updates, v_cache_updates = attn_update["out_cache_state"] + + if i < n_chunks - 1: + for cache_id, update in k_cache_updates.items(): + k_caches[cache_id] = StaticKVCache.apply_update( + k_caches[cache_id], update, pos=chunk_len * i, style=style + ) + for cache_id, update in v_cache_updates.items(): + v_caches[cache_id] = StaticKVCache.apply_update( + v_caches[cache_id], update, pos=chunk_len * i, style=style + ) + + y = torch.cat(ys, dim=1) + self.assertTrue(torch.isclose(y, expected, rtol=1e-3).all()) + + test_with_style("shift_pointer") + test_with_style("smart_mask") def test_within_transformer(self): config = ModelArgs( @@ -162,48 +169,57 @@ def test_within_transformer(self): n_chunks = 3 chunk_len = config.max_seq_len // n_chunks cache_len = config.max_seq_len - chunk_len - mask = torch.zeros(1, chunk_len, cache_len + chunk_len) - mask[:, :, :cache_len] = float("-inf") - mask[:, :, cache_len:] = torch.triu( - torch.full((1, chunk_len, chunk_len), float("-inf")), - diagonal=1, - ) - k_caches = { - StaticKVCache.calculate_cache_key(layer_id, i): torch.zeros( - 1, cache_len, config.head_dim - ) - for layer_id in range(config.n_layers) - for i in range(config.n_kv_heads) - } - v_caches = { - StaticKVCache.calculate_cache_key(layer_id, i): torch.zeros( - 1, cache_len, config.head_dim - ) - for layer_id in range(config.n_layers) - for i in range(config.n_kv_heads) - } - ys = [] - for i in range(n_chunks): - y_i, attn_update = static_transformer( - x[:, i * chunk_len : (i + 1) * chunk_len], - attn_options=ForwardOptions( - mask=mask, - freqs_cos_override=freqs_cos[i * chunk_len : (i + 1) * chunk_len], - freqs_sin_override=freqs_sin[i * chunk_len : (i + 1) * chunk_len], - in_cache_state=(k_caches, v_caches), - out_cache_state=({}, {}), - ), + + def test_with_style(style): + mask = StaticAttentionMask(chunk_len, cache_len, style=style) + mask.tensor[:, :, cache_len:] = torch.triu( + torch.full((1, chunk_len, chunk_len), float("-inf")), + diagonal=1, ) - ys.append(y_i) - mask[:, :, cache_len - chunk_len * (i + 1) : cache_len] = 0 - k_cache_updates, v_cache_updates = attn_update["out_cache_state"] - for cache_id, update in k_cache_updates.items(): - k_caches[cache_id] = StaticKVCache.apply_update( - k_caches[cache_id], update + 
k_caches = {
+                StaticKVCache.calculate_cache_key(layer_id, i): torch.zeros(
+                    1, cache_len, config.head_dim
+                )
+                for layer_id in range(config.n_layers)
+                for i in range(config.n_kv_heads)
+            }
+            v_caches = {
+                StaticKVCache.calculate_cache_key(layer_id, i): torch.zeros(
+                    1, cache_len, config.head_dim
+                )
+                for layer_id in range(config.n_layers)
+                for i in range(config.n_kv_heads)
+            }
+            ys = []
+            for i in range(n_chunks):
+                y_i, attn_update = static_transformer(
+                    x[:, i * chunk_len : (i + 1) * chunk_len],
+                    attn_options=ForwardOptions(
+                        mask=mask.tensor,
+                        freqs_cos_override=freqs_cos[
+                            i * chunk_len : (i + 1) * chunk_len
+                        ],
+                        freqs_sin_override=freqs_sin[
+                            i * chunk_len : (i + 1) * chunk_len
+                        ],
+                        in_cache_state=(k_caches, v_caches),
+                        out_cache_state=({}, {}),
+                    ),
+                )
+                ys.append(y_i)
+                mask.unmask(chunk_len)
+                k_cache_updates, v_cache_updates = attn_update["out_cache_state"]
+                if i < n_chunks - 1:
+                    for cache_id, update in k_cache_updates.items():
+                        k_caches[cache_id] = StaticKVCache.apply_update(
+                            k_caches[cache_id], update, pos=chunk_len * i, style=style
+                        )
+                    for cache_id, update in v_cache_updates.items():
+                        v_caches[cache_id] = StaticKVCache.apply_update(
+                            v_caches[cache_id], update, pos=chunk_len * i, style=style
+                        )
+
+            self.assertTrue(torch.isclose(ys[-1], expected, rtol=1e-3).all())
+
+        test_with_style("shift_pointer")
+        test_with_style("smart_mask")

From da17f66b09a7672630c6baa684296c5cf50d93c5 Mon Sep 17 00:00:00 2001
From: pytorchbot
Date: Thu, 20 Feb 2025 12:25:12 -0600
Subject: [PATCH 034/584] [flat_tensor] Persist FreeableBuffers of external constants in method (#8599)

Pull Request resolved: https://github.com/pytorch/executorch/pull/8437

## Problem

Currently, the FlatTensorDataMap persists tensors, and returns a FreeableBuffer with an empty free function. The NamedDataMap should not persist data, as most cases (e.g. delegate) will want it to be freed.

Ownership should be on the caller; `get_data` returns a FreeableBuffer that 'owns' the data. The FreeableBuffer in turn is owned by the caller.

NOTE: this doesn't support the case where we want to share plain tensors between methods/pte files at runtime. A custom NDM could support that use-case.

## This diff:
1. Introduces a 'NamedData' struct to method.h. This holds a key and a FreeableBuffer.
2. Iterates over all the flatbuffer tensors to count the constants tagged with EXTERNAL. NOTE: this will increase load time for all users. Potentially allocate chunks of 16 and use a linked list to store external constants, or store this number in the PTE file (see D69618283).
3. Allocates space for num_external_constants using the method allocator.
4. Iterates over all flatbuffer tensors and uses the named_data_map to resolve EXTERNAL tensors into the array of NamedData.
5. Passes the resolved external constants to tensor_parser, along with the NDM (used for mutable external tensors).
6. Resolved external tensors are stored inside the method. They are freed when the method is destructed.

Some notes: https://docs.google.com/document/d/1_PBi4JgODuClUPD4PCUWrKNjyUH54zOUHGUJ3QHDNes/edit?tab=t.0#heading=h.blsvwraxss7g

ghstack-source-id: 267364187

TODO: add test case when two fqns point to the same data buffer.
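For reference, the `NamedData` struct introduced in tensor_parser.h (shown in full in the diff below) boils down to:

```cpp
// Key plus owned buffer for one resolved external constant. Entries live in
// Method::external_constants_ for the lifetime of the Method and their
// buffers are freed in ~Method().
struct NamedData {
  const char* key;        // fully qualified tensor name
  FreeableBuffer buffer;  // owns the underlying data
};
```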
Differential Revision: [D69477027](https://our.internmc.facebook.com/intern/diff/D69477027/) Co-authored-by: lucylq --- runtime/executor/method.cpp | 149 +++++++++++++++++-- runtime/executor/method.h | 36 +++++ runtime/executor/tensor_parser.h | 27 +++- runtime/executor/tensor_parser_aten.cpp | 6 +- runtime/executor/tensor_parser_exec_aten.cpp | 134 +++++++++-------- runtime/executor/tensor_parser_portable.cpp | 7 +- 6 files changed, 279 insertions(+), 80 deletions(-) diff --git a/runtime/executor/method.cpp b/runtime/executor/method.cpp index d435678ca2b..0857bc1c976 100644 --- a/runtime/executor/method.cpp +++ b/runtime/executor/method.cpp @@ -33,6 +33,7 @@ namespace executorch { namespace runtime { +using deserialization::NamedData; using internal::PlatformMemoryAllocator; /** @@ -289,6 +290,113 @@ Result parse_cond_value(const EValue& cond_value) { } // namespace +Result Method::get_num_external_constants() { + auto flatbuffer_values = serialization_plan_->values(); + size_t n_value = flatbuffer_values->size(); + + size_t n_external_constants = 0; + for (size_t i = 0; i < n_value; ++i) { + auto serialization_value = flatbuffer_values->Get(i); + // Ensure values are non-null. + // Note that as a side-effect of this check, we're guaranteed that all + // values are non-null, so later loops can skip that check. + ET_CHECK_OR_RETURN_ERROR( + serialization_value != nullptr && + (serialization_value->val_type() == + executorch_flatbuffer::KernelTypes::Null || + serialization_value->val() != nullptr), + InvalidProgram, + "Null value at index %" ET_PRIsize_t, + i); + // Ignore non-tensor types. + if (serialization_value->val_type() != + executorch_flatbuffer::KernelTypes::Tensor) { + continue; + } + const auto s_tensor = static_cast( + serialization_value->val()); + + // An external constant is tagged with EXTERNAL and has no + // allocation_info. + if (s_tensor->extra_tensor_info() != nullptr && + s_tensor->extra_tensor_info()->location() == + executorch_flatbuffer::TensorDataLocation::EXTERNAL && + s_tensor->allocation_info() == nullptr) { + n_external_constants++; + } + } + return n_external_constants; +} + +Error Method::parse_external_constants(const NamedDataMap* named_data_map) { + auto flatbuffer_values = serialization_plan_->values(); + size_t n_value = flatbuffer_values->size(); + + // n_external_constants_ counts the number of successfully-initialized + // external constants for ~Method() to clean up, and is incremented at the + // bottom of the loop. This makes it safe for errors to return without + // updating any state. + n_external_constants_ = 0; + for (size_t i = 0; i < n_value; ++i) { + auto serialization_value = flatbuffer_values->Get(i); + // Ignore non-tensor types. + if (serialization_value->val_type() != + executorch_flatbuffer::KernelTypes::Tensor) { + continue; + } + const auto s_tensor = static_cast( + serialization_value->val()); + // Constant tensors are resolved here; tensors with allocation_info are + // mutable and are resolved in parse_values. 
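+    // Buffers resolved here are owned by the Method; they are released in
+    // ~Method().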
+ if (s_tensor->extra_tensor_info() == nullptr || + s_tensor->extra_tensor_info()->location() != + executorch_flatbuffer::TensorDataLocation::EXTERNAL || + s_tensor->allocation_info() != nullptr) { + continue; + } + ET_CHECK_OR_RETURN_ERROR( + s_tensor->extra_tensor_info()->fully_qualified_name() != nullptr, + InvalidExternalData, + "Fully qualified name of external tensor is null at index %zu", + i); + + const char* key = + s_tensor->extra_tensor_info()->fully_qualified_name()->c_str(); + + // Check if this tensor has already been resolved. + if (get_data_by_key( + key, Span(external_constants_, n_external_constants_)) != + nullptr) { + continue; + } + Result tensor_layout = + named_data_map->get_metadata(key); + if (!tensor_layout.ok()) { + return tensor_layout.error(); + } + // Check external tensor compatibility. + Error err = + deserialization::validateTensorLayout(s_tensor, tensor_layout.get()); + if (err != Error::Ok) { + return err; + } + // Save the key. + external_constants_[n_external_constants_].key = key; + + // Save the buffer. + Result buffer = named_data_map->get_data(key); + ET_CHECK_OR_RETURN_ERROR( + buffer.ok(), + InvalidExternalData, + "Buffer retrieved from get_data is not valid"); + new (&external_constants_[n_external_constants_].buffer) + FreeableBuffer(std::move(buffer.get())); + + n_external_constants_ += 1; + } + return Error::Ok; +} + Error Method::parse_values(const NamedDataMap* named_data_map) { auto flatbuffer_values = serialization_plan_->values(); ET_CHECK_OR_RETURN_ERROR( @@ -299,6 +407,30 @@ Error Method::parse_values(const NamedDataMap* named_data_map) { return Error::MemoryAllocationFailed; } + // Count the number of tensors marked as EXTERNAL for this method. The actual + // number of external constants may be smaller, eg. if multiple tensors point + // to the same underlying data buffer. + // This function also ensures that all flatbuffer_values entries + // are non-null, so `val_as_X()` calls below are guaranteed to return + // non-null pointers. + Result max_external_constants = get_num_external_constants(); + if (!max_external_constants.ok()) { + return max_external_constants.error(); + } + if (max_external_constants.get() > 0) { + // Allocate space for external tensors. + external_constants_ = + memory_manager_->method_allocator()->allocateList( + max_external_constants.get()); + if (external_constants_ == nullptr) { + return Error::MemoryAllocationFailed; + } + Error err = parse_external_constants(named_data_map); + if (err != Error::Ok) { + return err; + } + } + // n_value_ counts the number of successfully-initialized values for ~Method() // to clean up, and is incremented at the bottom of the loop. This makes it // safe for errors to return without updating any state. @@ -306,16 +438,6 @@ Error Method::parse_values(const NamedDataMap* named_data_map) { for (size_t i = 0; i < n_value; ++i) { auto serialization_value = flatbuffer_values->Get(i); - // Ensure that the `val_as_X()` calls will return non-null pointers. 
- ET_CHECK_OR_RETURN_ERROR( - serialization_value != nullptr && - (serialization_value->val_type() == - executorch_flatbuffer::KernelTypes::Null || - serialization_value->val() != nullptr), - InvalidProgram, - "Null value at index %" ET_PRIsize_t, - i); - const auto val = serialization_value->val(); switch (serialization_value->val_type()) { @@ -416,7 +538,8 @@ Error Method::parse_values(const NamedDataMap* named_data_map) { program_, memory_manager_, static_cast(val), - named_data_map); + named_data_map, + Span(external_constants_, n_external_constants_)); if (!t.ok()) { ET_LOG( Error, @@ -1496,6 +1619,10 @@ Method::~Method() { delegates_[i].~BackendDelegate(); } } + // Free resources associated with external constants. + for (int i = 0; i < n_external_constants_; i++) { + external_constants_[i].buffer.~FreeableBuffer(); + } // All other fields are trivially destructible. } } // namespace runtime diff --git a/runtime/executor/method.h b/runtime/executor/method.h index dff4e818f9f..4108db8810e 100644 --- a/runtime/executor/method.h +++ b/runtime/executor/method.h @@ -31,6 +31,12 @@ struct EValue; namespace executorch { namespace runtime { +// Forward declare NamedData. This is a public header and must not include +// internal data types. +namespace deserialization { +struct NamedData; +} // namespace deserialization + // Forward declare Program to avoid a circular reference. class Program; @@ -42,6 +48,7 @@ using OpFunction = void (*)(KernelRuntimeContext&, EValue**); /// A list of pointers into the master values table that together compose the /// argument list for a single instruction using InstructionArgs = Span; +using deserialization::NamedData; /** * An executable method of an executorch program. Maps to a python method like @@ -66,6 +73,8 @@ class Method final { delegates_(rhs.delegates_), n_chains_(rhs.n_chains_), chains_(rhs.chains_), + external_constants_(rhs.external_constants_), + n_external_constants_(rhs.n_external_constants_), init_state_(rhs.init_state_) { // Required: clear out fields that the dtor looks at, so that we don't free // anything twice. @@ -73,6 +82,8 @@ class Method final { rhs.values_ = nullptr; rhs.n_delegate_ = 0; rhs.delegates_ = nullptr; + rhs.n_external_constants_ = 0; + rhs.external_constants_ = nullptr; // Helpful: Try to ensure that any other interactions with the old object // result in failures. @@ -288,6 +299,8 @@ class Method final { delegates_(nullptr), n_chains_(0), chains_(nullptr), + external_constants_(nullptr), + n_external_constants_(0), init_state_(InitializationState::Uninitialized) {} /// Static factory used by Program. @@ -336,8 +349,31 @@ class Method final { size_t n_chains_; Chain* chains_; + NamedData* external_constants_; + size_t n_external_constants_ = 0; + InitializationState init_state_; + /** + * Counts the number of tensors marked as EXTERNAL in the flatbuffer + * for this method. + */ + ET_NODISCARD Result get_num_external_constants(); + + /** + * Parses the flatbuffer for constant tensors tagged as EXTERNAL. + * Retrieves the external constants using the named_data_map and places them + * into `external_constants_`. Updates `n_external_constants_` to count the + * number of successfully-initialized external constants. + * FreeableBuffers returned by the named_data_map are owned by the + * method and are freed on method destruction. + * + * @param[in] named_data_map, to retrieve external constants from. + * @returns Error::Ok on success, non-Ok on failure. 
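+   *
+   * Note: duplicate fully qualified names resolve to a single shared entry,
+   * so n_external_constants_ may end up smaller than the count returned by
+   * get_num_external_constants().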
+ */ + ET_NODISCARD Error + parse_external_constants(const NamedDataMap* named_data_map); + /** * Parses the elements of the values_ array. On error, n_value_ will be set to * the number of successfully-initialized entries so that ~Method doesn't try diff --git a/runtime/executor/tensor_parser.h b/runtime/executor/tensor_parser.h index 2ffb473544d..cfd711713ac 100644 --- a/runtime/executor/tensor_parser.h +++ b/runtime/executor/tensor_parser.h @@ -21,11 +21,21 @@ namespace executorch { namespace runtime { namespace deserialization { +/// Data structure to hold key and data buffer for external data used +/// in a method. +struct NamedData { + const char* key; + FreeableBuffer buffer; +}; + +NamedData* get_data_by_key(const char* key, Span entries); + ET_NODISCARD Result parseTensor( const Program* program, MemoryManager* memory_manager, const executorch_flatbuffer::Tensor* s_tensor, - const NamedDataMap* named_data_map = nullptr); + const NamedDataMap* named_data_map = nullptr, + Span external_constants = {}); ET_NODISCARD Result> parseTensorList( const flatbuffers::Vector* tensor_indices, @@ -33,6 +43,12 @@ ET_NODISCARD Result> parseTensorList( size_t values_len, MemoryManager* memory_manager); +// Checks that the sizes, dim_order and scalar_type match between tensors +// stored in the PTE and externally. +ET_NODISCARD Error validateTensorLayout( + const executorch_flatbuffer::Tensor* s_tensor, + const TensorLayout& expected_layout); + // Deserializes a List of optional type. The code here is the same between all // list of optionals: list of optional Tensor, list of optional float etc, so we // just use a template to avoid boilerplate. @@ -105,7 +121,11 @@ parseListOptionalType( * @param[in] nbytes The amount of memory to get from the allocator. * @param[in] allocator The source of memory for non-constant tensors. * @param[in] named_data_map An optional map of {name, blob} used to resolve - * data that is external to the PTE, if any. + * data that is mutable and external to the PTE, if any. + * @param[in] external_constants An optional span containing tensor fqn to + * corresponding tensor data. Used to resolve data that is constant and + * external to the PTE, if any. Referencing data from external_constants is + * safe, as it has the same lifetime as the method. * * @returns On success, the data pointer to use for the tensor. On failure, a * non-Ok Error. 
@@ -115,7 +135,8 @@ ET_NODISCARD Result getTensorDataPtr( const Program* program, size_t nbytes, HierarchicalAllocator* allocator, - const NamedDataMap* named_data_map = nullptr); + const NamedDataMap* named_data_map = nullptr, + Span external_constants = {}); } // namespace deserialization } // namespace runtime diff --git a/runtime/executor/tensor_parser_aten.cpp b/runtime/executor/tensor_parser_aten.cpp index ab9af3d0399..d1a2f712853 100644 --- a/runtime/executor/tensor_parser_aten.cpp +++ b/runtime/executor/tensor_parser_aten.cpp @@ -33,7 +33,8 @@ Result parseTensor( const Program* program, MemoryManager* memory_manager, const executorch_flatbuffer::Tensor* s_tensor, - const NamedDataMap* named_data_map) { + const NamedDataMap* named_data_map, + Span external_constants) { EXECUTORCH_SCOPE_PROF("TensorParser::parseTensor"); ET_CHECK_OR_RETURN_ERROR( @@ -108,7 +109,8 @@ Result parseTensor( program, tensor.nbytes(), memory_manager->planned_memory(), - named_data_map); + named_data_map, + external_constants); if (!data_ptr.ok()) { ET_LOG( Error, diff --git a/runtime/executor/tensor_parser_exec_aten.cpp b/runtime/executor/tensor_parser_exec_aten.cpp index 83310ff680c..a1ac245acca 100644 --- a/runtime/executor/tensor_parser_exec_aten.cpp +++ b/runtime/executor/tensor_parser_exec_aten.cpp @@ -111,12 +111,60 @@ ET_NODISCARD Result> parseTensorList( evalp_list, tensor_list, tensor_indices->size()); } +ET_NODISCARD Error validateTensorLayout( + const executorch_flatbuffer::Tensor* s_tensor, + const TensorLayout& expected_layout) { + ET_CHECK_OR_RETURN_ERROR( + static_cast(s_tensor->scalar_type()) == + expected_layout.scalar_type(), + InvalidExternalData, + "Scalar type mismatch. Expected %hhd, got %hhd.", + static_cast(s_tensor->scalar_type()), + static_cast(expected_layout.scalar_type())); + int dim = s_tensor->sizes()->size(); + ET_CHECK_OR_RETURN_ERROR( + dim == expected_layout.sizes().size(), + InvalidExternalData, + "Dim mismatch. Expected %d, got %zu.", + dim, + expected_layout.sizes().size()); + for (int i = 0; i < dim; i++) { + ET_CHECK_OR_RETURN_ERROR( + s_tensor->sizes()->Get(i) == expected_layout.sizes()[i], + InvalidExternalData, + "Sizes mismatch. Expected %d, got %d for size at index %d.", + s_tensor->sizes()->Get(i), + expected_layout.sizes()[i], + i); + ET_CHECK_OR_RETURN_ERROR( + s_tensor->dim_order()->Get(i) == expected_layout.dim_order()[i], + InvalidExternalData, + "Dim order mismatch. Expected %d, got %d for dim at index %d.", + s_tensor->dim_order()->Get(i), + expected_layout.dim_order()[i], + i); + } + return Error::Ok; +} + +// Check if key exists in entries. If it does, return a pointer to the entry +// otherwise return a nullptr. +NamedData* get_data_by_key(const char* key, Span entries) { + for (int i = 0; i < entries.size(); i++) { + if (strcmp(key, entries[i].key) == 0) { + return &entries[i]; + } + } + return nullptr; +} + ET_NODISCARD Result getTensorDataPtr( const executorch_flatbuffer::Tensor* s_tensor, const Program* program, size_t nbytes, HierarchicalAllocator* allocator, - const NamedDataMap* named_data_map) { + const NamedDataMap* named_data_map, + Span external_constants) { auto data_buffer_idx = s_tensor->data_buffer_idx(); const executorch_flatbuffer::AllocationDetails* allocation_info = s_tensor->allocation_info(); @@ -146,76 +194,38 @@ ET_NODISCARD Result getTensorDataPtr( s_tensor->extra_tensor_info()->fully_qualified_name() != nullptr, InvalidExternalData, "Fully qualified name of external tensor is null"); - // Look up tensor in named data map. 
- Result tensor_layout_res = named_data_map->get_metadata( - s_tensor->extra_tensor_info()->fully_qualified_name()->c_str()); - if (!tensor_layout_res.ok()) { - return tensor_layout_res.error(); - } - const TensorLayout& tensor_layout = tensor_layout_res.get(); - - // Compatibility checking. - ET_CHECK_OR_RETURN_ERROR( - static_cast(s_tensor->scalar_type()) == - tensor_layout.scalar_type(), - InvalidExternalData, - "Scalar type mismatch. Expected %hhd, got %hhd.", - static_cast(s_tensor->scalar_type()), - static_cast(tensor_layout.scalar_type())); - ET_CHECK_OR_RETURN_ERROR( - nbytes == tensor_layout.nbytes(), - InvalidExternalData, - "Nbytes mismatch. Expected %zu, got %zu.", - nbytes, - tensor_layout.nbytes()); - int dim = s_tensor->sizes()->size(); - ET_CHECK_OR_RETURN_ERROR( - dim == tensor_layout.sizes().size(), - InvalidExternalData, - "Dim mismatch. Expected %d, got %zu.", - dim, - tensor_layout.sizes().size()); - for (int i = 0; i < dim; i++) { - ET_CHECK_OR_RETURN_ERROR( - s_tensor->sizes()->Get(i) == tensor_layout.sizes()[i], - InvalidExternalData, - "Sizes mismatch. Expected %d, got %d for size at index %d.", - s_tensor->sizes()->Get(i), - tensor_layout.sizes()[i], - i); - ET_CHECK_OR_RETURN_ERROR( - s_tensor->dim_order()->Get(i) == tensor_layout.dim_order()[i], - InvalidExternalData, - "Dim order mismatch. Expected %d, got %d for dim at index %d.", - s_tensor->dim_order()->Get(i), - tensor_layout.dim_order()[i], - i); - } + const char* fqn = + s_tensor->extra_tensor_info()->fully_qualified_name()->c_str(); // Constant value. if (allocation_info == nullptr) { - Result data_res = named_data_map->get_data( - s_tensor->extra_tensor_info()->fully_qualified_name()->c_str()); - if (!data_res.ok()) { - return data_res.error(); + NamedData* data = get_data_by_key(fqn, external_constants); + if (data != nullptr) { + return const_cast(data->buffer.data()); + } + // Should never reach here; these tensors are resolved in + // Method::parse_external_constants. Any errors should be caught there. + return Error::Internal; + } else { + // Mutable value. + // Look up tensor in named data map. + Result tensor_layout_res = + named_data_map->get_metadata(fqn); + if (!tensor_layout_res.ok()) { + return tensor_layout_res.error(); + } + const TensorLayout& tensor_layout = tensor_layout_res.get(); + Error err = validateTensorLayout(s_tensor, tensor_layout); + if (err != Error::Ok) { + return err; } - // The const_cast is 'ok' here because program and runtime should - // guarantee that this data is never modified. Temporary until runtime - // takes ownership of FreeableBuffers in TODO(T214294528). - return const_cast(data_res.get().data()); - } - - // Mutable value. - else { // Call load_into. 
auto planned_ptr = getMemPlannedPtr(allocation_info, nbytes, allocator); if (!planned_ptr.ok()) { return planned_ptr.error(); } - auto size = named_data_map->load_data_into( - s_tensor->extra_tensor_info()->fully_qualified_name()->c_str(), - planned_ptr.get(), - nbytes); + auto size = + named_data_map->load_data_into(fqn, planned_ptr.get(), nbytes); if (size.error() != Error::Ok) { return size.error(); } diff --git a/runtime/executor/tensor_parser_portable.cpp b/runtime/executor/tensor_parser_portable.cpp index a53295470fc..3a29c86700c 100644 --- a/runtime/executor/tensor_parser_portable.cpp +++ b/runtime/executor/tensor_parser_portable.cpp @@ -21,6 +21,7 @@ namespace executorch { namespace runtime { namespace deserialization { +using executorch::runtime::Span; using torch::executor::ScalarType; using torch::executor::Tensor; using torch::executor::TensorImpl; @@ -29,7 +30,8 @@ Result parseTensor( const Program* program, MemoryManager* memory_manager, const executorch_flatbuffer::Tensor* s_tensor, - const NamedDataMap* named_data_map) { + const NamedDataMap* named_data_map, + Span external_constants) { EXECUTORCH_SCOPE_PROF("TensorParser::parseTensor"); auto method_allocator = memory_manager->method_allocator(); @@ -149,7 +151,8 @@ Result parseTensor( program, tensor_impl->nbytes(), memory_manager->planned_memory(), - named_data_map); + named_data_map, + external_constants); if (!data_ptr.ok()) { ET_LOG( Error, From 9de9ed4e976de5d603ae0d121db46c0170ac3e06 Mon Sep 17 00:00:00 2001 From: lucylq Date: Thu, 20 Feb 2025 12:55:09 -0800 Subject: [PATCH 035/584] Tokenizer test (#21) Differential Revision: D69860352 Pull Request resolved: https://github.com/pytorch/executorch/pull/8586 --- shim/xplat/executorch/build/runtime_wrapper.bzl | 2 +- test/utils/targets.bzl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/shim/xplat/executorch/build/runtime_wrapper.bzl b/shim/xplat/executorch/build/runtime_wrapper.bzl index 03bca6623f9..b81aabcd83f 100644 --- a/shim/xplat/executorch/build/runtime_wrapper.bzl +++ b/shim/xplat/executorch/build/runtime_wrapper.bzl @@ -171,7 +171,7 @@ def _patch_kwargs_common(kwargs): # don't pick up unexpected clients while things are still in flux. if not kwargs.pop("_is_external_target", False): for target in kwargs.get("visibility", []): - if not (target.startswith("//executorch") or target.startswith("@")): + if not (target.startswith("//executorch") or target.startswith("//pytorch/tokenizers") or target.startswith("@")): fail("Please manage all external visibility using the " + "EXECUTORCH_CLIENTS list in " + "//executorch/build/fb/clients.bzl. " + diff --git a/test/utils/targets.bzl b/test/utils/targets.bzl index b16ce2bac25..93e33daf81f 100644 --- a/test/utils/targets.bzl +++ b/test/utils/targets.bzl @@ -21,6 +21,7 @@ def define_common_targets(): ], visibility = [ "//executorch/...", + "//pytorch/tokenizers/...", "@EXECUTORCH_CLIENTS", ], deps = [ From 254eecab290d37b64bec117d8a66025a9f6d6df8 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Thu, 20 Feb 2025 12:59:11 -0800 Subject: [PATCH 036/584] Run unittests in debug mode at pull time and release mode on trunk builds (#8550) Per discussion in discord with @mergennachin. Motivation was to have debug coverage to catch debug-only test failures. 
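
With this change the unittest scripts take the build mode as a required
second argument; the two CI tiers invoke them as (mirroring the workflow
settings in the diff below):

```sh
.ci/scripts/unittest-linux.sh cmake Debug    # pull-time, catches debug-only failures
.ci/scripts/unittest-linux.sh cmake Release  # trunk, keeps optimized coverage
```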
--- .ci/scripts/setup-linux.sh | 2 +- .ci/scripts/setup-macos.sh | 2 +- .ci/scripts/unittest-linux.sh | 10 +++++++++- .ci/scripts/unittest-macos.sh | 10 +++++++++- .ci/scripts/utils.sh | 4 ++-- .github/workflows/_unittest.yml | 8 ++++++-- .github/workflows/pull.yml | 2 ++ .github/workflows/trunk.yml | 10 ++++++++++ 8 files changed, 40 insertions(+), 8 deletions(-) diff --git a/.ci/scripts/setup-linux.sh b/.ci/scripts/setup-linux.sh index 36fbcd72743..776bf6f7953 100755 --- a/.ci/scripts/setup-linux.sh +++ b/.ci/scripts/setup-linux.sh @@ -22,7 +22,7 @@ fi # have already been installed, so we use PyTorch build from source here instead # of nightly. This allows CI to test against latest commits from PyTorch install_executorch "use-pt-pinned-commit" -build_executorch_runner "${BUILD_TOOL}" +build_executorch_runner "${BUILD_TOOL}" "${2:-Release}" if [[ "${GITHUB_BASE_REF:-}" == *main* || "${GITHUB_BASE_REF:-}" == *gh* ]]; then do_not_use_nightly_on_ci diff --git a/.ci/scripts/setup-macos.sh b/.ci/scripts/setup-macos.sh index 033c2996038..bb8e45f23f1 100755 --- a/.ci/scripts/setup-macos.sh +++ b/.ci/scripts/setup-macos.sh @@ -136,7 +136,7 @@ install_pytorch_and_domains # We build PyTorch from source here instead of using nightly. This allows CI to test against # the pinned commit from PyTorch install_executorch "use-pt-pinned-commit" -build_executorch_runner "${BUILD_TOOL}" +build_executorch_runner "${BUILD_TOOL}" "${2:-Release}" if [[ "${GITHUB_BASE_REF:-}" == *main* || "${GITHUB_BASE_REF:-}" == *gh* ]]; then do_not_use_nightly_on_ci diff --git a/.ci/scripts/unittest-linux.sh b/.ci/scripts/unittest-linux.sh index 5902e3efd21..e76b43fa22c 100755 --- a/.ci/scripts/unittest-linux.sh +++ b/.ci/scripts/unittest-linux.sh @@ -14,6 +14,14 @@ else exit 1 fi +BUILD_MODE=$2 +if [[ "${BUILD_MODE:-}" =~ ^(Debug|Release)$ ]]; then + echo "Running tests in build mode ${BUILD_MODE} ..." +else + echo "Unsupported build mode ${BUILD_MODE}, options are Debug or Release." + exit 1 +fi + # The generic Linux job chooses to use base env, not the one setup by the image eval "$(conda shell.bash hook)" CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") @@ -25,7 +33,7 @@ source .ci/scripts/setup-vulkan-linux-deps.sh PYTHON_EXECUTABLE=python \ EXECUTORCH_BUILD_PYBIND=ON \ CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ -.ci/scripts/setup-linux.sh "$BUILD_TOOL" +.ci/scripts/setup-linux.sh "$BUILD_TOOL" "$BUILD_MODE" # Install llama3_2_vision dependencies. PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh diff --git a/.ci/scripts/unittest-macos.sh b/.ci/scripts/unittest-macos.sh index 907472b96bb..9c29191b459 100755 --- a/.ci/scripts/unittest-macos.sh +++ b/.ci/scripts/unittest-macos.sh @@ -14,6 +14,14 @@ else exit 1 fi +BUILD_MODE=$2 +if [[ $BUILD_MODE =~ ^(Debug|Release)$ ]]; then + echo "Running tests in build mode ${BUILD_MODE} ..." +else + echo "Unsupported build mode ${BUILD_MODE}, options are Debug or Release." + exit 1 +fi + bash .ci/scripts/setup-conda.sh eval "$(conda shell.bash hook)" @@ -27,7 +35,7 @@ PYTHON_EXECUTABLE=python \ EXECUTORCH_BUILD_PYBIND=ON \ CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ ${CONDA_RUN} --no-capture-output \ -.ci/scripts/setup-macos.sh cmake +.ci/scripts/setup-macos.sh "${BUILD_TOOL}" "${BUILD_MODE}" # Install llama3_2_vision dependencies. 
PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh diff --git a/.ci/scripts/utils.sh b/.ci/scripts/utils.sh index be684b7bfa2..e0bc935e861 100644 --- a/.ci/scripts/utils.sh +++ b/.ci/scripts/utils.sh @@ -109,7 +109,7 @@ build_executorch_runner_cmake() { pushd "${CMAKE_OUTPUT_DIR}" || return # This command uses buck2 to gather source files and buck2 could crash flakily # on MacOS - retry cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" -DCMAKE_BUILD_TYPE=Release .. + retry cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" -DCMAKE_BUILD_TYPE="${1:-Release}" .. popd || return if [ "$(uname)" == "Darwin" ]; then @@ -124,7 +124,7 @@ build_executorch_runner() { if [[ $1 == "buck2" ]]; then build_executorch_runner_buck2 elif [[ $1 == "cmake" ]]; then - build_executorch_runner_cmake + build_executorch_runner_cmake "$2" else echo "Invalid build tool $1. Only buck2 and cmake are supported atm" exit 1 diff --git a/.github/workflows/_unittest.yml b/.github/workflows/_unittest.yml index 15079920717..f2eb2cfdb51 100644 --- a/.github/workflows/_unittest.yml +++ b/.github/workflows/_unittest.yml @@ -7,6 +7,10 @@ on: required: true type: string description: Name of the docker image to use. + build-mode: + required: true + type: string + description: Build mode to use, Debug or Release. build-tool: required: true type: string @@ -30,7 +34,7 @@ jobs: timeout: 90 script: | set -eux - .ci/scripts/unittest-linux.sh "${{ inputs.build-tool }}" + .ci/scripts/unittest-linux.sh "${{ inputs.build-tool }}" "${{ inputs.build-mode }}" macos: uses: pytorch/test-infra/.github/workflows/macos_job.yml@main @@ -41,4 +45,4 @@ jobs: ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} script: | set -eux - .ci/scripts/unittest-macos.sh "${{ inputs.build-tool }}" + .ci/scripts/unittest-macos.sh "${{ inputs.build-tool }}" "${{ inputs.build-mode }}" diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 697601e3b27..b599f2fdc67 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -367,6 +367,7 @@ jobs: id-token: write contents: read with: + build-mode: Debug build-tool: cmake docker-image: executorch-ubuntu-22.04-clang12 @@ -376,6 +377,7 @@ jobs: id-token: write contents: read with: + build-mode: Debug build-tool: buck2 docker-image: executorch-ubuntu-22.04-clang12 diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index dff2b400ee6..64e26847874 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -489,3 +489,13 @@ jobs: PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh # Test llama2 PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}" + + unittest-release: + uses: ./.github/workflows/_unittest.yml + permissions: + id-token: write + contents: read + with: + build-mode: Release + build-tool: cmake + docker-image: executorch-ubuntu-22.04-clang12 From fd318cc921069160bca366b32fc03b165e7908cd Mon Sep 17 00:00:00 2001 From: cccclai Date: Thu, 20 Feb 2025 13:55:18 -0800 Subject: [PATCH 037/584] Refactor source_transformation to a seperate target Differential Revision: D69942050 Pull Request resolved: https://github.com/pytorch/executorch/pull/8602 --- examples/models/llama/TARGETS | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/examples/models/llama/TARGETS b/examples/models/llama/TARGETS index 
ad978205245..810b7f550df 100644 --- a/examples/models/llama/TARGETS +++ b/examples/models/llama/TARGETS @@ -95,11 +95,8 @@ runtime.command_alias( ) runtime.python_library( - name = "export_library", + name = "source_transformation", srcs = [ - "export_llama.py", - "export_llama_lib.py", - "model.py", "source_transformation/apply_spin_quant_r1_r2.py", "source_transformation/attention.py", "source_transformation/lora.py", @@ -114,6 +111,15 @@ runtime.python_library( "source_transformation/vulkan_rope.py", "source_transformation/attention_sink.py", ], +) + +runtime.python_library( + name = "export_library", + srcs = [ + "export_llama.py", + "export_llama_lib.py", + "model.py", + ], _is_external_target = True, base_module = "executorch.examples.models.llama", visibility = [ @@ -123,6 +129,7 @@ runtime.python_library( "@EXECUTORCH_CLIENTS", ], deps = [ + ":source_transformation", "//ai_codesign/gen_ai/fast_hadamard_transform:fast_hadamard_transform", "//caffe2:torch", "//executorch/backends/vulkan/_passes:vulkan_passes", From cc64fa17f5bcc705f0484e5796e8b59995679b4a Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Thu, 20 Feb 2025 14:38:49 -0800 Subject: [PATCH 038/584] Log when a Result is created from Error::ok Differential Revision: D69870978 Pull Request resolved: https://github.com/pytorch/executorch/pull/8577 --- runtime/core/result.h | 9 +++++++-- runtime/core/test/error_handling_test.cpp | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/runtime/core/result.h b/runtime/core/result.h index 7b404bca946..377573e6dfa 100644 --- a/runtime/core/result.h +++ b/runtime/core/result.h @@ -59,8 +59,13 @@ class Result final { * a non-Ok value. */ /* implicit */ Result(Error error) - : error_(error == Error::Ok ? Error::Internal : error), - hasValue_(false) {} + : error_(error == Error::Ok ? Error::Internal : error), hasValue_(false) { + if ET_UNLIKELY (error == Error::Ok) { + ET_LOG( + Debug, + "Attempted to create Result from Error::Ok, this has been converted to Error::Internal."); + } + } /// Value copy constructor. 
/* implicit */ Result(const T& val) : value_(val), hasValue_(true) {} diff --git a/runtime/core/test/error_handling_test.cpp b/runtime/core/test/error_handling_test.cpp index b6b58623984..ef270cad1ed 100644 --- a/runtime/core/test/error_handling_test.cpp +++ b/runtime/core/test/error_handling_test.cpp @@ -110,6 +110,7 @@ TEST(ErrorHandlingTest, ResultBasic) { } TEST(ErrorHandlingTest, OkErrorNotPossible) { + executorch::runtime::runtime_init(); Result r(Error::Ok); ASSERT_FALSE(r.ok()); ASSERT_NE(r.error(), Error::Ok); From fc5a4925d8472e7aafed7dcd77f55b6cdb845b39 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Thu, 20 Feb 2025 14:51:32 -0800 Subject: [PATCH 039/584] Measure generate_time on iOS benchmark (#8580) --- .github/scripts/extract_benchmark_results.py | 17 +++++++++-------- .../apple/Benchmark/Tests/LLaMA/LLaMATests.mm | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/scripts/extract_benchmark_results.py b/.github/scripts/extract_benchmark_results.py index 76f0e533389..ba6142a4826 100755 --- a/.github/scripts/extract_benchmark_results.py +++ b/.github/scripts/extract_benchmark_results.py @@ -229,11 +229,7 @@ def extract_ios_metric( elif method == "forward": if metric_name == "Clock Monotonic Time, s": - benchmark_result["metric"] = ( - "generate_time(ms)" - if "llama" in test_name - else "avg_inference_latency(ms)" - ) + benchmark_result["metric"] = "avg_inference_latency(ms)" benchmark_result["actualValue"] = metric_value * 1000 elif metric_name == "Memory Peak Physical, kB": @@ -241,9 +237,14 @@ def extract_ios_metric( benchmark_result["metric"] = "peak_inference_mem_usage(mb)" benchmark_result["actualValue"] = metric_value / 1024 - elif method == "generate" and metric_name == "Tokens Per Second, t/s": - benchmark_result["metric"] = "token_per_sec" - benchmark_result["actualValue"] = metric_value + elif method == "generate": + if metric_name == "Clock Monotonic Time, s": + benchmark_result["metric"] = "generate_time(ms)" + benchmark_result["actualValue"] = metric_value * 1000 + + elif metric_name == "Tokens Per Second, t/s": + benchmark_result["metric"] = "token_per_sec" + benchmark_result["actualValue"] = metric_value return benchmark_result diff --git a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm b/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm index 16c1c1c1d6a..332c3986b0b 100644 --- a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm +++ b/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm @@ -82,7 +82,7 @@ @implementation LLaMATests return; } TokensPerSecondMetric *tokensPerSecondMetric = [TokensPerSecondMetric new]; - [testCase measureWithMetrics:@[ tokensPerSecondMetric, [XCTMemoryMetric new] ] + [testCase measureWithMetrics:@[ tokensPerSecondMetric, [XCTClockMetric new], [XCTMemoryMetric new] ] block:^{ tokensPerSecondMetric.tokenCount = 0; const auto status = runner->generate( From a454be59a4d85486aae7d0a25f6f3fc425893f66 Mon Sep 17 00:00:00 2001 From: Nathanael See Date: Thu, 20 Feb 2025 15:37:11 -0800 Subject: [PATCH 040/584] update SqueezeInt4LinearInputs to process relu/gelu inputs too Differential Revision: D69673068 Pull Request resolved: https://github.com/pytorch/executorch/pull/8601 --- backends/transforms/fuse_view_copy.py | 17 +++++++++++++++++ backends/vulkan/_passes/TARGETS | 7 ++++--- backends/vulkan/_passes/__init__.py | 6 +++--- ...ar_inputs.py => squeeze_unsqueeze_inputs.py} | 17 ++++++++++++++--- backends/vulkan/vulkan_preprocess.py | 4 ++-- 5 files changed, 40 
insertions(+), 11 deletions(-) rename backends/vulkan/_passes/{squeeze_int4_linear_inputs.py => squeeze_unsqueeze_inputs.py} (80%) diff --git a/backends/transforms/fuse_view_copy.py b/backends/transforms/fuse_view_copy.py index bbc155dc451..22e20d1c88b 100644 --- a/backends/transforms/fuse_view_copy.py +++ b/backends/transforms/fuse_view_copy.py @@ -40,7 +40,24 @@ def merge_view_copy_chains(graph: torch.fx.Graph) -> torch.fx.Graph: return graph +def remove_noop_view_copy(graph: torch.fx.Graph) -> torch.fx.Graph: + """ + Remove view_copy nodes that are no-ops. + """ + ops = exir_ops.edge + view_op = ops.aten.view_copy.default + for node in graph.nodes: + if node.op == "call_function" and node.target == view_op: + input_shape = list(node.args[0].meta["val"].shape) + target_shape = node.args[1] + if input_shape == target_shape: + node.replace_all_uses_with(node.args[0]) + graph.eliminate_dead_code() + return graph + + class FuseViewCopyTransform(ExportPass): def call(self, graph_module: torch.fx.GraphModule) -> PassResult: graph_module.graph = merge_view_copy_chains(graph_module.graph) + graph_module.graph = remove_noop_view_copy(graph_module.graph) return PassResult(graph_module, True) diff --git a/backends/vulkan/_passes/TARGETS b/backends/vulkan/_passes/TARGETS index 59658e58f28..5478ad0eab6 100644 --- a/backends/vulkan/_passes/TARGETS +++ b/backends/vulkan/_passes/TARGETS @@ -31,14 +31,15 @@ runtime.python_library( ) runtime.python_library( - name = "squeeze_int4_linear_inputs", + name = "squeeze_unsqueeze_inputs", srcs = [ - "squeeze_int4_linear_inputs.py", + "squeeze_unsqueeze_inputs.py", ], visibility = [ "//executorch/backends/...", ], deps = [ + "//caffe2:torch", "//executorch/backends/vulkan:custom_ops_lib", "//executorch/exir:pass_base", "//executorch/exir/dialects:lib", @@ -114,7 +115,7 @@ runtime.python_library( ":remove_asserts", ":remove_local_scalar_dense", ":remove_redundant_ops", - ":squeeze_int4_linear_inputs", + ":squeeze_unsqueeze_inputs", ":tag_memory_meta_pass", ] ) diff --git a/backends/vulkan/_passes/__init__.py b/backends/vulkan/_passes/__init__.py index 2a4a2b4b5c9..220afa6a35c 100644 --- a/backends/vulkan/_passes/__init__.py +++ b/backends/vulkan/_passes/__init__.py @@ -20,8 +20,8 @@ from executorch.backends.vulkan._passes.remove_redundant_ops import ( RemoveRedundantOpsTransform, ) -from executorch.backends.vulkan._passes.squeeze_int4_linear_inputs import ( - SqueezeInt4LinearInputs, +from executorch.backends.vulkan._passes.squeeze_unsqueeze_inputs import ( + SqueezeUnsqueezeInputs, ) from executorch.backends.vulkan._passes.tag_memory_meta_pass import TagMemoryMetaPass @@ -32,6 +32,6 @@ "RemoveAssertsTransform", "RemoveLocalScalarDenseOpsTransform", "RemoveRedundantOpsTransform", - "SqueezeInt4LinearInputs", + "SqueezeUnsqueezeInputs", "TagMemoryMetaPass", ] diff --git a/backends/vulkan/_passes/squeeze_int4_linear_inputs.py b/backends/vulkan/_passes/squeeze_unsqueeze_inputs.py similarity index 80% rename from backends/vulkan/_passes/squeeze_int4_linear_inputs.py rename to backends/vulkan/_passes/squeeze_unsqueeze_inputs.py index 95fcef7f754..a0160efa90f 100644 --- a/backends/vulkan/_passes/squeeze_int4_linear_inputs.py +++ b/backends/vulkan/_passes/squeeze_unsqueeze_inputs.py @@ -6,16 +6,27 @@ # pyre-strict -from typing import Dict, List, Tuple +from typing import Dict, List, Set, Tuple, Union import executorch.backends.vulkan.custom_ops_lib # noqa: needed to access vk op from executorch.exir.dialects._ops import ops as exir_ops +from 
executorch.exir.dialects.edge._ops import EdgeOpOverload from executorch.exir.pass_base import ExportPass, NodeMetadata, ProxyValue +from torch._ops import OpOverload + from torch.fx.node import Argument +OpType = Union[str, OpOverload, EdgeOpOverload] + + +class SqueezeUnsqueezeInputs(ExportPass): + _squeezable_ops: Set[OpType] = { + exir_ops.edge.et_vk.linear_weight_int4.default, + exir_ops.edge.aten.relu.default, + exir_ops.edge.aten.gelu.default, + } -class SqueezeInt4LinearInputs(ExportPass): def call_operator( self, op, # pyre-ignore @@ -26,7 +37,7 @@ def call_operator( def _squeezable(shape: List[int]) -> bool: return len(shape) > 2 and 1 in shape - if op != exir_ops.edge.et_vk.linear_weight_int4.default: + if op not in self._squeezable_ops: return super().call_operator(op, args, kwargs, meta) # pyre-ignore[16]: `None` has no attribute `node` diff --git a/backends/vulkan/vulkan_preprocess.py b/backends/vulkan/vulkan_preprocess.py index c6b444e5def..3cfcac13a8d 100644 --- a/backends/vulkan/vulkan_preprocess.py +++ b/backends/vulkan/vulkan_preprocess.py @@ -26,7 +26,7 @@ insert_prepack_nodes, RemoveLocalScalarDenseOpsTransform, RemoveRedundantOpsTransform, - SqueezeInt4LinearInputs, + SqueezeUnsqueezeInputs, TagMemoryMetaPass, ) @@ -153,7 +153,7 @@ def preprocess( # noqa: C901 RemoveRedundantOpsTransform(), AddmmToLinearTransform(), FuseDequantLinearPass(), - SqueezeInt4LinearInputs(), + SqueezeUnsqueezeInputs(), FuseViewCopyTransform(), ViewCopyToSqueezeUnsqueezePass(), FuseBatchNormWithConvPass(program), From 735f16e7b5c66d09bfb2925635470e35079c7c05 Mon Sep 17 00:00:00 2001 From: Nathanael See Date: Thu, 20 Feb 2025 15:56:00 -0800 Subject: [PATCH 041/584] update batch norm to use layout gen Differential Revision: D69937208 Pull Request resolved: https://github.com/pytorch/executorch/pull/8600 --- .../runtime/graph/ops/glsl/batchnorm.glsl | 38 ++++++++----------- .../runtime/graph/ops/glsl/batchnorm.yaml | 1 + 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/glsl/batchnorm.glsl b/backends/vulkan/runtime/graph/ops/glsl/batchnorm.glsl index deb03192af0..c2fc5a56754 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/batchnorm.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/batchnorm.glsl @@ -13,24 +13,18 @@ layout(std430) buffer; -layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; -layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; -layout(set = 0, binding = 2) uniform PRECISION sampler3D weight_in; -layout(set = 0, binding = 3) uniform PRECISION sampler3D bias_in; -layout(set = 0, binding = 4) uniform PRECISION sampler3D mean_in; -layout(set = 0, binding = 5) uniform PRECISION sampler3D var_in; +#include "indexing_utils.h" -layout(set = 0, binding = 6) uniform PRECISION restrict OutLimits { - ivec3 out_limits; -}; +${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)} +${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)} +${layout_declare_tensor(B, "r", "weight_in", DTYPE, STORAGE)} +${layout_declare_tensor(B, "r", "bias_in", DTYPE, STORAGE)} +${layout_declare_tensor(B, "r", "mean_in", DTYPE, STORAGE)} +${layout_declare_tensor(B, "r", "var_in", DTYPE, STORAGE)} -layout(set = 0, binding = 7) uniform PRECISION restrict Params { - float eps; -}; - -layout(set = 0, binding = 8) uniform PRECISION restrict Params2 { - int num_texel_per_batch; -}; +${layout_declare_ubo(B, "ivec3", "out_limits")} +${layout_declare_ubo(B, "float", "eps")} 
+${layout_declare_ubo(B, "int", "num_texel_per_batch")} layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; @@ -40,16 +34,16 @@ void main() { return; } - VEC4_T v = VEC4_T(texelFetch(image_in, pos, 0)); + VEC4_T v = VEC4_T(load_texel(t_in, pos)); ivec3 param_pos = ivec3(pos.z % num_texel_per_batch, 0, 0); - VEC4_T weight = VEC4_T(texelFetch(weight_in, param_pos, 0)); - VEC4_T bias = VEC4_T(texelFetch(bias_in, param_pos, 0)); - VEC4_T mean = VEC4_T(texelFetch(mean_in, param_pos, 0)); - VEC4_T var = VEC4_T(texelFetch(var_in, param_pos, 0)); + VEC4_T weight = VEC4_T(load_texel(weight_in, param_pos)); + VEC4_T bias = VEC4_T(load_texel(bias_in, param_pos)); + VEC4_T mean = VEC4_T(load_texel(mean_in, param_pos)); + VEC4_T var = VEC4_T(load_texel(var_in, param_pos)); v = ((v - mean) / sqrt(var + eps)) * weight + bias; - imageStore(image_out, pos, v); + write_texel(t_out, pos, v); } diff --git a/backends/vulkan/runtime/graph/ops/glsl/batchnorm.yaml b/backends/vulkan/runtime/graph/ops/glsl/batchnorm.yaml index a92e44f636b..116773c816a 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/batchnorm.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/batchnorm.yaml @@ -2,6 +2,7 @@ batchnorm: parameter_names_with_default_values: DTYPE: float NDIM: 3 + STORAGE: texture3d generate_variant_forall: DTYPE: - VALUE: half From 6f654ebc263abf4d0c3ea086034853496f1b36bf Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Thu, 20 Feb 2025 16:11:43 -0800 Subject: [PATCH 042/584] Re-sync c10 to runtime/core/portable_type (#8553) No material changes, but let's keep it in sync --- runtime/core/portable_type/c10/c10/macros/Export.h | 2 ++ runtime/core/portable_type/c10/c10/util/BFloat16.h | 4 ---- runtime/core/portable_type/c10/c10/util/Half.h | 4 ---- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/runtime/core/portable_type/c10/c10/macros/Export.h b/runtime/core/portable_type/c10/c10/macros/Export.h index cb68060ed81..21808de77a3 100644 --- a/runtime/core/portable_type/c10/c10/macros/Export.h +++ b/runtime/core/portable_type/c10/c10/macros/Export.h @@ -139,8 +139,10 @@ #endif #if defined(TORCH_HIP_BUILD_MAIN_LIB) +#define TORCH_HIP_CPP_API C10_EXPORT #define TORCH_HIP_API C10_EXPORT #else +#define TORCH_HIP_CPP_API C10_IMPORT #define TORCH_HIP_API C10_IMPORT #endif diff --git a/runtime/core/portable_type/c10/c10/util/BFloat16.h b/runtime/core/portable_type/c10/c10/util/BFloat16.h index ad1271fc729..09d3051ab71 100644 --- a/runtime/core/portable_type/c10/c10/util/BFloat16.h +++ b/runtime/core/portable_type/c10/c10/util/BFloat16.h @@ -8,9 +8,7 @@ #include #include #include -#ifndef C10_EMBEDDED #include -#endif // C10_EMBEDDED #if defined(__CUDACC__) && !defined(USE_ROCM) #include @@ -116,14 +114,12 @@ struct alignas(2) BFloat16 { #endif }; -#ifndef C10_EMBEDDED C10_API inline std::ostream& operator<<( std::ostream& out, const BFloat16& value) { out << (float)value; return out; } -#endif // C10_EMBEDDED } // namespace c10 diff --git a/runtime/core/portable_type/c10/c10/util/Half.h b/runtime/core/portable_type/c10/c10/util/Half.h index 5625d4c3403..b77cf7b1f4a 100644 --- a/runtime/core/portable_type/c10/c10/util/Half.h +++ b/runtime/core/portable_type/c10/c10/util/Half.h @@ -29,9 +29,7 @@ #include #include #include -#ifndef C10_EMBEDDED #include -#endif // C10_EMBEDDED #ifdef __CUDACC__ #include @@ -411,12 +409,10 @@ struct alignas(2) Half { #endif }; -#ifndef C10_EMBEDDED C10_API inline std::ostream& operator<<(std::ostream& out, const Half& value) { out << (float)value; return out; } 
-#endif // C10_EMBEDDED } // namespace c10 From 5e4b75b7ba7f2a37b817ac4a52ef4782096453b8 Mon Sep 17 00:00:00 2001 From: cccclai Date: Thu, 20 Feb 2025 16:52:51 -0800 Subject: [PATCH 043/584] Clean up llama library/binary dependency Differential Revision: D69942904 Pull Request resolved: https://github.com/pytorch/executorch/pull/8605 --- examples/qualcomm/oss_scripts/llama/TARGETS | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/examples/qualcomm/oss_scripts/llama/TARGETS b/examples/qualcomm/oss_scripts/llama/TARGETS index 3ef82293e03..d49253c5668 100644 --- a/examples/qualcomm/oss_scripts/llama/TARGETS +++ b/examples/qualcomm/oss_scripts/llama/TARGETS @@ -35,23 +35,12 @@ python_library( python_binary( name = "llama", - srcs = ["llama.py"], main_function = "executorch.examples.qualcomm.oss_scripts.llama.llama.main", preload_deps = [ "//executorch/extension/llm/custom_ops:model_sharding_py", ], deps = [ - "//executorch/examples/qualcomm/oss_scripts/llama:static_llama", - "//caffe2:torch", - "//executorch/extension/pybindings:aten_lib", - "//executorch/backends/qualcomm/partition:partition", - "//executorch/backends/qualcomm/quantizer:quantizer", - "//executorch/devtools/backend_debug:delegation_info", - "//executorch/devtools:lib", - "//executorch/examples/models:models", - "//executorch/examples/qualcomm:utils", - "//executorch/extension/export_util:export_util", - "//executorch/extension/llm/export:export_lib", + ":llama_lib", ], ) From 80d5e5a959cc25494e002b5b6a89948eac71f4ae Mon Sep 17 00:00:00 2001 From: Digant Desai Date: Thu, 20 Feb 2025 19:13:08 -0600 Subject: [PATCH 044/584] Fix partition logic for `force_fp32_dynamic_linear` Differential Revision: D69906370 Pull Request resolved: https://github.com/pytorch/executorch/pull/8596 --- .../xnnpack/partition/config/gemm_configs.py | 15 +++- backends/xnnpack/test/ops/test_linear.py | 70 +++++++++++++++++++ backends/xnnpack/test/ops/test_lstm.py | 5 +- 3 files changed, 86 insertions(+), 4 deletions(-) diff --git a/backends/xnnpack/partition/config/gemm_configs.py b/backends/xnnpack/partition/config/gemm_configs.py index bf16855afc1..872ba355c70 100644 --- a/backends/xnnpack/partition/config/gemm_configs.py +++ b/backends/xnnpack/partition/config/gemm_configs.py @@ -210,6 +210,11 @@ def _get_bias_deps( self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType ) -> Tuple[bool, List[torch.fx.Node]]: gemm_deps = [] + if precision == ConfigPrecisionType.FP32 and self.force_fp32_dynamic_linear: + # if force force_fp32_dynamic_linear is enabled, then we + # do not partition the weight node + return (True, gemm_deps) + if len(node.all_input_nodes) > 2 and self.bias_idx is not None: bias_node = get_input_node(node, self.bias_idx) if bias_node: @@ -477,7 +482,15 @@ def find_partition_args(input_node): node.args = old_args node.users = old_users - return valid_deps, list(set(deps) | set(src_partition.nodes)) + # When using force_fp32_dynamic_linear, we want to get_deps to overwrite the source partition nodes. + # Else we want to be greedy. 
+ ret_deps = ( + list(set(deps) & set(src_partition.nodes)) + if self.force_fp32_dynamic_linear + else list(set(deps) | set(src_partition.nodes)) + ) + + return valid_deps, ret_deps def supported_precision_types(self): return [ diff --git a/backends/xnnpack/test/ops/test_linear.py b/backends/xnnpack/test/ops/test_linear.py index eccda406b80..30bb4f0aba2 100644 --- a/backends/xnnpack/test/ops/test_linear.py +++ b/backends/xnnpack/test/ops/test_linear.py @@ -31,6 +31,8 @@ ToEdgeTransformAndLower, ) +from torch.export.graph_signature import ExportGraphSignature, InputKind + try: from torchao.quantization.quant_api import ( int8_dynamic_activation_int4_weight, @@ -871,3 +873,71 @@ def test_linear_qd8_as_fp32(self): "dequantize_per_channel.default": 1, # 1: weight }, ) + + def test_linear_fp32_with_force_as_mm(self): + def check_signature( + signature: ExportGraphSignature, + force_flag: bool, + use_bias: bool, + legacy_mode: bool, + ): + num_params = 0 + if force_flag: + num_params = 1 # weight_param + if use_bias: + num_params += 1 # bias_param + sign_params: int = 0 + input_specs = signature.input_specs + for input_spec in input_specs: + if input_spec.kind == InputKind.PARAMETER: + sign_params += 1 + assert ( + sign_params == num_params + ), f"Expected {num_params} params, got {sign_params} with force_flag={force_flag}, use_bias={use_bias}, legacy_mode={legacy_mode}" + + for force_flag in (True, False): + for use_bias in (True, False): + for legacy_mode in (True, False): + module = BaseLinear( + in_size=8, + input_channels=13, + output_channels=17, + use_bias=use_bias, + ) + inputs = module.get_inputs() + tester = Tester(module, inputs).export() + partitioner = XnnpackPartitioner( + force_fp32_dynamic_linear=force_flag + ) + if legacy_mode: + tester.to_edge() + partitioner_stage = Partition(partitioner=partitioner) + tester.partition(partition_stage=partitioner_stage) + tester.check_not( + [ + ( + "executorch_exir_dialects_edge__ops_aten_mm_default" + if use_bias + else "executorch_exir_dialects_edge__ops_aten_addmm_default" + ) + ] + ) + else: + to_edge_and_transform_stage = ToEdgeTransformAndLower( + partitioners=[partitioner] + ) + tester.to_edge_transform_and_lower( + to_edge_and_transform_stage=to_edge_and_transform_stage + ) + tester.check_not( + ["executorch_exir_dialects_edge__ops_aten_linear_default"] + ) + + signature: ExportGraphSignature = ( + tester.get_artifact().exported_program().graph_signature + ) + check_signature(signature, force_flag, use_bias, legacy_mode) + + tester.to_executorch() + tester.serialize() + tester.run_method_and_compare_outputs() diff --git a/backends/xnnpack/test/ops/test_lstm.py b/backends/xnnpack/test/ops/test_lstm.py index bfc6113c417..be209082b37 100644 --- a/backends/xnnpack/test/ops/test_lstm.py +++ b/backends/xnnpack/test/ops/test_lstm.py @@ -54,9 +54,8 @@ def test_fp32_lstm_force_dynamic_linear(self): ) .check_not(["executorch_exir_dialects_edge__ops_aten_addmm_default"]) # Weights are supplied as input to linears - .check(["p_lstm_weight_hh_l0", "p_lstm_weight_ih_l0"]) - # Biases are owned by delegates - .check_not(["p_lstm_bias"]) + # Biases are not owned by delegates when force_fp32_dynamic_linear is set + .check(["p_lstm_weight_hh_l0", "p_lstm_weight_ih_l0", "p_lstm_bias"]) .to_executorch() .serialize() .run_method_and_compare_outputs() From 0490b0ef29c273dbb50d0be87393fa6cb1264419 Mon Sep 17 00:00:00 2001 From: cccclai Date: Thu, 20 Feb 2025 22:22:12 -0800 Subject: [PATCH 045/584] Allow getting all backend names Differential Revision: 
D69691354 Pull Request resolved: https://github.com/pytorch/executorch/pull/8520 --- extension/pybindings/portable_lib.py | 1 + extension/pybindings/pybindings.cpp | 19 +++++++++++++++++++ extension/pybindings/pybindings.pyi | 9 +++++++++ extension/pybindings/test/TARGETS | 8 ++++++++ .../pybindings/test/test_backend_pybinding.py | 14 ++++++++++++++ runtime/__init__.py | 18 +++++++++++++++++- runtime/backend/interface.cpp | 11 +++++++++++ runtime/backend/interface.h | 10 ++++++++++ 8 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 extension/pybindings/test/test_backend_pybinding.py diff --git a/extension/pybindings/portable_lib.py b/extension/pybindings/portable_lib.py index 25624ad60c0..24097fea6aa 100644 --- a/extension/pybindings/portable_lib.py +++ b/extension/pybindings/portable_lib.py @@ -38,6 +38,7 @@ _create_profile_block, # noqa: F401 _dump_profile_results, # noqa: F401 _get_operator_names, # noqa: F401 + _get_registered_backend_names, # noqa: F401 _load_bundled_program_from_buffer, # noqa: F401 _load_for_executorch, # noqa: F401 _load_for_executorch_from_buffer, # noqa: F401 diff --git a/extension/pybindings/pybindings.cpp b/extension/pybindings/pybindings.cpp index 97bff671149..f17ddbbbc36 100644 --- a/extension/pybindings/pybindings.cpp +++ b/extension/pybindings/pybindings.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -91,6 +92,8 @@ using ::executorch::runtime::DataLoader; using ::executorch::runtime::Error; using ::executorch::runtime::EValue; using ::executorch::runtime::EventTracerDebugLogLevel; +using ::executorch::runtime::get_backend_name; +using ::executorch::runtime::get_num_registered_backends; using ::executorch::runtime::get_registered_kernels; using ::executorch::runtime::HierarchicalAllocator; using ::executorch::runtime::Kernel; @@ -975,6 +978,18 @@ py::list get_operator_names() { return res; } +py::list get_registered_backend_names() { + size_t n_of_registered_backends = get_num_registered_backends(); + py::list res; + for (size_t i = 0; i < n_of_registered_backends; i++) { + auto backend_name_res = get_backend_name(i); + THROW_IF_ERROR(backend_name_res.error(), "Failed to get backend name"); + auto backend_name = backend_name_res.get(); + res.append(backend_name); + } + return res; +} + } // namespace PYBIND11_MODULE(EXECUTORCH_PYTHON_MODULE_NAME, m) { @@ -1028,6 +1043,10 @@ PYBIND11_MODULE(EXECUTORCH_PYTHON_MODULE_NAME, m) { prof_result.num_bytes); }, call_guard); + m.def( + "_get_registered_backend_names", + &get_registered_backend_names, + call_guard); m.def("_get_operator_names", &get_operator_names); m.def("_create_profile_block", &create_profile_block, call_guard); m.def( diff --git a/extension/pybindings/pybindings.pyi b/extension/pybindings/pybindings.pyi index fc44ce388a4..a380e90528e 100644 --- a/extension/pybindings/pybindings.pyi +++ b/extension/pybindings/pybindings.pyi @@ -220,6 +220,15 @@ def _get_operator_names() -> List[str]: """ ... +@experimental("This API is experimental and subject to change without notice.") +def _get_registered_backend_names() -> List[str]: + """ + .. warning:: + + This API is experimental and subject to change without notice. + """ + ... 
+ @experimental("This API is experimental and subject to change without notice.") def _create_profile_block(name: str) -> None: """ diff --git a/extension/pybindings/test/TARGETS b/extension/pybindings/test/TARGETS index 73063deb651..4770bebbcc4 100644 --- a/extension/pybindings/test/TARGETS +++ b/extension/pybindings/test/TARGETS @@ -47,3 +47,11 @@ runtime.python_test( "//executorch/kernels/quantized:aot_lib", ], ) + +runtime.python_test( + name = "test_backend_pybinding", + srcs = ["test_backend_pybinding.py"], + deps = [ + "//executorch/runtime:runtime", + ], +) diff --git a/extension/pybindings/test/test_backend_pybinding.py b/extension/pybindings/test/test_backend_pybinding.py new file mode 100644 index 00000000000..fbdc2be7799 --- /dev/null +++ b/extension/pybindings/test/test_backend_pybinding.py @@ -0,0 +1,14 @@ +import unittest + +from executorch.runtime import Runtime + + +class TestBackendsPybinding(unittest.TestCase): + def test_backend_name_list( + self, + ) -> None: + + runtime = Runtime.get() + registered_backend_names = runtime.backend_registry.registered_backend_names + self.assertGreaterEqual(len(registered_backend_names), 1) + self.assertIn("XnnpackBackend", registered_backend_names) diff --git a/runtime/__init__.py b/runtime/__init__.py index 4ed99ddae01..33999b716e9 100644 --- a/runtime/__init__.py +++ b/runtime/__init__.py @@ -42,7 +42,7 @@ import functools from pathlib import Path from types import ModuleType -from typing import Any, BinaryIO, Dict, Optional, Sequence, Set, Union +from typing import Any, BinaryIO, Dict, List, Optional, Sequence, Set, Union try: from executorch.extension.pybindings.portable_lib import ( @@ -125,6 +125,21 @@ def load_method(self, name: str) -> Optional[Method]: return self._methods.get(name, None) +class BackendRegistry: + """The registry of backends that are available to the runtime.""" + + def __init__(self, legacy_module: ModuleType) -> None: + # TODO: Expose the kernel callables to Python. + self._legacy_module = legacy_module + + @property + def registered_backend_names(self) -> List[str]: + """ + Returns the names of all registered backends as a list of strings. + """ + return self._legacy_module._get_registered_backend_names() + + class OperatorRegistry: """The registry of operators that are available to the runtime.""" @@ -157,6 +172,7 @@ def get() -> "Runtime": def __init__(self, *, legacy_module: ModuleType) -> None: # Public attributes. + self.backend_registry = BackendRegistry(legacy_module) self.operator_registry = OperatorRegistry(legacy_module) # Private attributes. self._legacy_module = legacy_module diff --git a/runtime/backend/interface.cpp b/runtime/backend/interface.cpp index 84c0bb82d43..4fb1eadfa87 100644 --- a/runtime/backend/interface.cpp +++ b/runtime/backend/interface.cpp @@ -55,5 +55,16 @@ Error register_backend(const Backend& backend) { return Error::Ok; } +size_t get_num_registered_backends() { + return num_registered_backends; +} + +Result get_backend_name(size_t index) { + if (index >= num_registered_backends) { + return Error::InvalidArgument; + } + return registered_backends[index].name; +} + } // namespace runtime } // namespace executorch diff --git a/runtime/backend/interface.h b/runtime/backend/interface.h index c0305f68cd3..b74858a9d94 100644 --- a/runtime/backend/interface.h +++ b/runtime/backend/interface.h @@ -139,6 +139,16 @@ struct Backend { */ ET_NODISCARD Error register_backend(const Backend& backend); +/** + * Returns the number of registered backends. 
+ */ +size_t get_num_registered_backends(); + +/** + * Returns the backend name at the given index. + */ +Result get_backend_name(size_t index); + } // namespace runtime } // namespace executorch From 282242e79e6abb6bf2e824f22aeabda83fee121e Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Fri, 21 Feb 2025 00:22:29 -0800 Subject: [PATCH 046/584] Try fixing unittest macos (#8590) * Try fixing unittest macos Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: * Update install_requirements.sh Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: * Try something * Increase tolerance level for test_joint_graph * lint --- .ci/scripts/unittest-macos.sh | 4 +++- examples/models/llama3_2_vision/install_requirements.sh | 4 +++- exir/tests/test_joint_graph.py | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.ci/scripts/unittest-macos.sh b/.ci/scripts/unittest-macos.sh index 9c29191b459..c0e39cee335 100755 --- a/.ci/scripts/unittest-macos.sh +++ b/.ci/scripts/unittest-macos.sh @@ -38,7 +38,9 @@ ${CONDA_RUN} --no-capture-output \ .ci/scripts/setup-macos.sh "${BUILD_TOOL}" "${BUILD_MODE}" # Install llama3_2_vision dependencies. -PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh +PYTHON_EXECUTABLE=python \ +${CONDA_RUN} --no-capture-output \ +./examples/models/llama3_2_vision/install_requirements.sh if [[ "$BUILD_TOOL" == "cmake" ]]; then .ci/scripts/unittest-macos-cmake.sh diff --git a/examples/models/llama3_2_vision/install_requirements.sh b/examples/models/llama3_2_vision/install_requirements.sh index 4d4a6f28624..9076cb967d0 100755 --- a/examples/models/llama3_2_vision/install_requirements.sh +++ b/examples/models/llama3_2_vision/install_requirements.sh @@ -5,7 +5,9 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -NIGHTLY_VERSION="dev20250115" +set +ex + +NIGHTLY_VERSION="dev20250220" # Install torchtune nightly for model definitions. pip install --pre torchtune==0.6.0.${NIGHTLY_VERSION} --extra-index-url https://download.pytorch.org/whl/nightly/cpu --no-cache-dir diff --git a/exir/tests/test_joint_graph.py b/exir/tests/test_joint_graph.py index 349fa92e826..fb74b70d313 100644 --- a/exir/tests/test_joint_graph.py +++ b/exir/tests/test_joint_graph.py @@ -18,6 +18,7 @@ from torch.export._trace import _export from torch.export.experimental import _export_forward_backward from torch.export.exported_program import OutputKind +from torch.testing import assert_close class TestJointGraph(unittest.TestCase): @@ -100,7 +101,8 @@ def forward(self, x, y): example_inputs ) # ET outputs are [loss, grads, weights] - self.assertTrue(torch.allclose(loss, et_outputs[0])) + # Without rtol and atol, this test fails in macos. + assert_close(loss, et_outputs[0], rtol=1e-4, atol=1e-4) self.assertTrue( torch.allclose(m.linear.weight.grad, et_outputs[1]) # pyre-ignore ) From cc922dae5fc25305d5091fa0ef1fc3e543630a05 Mon Sep 17 00:00:00 2001 From: Oscar Andersson <87121123+oscarandersson8218@users.noreply.github.com> Date: Fri, 21 Feb 2025 16:07:23 +0100 Subject: [PATCH 047/584] Arm backend: Enable FVP tests for ops lowered to REDUCE_SUM and fix numerical issues (#8617) Enable FVP tests for ops lowered to REDUCE_SUM vela version is stepped to fix numerical issues with REDUCE_SUM. 
Add ExpectedFailureOnFvp for BMM U55 tests Signed-off-by: Oscar Andersson --- .../arm/test/misc/test_multiple_outputs.py | 10 +-- backends/arm/test/ops/test_bmm.py | 7 +- backends/arm/test/ops/test_layer_norm.py | 33 +------- backends/arm/test/ops/test_logsoftmax.py | 46 ++++------ backends/arm/test/ops/test_mean_dim.py | 12 ++- backends/arm/test/ops/test_mm.py | 6 +- backends/arm/test/ops/test_softmax.py | 53 +++++------- backends/arm/test/ops/test_sum.py | 12 +-- backends/arm/test/ops/test_var.py | 84 +++++++++++-------- examples/arm/setup.sh | 3 +- 10 files changed, 119 insertions(+), 147 deletions(-) diff --git a/backends/arm/test/misc/test_multiple_outputs.py b/backends/arm/test/misc/test_multiple_outputs.py index ddddc94d277..d3bea9a4005 100644 --- a/backends/arm/test/misc/test_multiple_outputs.py +++ b/backends/arm/test/misc/test_multiple_outputs.py @@ -76,23 +76,21 @@ def _test_ethosu_BI_pipeline( tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) @pytest.mark.corstone_fvp - def test_u85_BI(self): + def test_u55_BI(self): module = self.MultipleOutputsModule() test_data = module.get_inputs() self._test_ethosu_BI_pipeline( module, test_data, - common.get_u85_compile_spec(), + common.get_u55_compile_spec(), ) @pytest.mark.corstone_fvp - @conftest.expectedFailureOnFVP - # TODO MLETORCH-598 - def test_u55_BI(self): + def test_u85_BI(self): module = self.MultipleOutputsModule() test_data = module.get_inputs() self._test_ethosu_BI_pipeline( module, test_data, - common.get_u55_compile_spec(), + common.get_u85_compile_spec(), ) diff --git a/backends/arm/test/ops/test_bmm.py b/backends/arm/test/ops/test_bmm.py index 46b6eb6d011..d7214f7622d 100644 --- a/backends/arm/test/ops/test_bmm.py +++ b/backends/arm/test/ops/test_bmm.py @@ -150,9 +150,10 @@ def test_bmm_single_input_tosa_BI(self, test_data_generator: Callable[[], Tuple] test_data = test_data_generator() self._test_bmm_tosa_BI_pipeline(self.BMMSingleInput(), test_data) + # Expected to fail on FVP as TOSA.MATMUL is not supported on U55 @parameterized.expand(BMM.test_data_generators) @pytest.mark.corstone_fvp - @unittest.expectedFailure + @conftest.expectedFailureOnFVP def test_bmm_u55_BI_xfails(self, test_data_generator: Callable[[], Tuple]): test_data = test_data_generator() self._test_bmm_ethosu_BI_pipeline( @@ -167,10 +168,10 @@ def test_bmm_u85_BI(self, test_data_generator: Callable[[], Tuple]): self.BMM(), common.get_u85_compile_spec(), test_data ) - # Expected to fail with error: Warning, unsupported fusing of TOSA Rescale previous operator is of type: Memcpy + # Expected to fail on FVP as TOSA.MATMUL is not supported on U55 @parameterized.expand(BMMSingleInput.test_data_generators) @pytest.mark.corstone_fvp - @unittest.expectedFailure + @conftest.expectedFailureOnFVP def test_bmm_single_input_u55_BI_xfails( self, test_data_generator: Callable[[], Tuple] ): diff --git a/backends/arm/test/ops/test_layer_norm.py b/backends/arm/test/ops/test_layer_norm.py index 82f0af8dcf7..a2a42189cde 100644 --- a/backends/arm/test/ops/test_layer_norm.py +++ b/backends/arm/test/ops/test_layer_norm.py @@ -158,7 +158,7 @@ def test_layer_norm_tosa_BI( self.LayerNorm(*model_params), (test_data,) ) - @parameterized.expand(test_data_suite[4:]) + @parameterized.expand(test_data_suite) @pytest.mark.corstone_fvp def test_layer_norm_u55_BI( self, @@ -170,36 +170,7 @@ def test_layer_norm_u55_BI( self.LayerNorm(*model_params), common.get_u55_compile_spec(), (test_data,) ) - # Numerical issues on FVP likely due to mul op, MLETORCH-521 - # Skip tests 
that require transposes. - @parameterized.expand(test_data_suite[:4]) - @pytest.mark.corstone_fvp - @conftest.expectedFailureOnFVP - def test_layer_norm_u55_BI_xfails( - self, - test_name: str, - test_data: torch.Tensor, - model_params, - ): - self._test_layernorm_ethosu_BI_pipeline( - self.LayerNorm(*model_params), common.get_u55_compile_spec(), (test_data,) - ) - - # Numerical issues on FVP likely due to mul op, MLETORCH-521 - @parameterized.expand(test_data_suite[:-2]) - @pytest.mark.corstone_fvp - @conftest.expectedFailureOnFVP - def test_layer_norm_u85_BI_xfails( - self, - test_name: str, - test_data: torch.Tensor, - model_params, - ): - self._test_layernorm_ethosu_BI_pipeline( - self.LayerNorm(*model_params), common.get_u85_compile_spec(), (test_data,) - ) - - @parameterized.expand(test_data_suite[-2:]) + @parameterized.expand(test_data_suite) @pytest.mark.corstone_fvp def test_layer_norm_u85_BI( self, diff --git a/backends/arm/test/ops/test_logsoftmax.py b/backends/arm/test/ops/test_logsoftmax.py index f34d4afbb55..bd48bd224a8 100644 --- a/backends/arm/test/ops/test_logsoftmax.py +++ b/backends/arm/test/ops/test_logsoftmax.py @@ -11,7 +11,7 @@ import pytest import torch -from executorch.backends.arm.test import common +from executorch.backends.arm.test import common, conftest from executorch.backends.arm.test.tester.arm_tester import ArmTester from executorch.exir.backend.compile_spec_schema import CompileSpec from parameterized import parameterized @@ -28,16 +28,17 @@ lambda: ("randn", torch.randn(10, 10, 10, 10), 3), lambda: ("randn_neg_dim", torch.randn(10, 5, 8, 7), -3), ] -test_data_generators_u55 = [ + +test_data_generators_FVP = [ # (test_name, test_data, dim) lambda: ("ones", torch.ones(10, 10), 1), lambda: ("ones_neg_dim", torch.ones(10, 3, 4), -1), - lambda: ("randn_neg_dim", torch.randn(10, 5, 8, 7), -3), - lambda: ("zeros", torch.zeros(10, 8, 5, 2), 0), - lambda: ("zeros_neg_dim", torch.zeros(10, 7, 8, 9), -4), + lambda: ("randn_neg_dim", torch.randn(1, 5, 8, 7), -3), + lambda: ("zeros", torch.zeros(1, 8, 5, 2), 0), + lambda: ("zeros_neg_dim", torch.zeros(1, 7, 8, 9), -4), lambda: ("rand", torch.rand(1, 2, 5, 8), 2), - lambda: ("rand_neg_dim", torch.rand(2, 10, 8, 10), -2), - lambda: ("randn", torch.randn(10, 10, 10, 10), 3), + lambda: ("rand_neg_dim", torch.rand(1, 10, 8, 10), -2), + lambda: ("randn", torch.randn(1, 10, 10, 10), 3), ] @@ -99,7 +100,7 @@ def _test_logsoftmax_tosa_ethos_BI_pipeline( module: torch.nn.Module, test_data: Tuple[torch.tensor], ): - ( + tester = ( ArmTester( module, example_inputs=test_data, @@ -114,21 +115,10 @@ def _test_logsoftmax_tosa_ethos_BI_pipeline( .check_not(["executorch_exir_dialects_edge__ops_aten__logsoftmax_default"]) .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) .to_executorch() + .serialize() ) - - def _test_logsoftmax_tosa_u55_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - self._test_logsoftmax_tosa_ethos_BI_pipeline( - common.get_u55_compile_spec(), module, test_data - ) - - def _test_logsoftmax_tosa_u85_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - self._test_logsoftmax_tosa_ethos_BI_pipeline( - common.get_u85_compile_spec(), module, test_data - ) + if conftest.is_option_enabled("corstone_fvp"): + tester.run_method_and_compare_outputs(inputs=test_data, qtol=1) @parameterized.expand(test_data_generators) def test_logsoftmax_tosa_MI(self, test_data_generator: Callable[[], Tuple]): @@ -141,18 +131,18 @@ def 
test_logsoftmax_tosa_BI(self, test_data_generator: Callable[[], Tuple]): test_name, test_data, dim = test_data_generator() self._test_logsoftmax_tosa_BI_pipeline(self.LogSoftmax(dim=dim), (test_data,)) - @parameterized.expand(test_data_generators_u55) + @parameterized.expand(test_data_generators_FVP) @pytest.mark.flaky # TODO: MLETORCH-460 - Numerically stabler (log)softmax implementation def test_logsoftmax_tosa_u55_BI(self, test_data_generator: Callable[[], Tuple]): test_name, test_data, dim = test_data_generator() - self._test_logsoftmax_tosa_u55_BI_pipeline( - self.LogSoftmax(dim=dim), (test_data,) + self._test_logsoftmax_tosa_ethos_BI_pipeline( + common.get_u55_compile_spec(), self.LogSoftmax(dim=dim), (test_data,) ) - @parameterized.expand(test_data_generators) + @parameterized.expand(test_data_generators_FVP) @pytest.mark.flaky # TODO: MLETORCH-460 - Numerically stabler (log)softmax implementation def test_logsoftmax_tosa_u85_BI(self, test_data_generator: Callable[[], Tuple]): test_name, test_data, dim = test_data_generator() - self._test_logsoftmax_tosa_u85_BI_pipeline( - self.LogSoftmax(dim=dim), (test_data,) + self._test_logsoftmax_tosa_ethos_BI_pipeline( + common.get_u85_compile_spec(), self.LogSoftmax(dim=dim), (test_data,) ) diff --git a/backends/arm/test/ops/test_mean_dim.py b/backends/arm/test/ops/test_mean_dim.py index 393cf1667e0..78997ac047b 100644 --- a/backends/arm/test/ops/test_mean_dim.py +++ b/backends/arm/test/ops/test_mean_dim.py @@ -10,7 +10,7 @@ from typing import Tuple import torch -from executorch.backends.arm.test import common +from executorch.backends.arm.test import common, conftest from executorch.backends.arm.test.tester.arm_tester import ArmTester from executorch.exir.backend.backend_details import CompileSpec from parameterized import parameterized @@ -121,7 +121,7 @@ def _test_adaptive_avg_pool2d_tosa_ethosu_BI_pipeline( compile_spec: CompileSpec, test_data: Tuple[torch.tensor], ): - ( + tester = ( ArmTester( module, example_inputs=test_data, @@ -141,7 +141,10 @@ def _test_adaptive_avg_pool2d_tosa_ethosu_BI_pipeline( ) .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) .to_executorch() + .serialize() ) + if conftest.is_option_enabled("corstone_fvp"): + tester.run_method_and_compare_outputs(inputs=test_data) def _test_meandim_tosa_MI_pipeline( self, module: torch.nn.Module, test_data: Tuple[torch.tensor] @@ -188,7 +191,7 @@ def _test_meandim_tosa_ethosu_BI_pipeline( compile_spec: CompileSpec, test_data: Tuple[torch.tensor], ): - ( + tester = ( ArmTester( module, example_inputs=test_data, @@ -207,7 +210,10 @@ def _test_meandim_tosa_ethosu_BI_pipeline( ) .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) .to_executorch() + .serialize() ) + if conftest.is_option_enabled("corstone_fvp"): + tester.run_method_and_compare_outputs(inputs=test_data, qtol=1) @parameterized.expand(AdaptiveAveragePool2d.test_data_suite) def test_adaptive_avg_pool2d_tosa_MI( diff --git a/backends/arm/test/ops/test_mm.py b/backends/arm/test/ops/test_mm.py index df75e4ed183..347a0b297f7 100644 --- a/backends/arm/test/ops/test_mm.py +++ b/backends/arm/test/ops/test_mm.py @@ -132,18 +132,16 @@ def test_mm_single_input_tosa_BI(self, test_data_generator: Callable[[], Tuple]) test_data = test_data_generator() self._test_mm_tosa_BI_pipeline(self.MMSingleInput(), test_data) - # Expected to fail with error: CPU performance estimation for "MatMul" not implemented + # TODO: Enable numerical testing @parameterized.expand(MM.test_data_generators) - 
@unittest.expectedFailure def test_mm_u55_BI(self, test_data_generator: Callable[[], Tuple]): test_data = test_data_generator() self._test_mm_ethosu_BI_pipeline( common.get_u55_compile_spec(), self.MM(), test_data ) - # Expected to fail with error: Warning, unsupported fusing of TOSA Rescale previous operator is of type: Memcpy + # TODO: Enable numerical testing @parameterized.expand(MMSingleInput.test_data_generators) - @unittest.expectedFailure def test_mm_single_input_u55_BI(self, test_data_generator: Callable[[], Tuple]): test_data = test_data_generator() self._test_mm_ethosu_BI_pipeline( diff --git a/backends/arm/test/ops/test_softmax.py b/backends/arm/test/ops/test_softmax.py index c60da18594f..787e1b73a31 100644 --- a/backends/arm/test/ops/test_softmax.py +++ b/backends/arm/test/ops/test_softmax.py @@ -12,7 +12,7 @@ import pytest import torch -from executorch.backends.arm.test import common +from executorch.backends.arm.test import common, conftest from executorch.backends.arm.test.tester.arm_tester import ArmTester from executorch.exir.backend.compile_spec_schema import CompileSpec from parameterized import parameterized @@ -30,16 +30,16 @@ lambda: ("randn_neg_dim", torch.randn(10, 5, 8, 7), -3), ] -test_data_generators_u55 = [ +test_data_generators_FVP = [ # (test_name, test_data, dim) lambda: ("ones", torch.ones(10, 10), 1), - lambda: ("ones_neg_dim", torch.ones(10, 3, 4), -1), - lambda: ("randn_neg_dim", torch.randn(10, 5, 8, 7), -3), - lambda: ("zeros", torch.zeros(10, 8, 5, 2), 0), - lambda: ("zeros_neg_dim", torch.zeros(10, 7, 8, 9), -4), + lambda: ("ones_neg_dim", torch.ones(1, 3, 4), -1), + lambda: ("randn_neg_dim", torch.randn(1, 5, 8, 7), -3), + lambda: ("zeros", torch.zeros(1, 8, 5, 2), 0), + lambda: ("zeros_neg_dim", torch.zeros(1, 7, 8, 9), -4), lambda: ("rand", torch.rand(1, 2, 5, 8), 2), - lambda: ("rand_neg_dim", torch.rand(2, 10, 8, 10), -2), - lambda: ("randn", torch.randn(10, 10, 10, 10), 3), + lambda: ("rand_neg_dim", torch.rand(1, 10, 8, 10), -2), + lambda: ("randn", torch.randn(1, 10, 10, 10), 3), ] @@ -95,13 +95,13 @@ def _test_softmax_tosa_BI_pipeline( .run_method_and_compare_outputs(inputs=test_data) ) - def _test_softmax_tosa_ethos_BI_pipeline( + def _test_softmax_ethosu_BI_pipeline( self, compile_spec: list[CompileSpec], module: torch.nn.Module, test_data: Tuple[torch.tensor], ): - ( + tester = ( ArmTester( module, example_inputs=test_data, @@ -116,21 +116,10 @@ def _test_softmax_tosa_ethos_BI_pipeline( .check_not(["executorch_exir_dialects_edge__ops_aten__softmax_default"]) .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) .to_executorch() + .serialize() ) - - def _test_softmax_tosa_u55_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - self._test_softmax_tosa_ethos_BI_pipeline( - common.get_u55_compile_spec(), module, test_data - ) - - def _test_softmax_tosa_u85_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - self._test_softmax_tosa_ethos_BI_pipeline( - common.get_u85_compile_spec(), module, test_data - ) + if conftest.is_option_enabled("corstone_fvp"): + tester.run_method_and_compare_outputs(inputs=test_data, qtol=1) @parameterized.expand(test_data_generators) def test_softmax_tosa_MI(self, test_data_generator: Callable[[], Tuple]): @@ -143,14 +132,18 @@ def test_softmax_tosa_BI(self, test_data_generator: Callable[[], Tuple]): test_name, test_data, dim = test_data_generator() self._test_softmax_tosa_BI_pipeline(self.Softmax(dim=dim), (test_data,)) - 
@parameterized.expand(test_data_generators_u55) + @parameterized.expand(test_data_generators_FVP) @pytest.mark.flaky # TODO: MLETORCH-460 - Numerically stabler (log)softmax implementation - def test_softmax_tosa_u55_BI(self, test_data_generator: Callable[[], Tuple]): + def test_softmax_u55_BI(self, test_data_generator: Callable[[], Tuple]): test_name, test_data, dim = test_data_generator() - self._test_softmax_tosa_u55_BI_pipeline(self.Softmax(dim=dim), (test_data,)) + self._test_softmax_ethosu_BI_pipeline( + common.get_u55_compile_spec(), self.Softmax(dim=dim), (test_data,) + ) - @parameterized.expand(test_data_generators) + @parameterized.expand(test_data_generators_FVP) @pytest.mark.flaky # TODO: MLETORCH-460 - Numerically stabler (log)softmax implementation - def test_softmax_tosa_u85_BI(self, test_data_generator: Callable[[], Tuple]): + def test_softmax_u85_BI(self, test_data_generator: Callable[[], Tuple]): test_name, test_data, dim = test_data_generator() - self._test_softmax_tosa_u85_BI_pipeline(self.Softmax(dim=dim), (test_data,)) + self._test_softmax_ethosu_BI_pipeline( + common.get_u85_compile_spec(), self.Softmax(dim=dim), (test_data,) + ) diff --git a/backends/arm/test/ops/test_sum.py b/backends/arm/test/ops/test_sum.py index 5627c55ad9e..bc0c50b8ee0 100644 --- a/backends/arm/test/ops/test_sum.py +++ b/backends/arm/test/ops/test_sum.py @@ -9,7 +9,7 @@ from typing import Tuple import torch -from executorch.backends.arm.test import common +from executorch.backends.arm.test import common, conftest from executorch.backends.arm.test.tester.arm_tester import ArmTester from executorch.exir.backend.compile_spec_schema import CompileSpec from parameterized import parameterized @@ -29,7 +29,7 @@ class Sum(torch.nn.Module): ((torch.rand(10), 0, True),), ((torch.rand(10, 10), 1, False),), ((torch.rand(10, 10, 10), [-3, 1], True),), - ((torch.rand(2, 1, 5, 8), 1, False),), + ((torch.rand(1, 1, 5, 8), 1, False),), ((torch.rand(1, 2, 3, 4), 3, True),), ((torch.rand(1, 2, 8, 8), [2, 3, 0], True),), ] @@ -39,7 +39,7 @@ class Sum(torch.nn.Module): ((torch.rand(10, 10), 1, False),), ((torch.rand(1, 2, 3, 4), 3, True),), ((torch.rand(10, 10, 10), [-3, 1], True),), - ((torch.rand(2, 1, 5, 8), 1, False),), + ((torch.rand(1, 1, 5, 8), 1, False),), ((torch.rand(1, 2, 8, 8), [2, 3, 0], True),), ] @@ -82,7 +82,7 @@ def _test_sum_tosa_BI_pipeline( .partition() .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) .to_executorch() - .run_method_and_compare_outputs(inputs=test_data, qtol=1) + .run_method_and_compare_outputs(inputs=test_data) ) def _test_sum_ethosu_BI_pipeline( @@ -91,7 +91,7 @@ def _test_sum_ethosu_BI_pipeline( test_data: tuple[exampledata_t], compile_spec: CompileSpec, ): - ( + tester = ( ArmTester( module, example_inputs=test_data, @@ -107,6 +107,8 @@ def _test_sum_ethosu_BI_pipeline( .to_executorch() .serialize() ) + if conftest.is_option_enabled("corstone_fvp"): + tester.run_method_and_compare_outputs(inputs=test_data, qtol=1) @parameterized.expand(Sum.test_parameters) def test_sum_tosa_MI(self, test_data: tuple[exampledata_t]): diff --git a/backends/arm/test/ops/test_var.py b/backends/arm/test/ops/test_var.py index ad095f01ded..6690c668f94 100644 --- a/backends/arm/test/ops/test_var.py +++ b/backends/arm/test/ops/test_var.py @@ -16,7 +16,7 @@ get_symmetric_quantization_config, TOSAQuantizer, ) -from executorch.backends.arm.test import common +from executorch.backends.arm.test import common, conftest from executorch.backends.arm.test.tester.arm_tester import ArmTester 
from executorch.backends.arm.tosa_specification import TosaSpecification @@ -36,13 +36,16 @@ class Var(torch.nn.Module): (torch.rand(1, 50, 10, 20), False, 0.5), ] + def __init__(self, keepdim: bool = True, correction: int = 0): + super().__init__() + self.keepdim = keepdim + self.correction = correction + def forward( self, x: torch.Tensor, - keepdim: bool = True, - correction: int = 0, ): - return x.var(keepdim=keepdim, correction=correction) + return x.var(keepdim=self.keepdim, correction=self.correction) class VarDim(torch.nn.Module): test_parameters = [ @@ -62,14 +65,17 @@ class VarDim(torch.nn.Module): (torch.rand(1, 50, 10, 20), -1, True, True), ] + def __init__(self, dim: int = -1, keepdim: bool = True, unbiased: bool = False): + super().__init__() + self.dim = dim + self.keepdim = keepdim + self.unbiased = unbiased + def forward( self, x: torch.Tensor, - dim: int = -1, - keepdim: bool = True, - unbiased: bool = False, ): - return x.var(dim=dim, keepdim=keepdim, unbiased=unbiased) + return x.var(dim=self.dim, keepdim=self.keepdim, unbiased=self.unbiased) class VarCorrection(torch.nn.Module): test_parameters = [ @@ -79,14 +85,19 @@ class VarCorrection(torch.nn.Module): (torch.rand(1, 50, 10, 20), (-1, -2), True, 0.5), ] + def __init__( + self, dim: int = -1, keepdim: bool = True, correction: bool = False + ): + super().__init__() + self.dim = dim + self.keepdim = keepdim + self.correction = correction + def forward( self, x: torch.Tensor, - dim: int | tuple[int] = -1, - keepdim: bool = True, - correction: int = 0, ): - return x.var(dim=dim, keepdim=keepdim, correction=correction) + return x.var(dim=self.dim, keepdim=self.keepdim, correction=self.correction) def _test_var_tosa_MI_pipeline( self, @@ -138,7 +149,7 @@ def _test_var_ethosu_BI_pipeline( quantizer = EthosUQuantizer(compile_spec).set_io( get_symmetric_quantization_config() ) - ( + tester = ( ArmTester( module, example_inputs=test_data, @@ -150,58 +161,61 @@ def _test_var_ethosu_BI_pipeline( .partition() .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) .to_executorch() + .serialize() ) + if conftest.is_option_enabled("corstone_fvp"): + tester.run_method_and_compare_outputs(inputs=test_data, qtol=1) @parameterized.expand(Var.test_parameters) def test_var_tosa_MI(self, test_tensor: torch.Tensor, keepdim, correction): - self._test_var_tosa_MI_pipeline(self.Var(), (test_tensor, keepdim, correction)) + self._test_var_tosa_MI_pipeline(self.Var(keepdim, correction), (test_tensor,)) @parameterized.expand(Var.test_parameters) def test_var_tosa_BI(self, test_tensor: torch.Tensor, keepdim, correction): - self._test_var_tosa_BI_pipeline(self.Var(), (test_tensor, keepdim, correction)) + self._test_var_tosa_BI_pipeline(self.Var(keepdim, correction), (test_tensor,)) @parameterized.expand(Var.test_parameters) def test_var_u55_BI(self, test_tensor: torch.Tensor, keepdim, correction): self._test_var_ethosu_BI_pipeline( - self.Var(), + self.Var(keepdim, correction), common.get_u55_compile_spec(), - (test_tensor, keepdim, correction), + (test_tensor,), ) @parameterized.expand(Var.test_parameters) def test_var_u85_BI(self, test_tensor: torch.Tensor, keepdim, correction): self._test_var_ethosu_BI_pipeline( - self.Var(), + self.Var(keepdim, correction), common.get_u85_compile_spec(), - (test_tensor, keepdim, correction), + (test_tensor,), ) @parameterized.expand(VarDim.test_parameters) - def test_var_dim_tosa_MI(self, test_tensor: torch.Tensor, dim, keepdim, correction): + def test_var_dim_tosa_MI(self, test_tensor: 
torch.Tensor, dim, keepdim, unbiased): self._test_var_tosa_MI_pipeline( - self.VarDim(), (test_tensor, dim, keepdim, correction) + self.VarDim(dim, keepdim, unbiased), (test_tensor,) ) @parameterized.expand(VarDim.test_parameters) - def test_var_dim_tosa_BI(self, test_tensor: torch.Tensor, dim, keepdim, correction): + def test_var_dim_tosa_BI(self, test_tensor: torch.Tensor, dim, keepdim, unbiased): self._test_var_tosa_BI_pipeline( - self.VarDim(), (test_tensor, dim, keepdim, correction) + self.VarDim(dim, keepdim, unbiased), (test_tensor,) ) @parameterized.expand(VarDim.test_parameters_u55) - def test_var_dim_u55_BI(self, test_tensor: torch.Tensor, dim, keepdim, correction): + def test_var_dim_u55_BI(self, test_tensor: torch.Tensor, dim, keepdim, unbiased): self._test_var_ethosu_BI_pipeline( - self.VarDim(), + self.VarDim(dim, keepdim, unbiased), common.get_u55_compile_spec(), - (test_tensor, dim, keepdim, correction), + (test_tensor,), ) @parameterized.expand(VarDim.test_parameters) - def test_var_dim_u85_BI(self, test_tensor: torch.Tensor, dim, keepdim, correction): + def test_var_dim_u85_BI(self, test_tensor: torch.Tensor, dim, keepdim, unbiased): self._test_var_ethosu_BI_pipeline( - self.VarDim(), + self.VarDim(dim, keepdim, unbiased), common.get_u85_compile_spec(), - (test_tensor, dim, keepdim, correction), + (test_tensor,), ) @parameterized.expand(VarCorrection.test_parameters) @@ -209,7 +223,7 @@ def test_var_correction_tosa_MI( self, test_tensor: torch.Tensor, dim, keepdim, correction ): self._test_var_tosa_MI_pipeline( - self.VarCorrection(), (test_tensor, dim, keepdim, correction) + self.VarCorrection(dim, keepdim, correction), (test_tensor,) ) @parameterized.expand(VarCorrection.test_parameters) @@ -217,7 +231,7 @@ def test_var_correction_tosa_BI( self, test_tensor: torch.Tensor, dim, keepdim, correction ): self._test_var_tosa_BI_pipeline( - self.VarCorrection(), (test_tensor, dim, keepdim, correction) + self.VarCorrection(dim, keepdim, correction), (test_tensor,) ) @parameterized.expand(VarCorrection.test_parameters) @@ -225,9 +239,9 @@ def test_var_correction_u55_BI( self, test_tensor: torch.Tensor, dim, keepdim, correction ): self._test_var_ethosu_BI_pipeline( - self.VarCorrection(), + self.VarCorrection(dim, keepdim, correction), common.get_u55_compile_spec(), - (test_tensor, dim, keepdim, correction), + (test_tensor,), ) @parameterized.expand(VarCorrection.test_parameters) @@ -235,7 +249,7 @@ def test_var_correction_u85_BI( self, test_tensor: torch.Tensor, dim, keepdim, correction ): self._test_var_ethosu_BI_pipeline( - self.VarCorrection(), + self.VarCorrection(dim, keepdim, correction), common.get_u85_compile_spec(), - (test_tensor, dim, keepdim, correction), + (test_tensor,), ) diff --git a/examples/arm/setup.sh b/examples/arm/setup.sh index 800dfb8d6d4..8cfacbd3747 100755 --- a/examples/arm/setup.sh +++ b/examples/arm/setup.sh @@ -65,8 +65,7 @@ tosa_reference_model_rev="70ed0b40fa831387e36abdb4f7fb9670a3464f5a" # vela vela_repo_url="https://gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-vela" -vela_rev="e131bf4f528f0d461868229972e07f371dcbc881" - +vela_rev="46d88f56902be0706e051c10153ffb7620e01ee3" ######## ### Optional user args From 9804ef9ac0e88fbdc14eb46b56e2d2cd863fd5c4 Mon Sep 17 00:00:00 2001 From: cmt0 <168370296+cmt0@users.noreply.github.com> Date: Fri, 21 Feb 2025 10:25:22 -0600 Subject: [PATCH 048/584] 128 Kernels for developer configurations Differential Revision: D69600543 Pull Request resolved: https://github.com/pytorch/executorch/pull/8570 --- 
runtime/kernel/targets.bzl | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/kernel/targets.bzl b/runtime/kernel/targets.bzl index ed013260a9a..e02c1288e55 100644 --- a/runtime/kernel/targets.bzl +++ b/runtime/kernel/targets.bzl @@ -8,6 +8,7 @@ def _operator_registry_preprocessor_flags(): return select({ "DEFAULT": [], "fbsource//xplat/executorch/build/constraints:executorch-max-kernel-num-256": ["-DMAX_KERNEL_NUM=256"], + "fbsource//xplat/executorch/build/constraints:executorch-max-kernel-num-128": ["-DMAX_KERNEL_NUM=128"], "fbsource//xplat/executorch/build/constraints:executorch-max-kernel-num-64": ["-DMAX_KERNEL_NUM=64"], }) else: From 2b81e6f4d88adf5f67c41427757c6f91bbc62423 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Fri, 21 Feb 2025 08:39:16 -0800 Subject: [PATCH 049/584] irangeify some of runtime/core (#8609) --- runtime/core/array_ref.h | 3 ++- runtime/core/hierarchical_allocator.h | 3 ++- runtime/core/portable_type/c10/c10/targets.bzl | 2 +- runtime/core/portable_type/test/bfloat16_test.cpp | 9 +++++---- runtime/core/portable_type/test/targets.bzl | 2 ++ runtime/core/portable_type/test/tensor_impl_test.cpp | 3 ++- runtime/core/targets.bzl | 9 +++++++-- runtime/core/tensor_layout.cpp | 3 ++- runtime/core/test/event_tracer_test.cpp | 5 +++-- runtime/core/test/memory_allocator_test.cpp | 7 ++++--- runtime/core/test/targets.bzl | 2 ++ 11 files changed, 32 insertions(+), 16 deletions(-) diff --git a/runtime/core/array_ref.h b/runtime/core/array_ref.h index d02aac955ce..a23509e8698 100644 --- a/runtime/core/array_ref.h +++ b/runtime/core/array_ref.h @@ -29,6 +29,7 @@ #include #include +#include #include namespace executorch { @@ -149,7 +150,7 @@ class ArrayRef final { if (Length != RHS.Length) { return false; } - for (size_t i = 0; i < this->Length; i++) { + for (const auto i : c10::irange(this->Length)) { if (Data[i] != RHS.Data[i]) { return false; } diff --git a/runtime/core/hierarchical_allocator.h b/runtime/core/hierarchical_allocator.h index f2f5fd18fb5..b5031fa38e5 100644 --- a/runtime/core/hierarchical_allocator.h +++ b/runtime/core/hierarchical_allocator.h @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -96,7 +97,7 @@ class HierarchicalAllocator final { "n_allocators %" PRIu32 " > %zu", n_allocators, kSpanArraySize); - for (uint32_t i = 0; i < n_allocators; ++i) { + for (const auto i : c10::irange(n_allocators)) { span_array_[i] = Span(allocators[i].base_address(), allocators[i].size()); } diff --git a/runtime/core/portable_type/c10/c10/targets.bzl b/runtime/core/portable_type/c10/c10/targets.bzl index 64436278e79..2bde5eac5e4 100644 --- a/runtime/core/portable_type/c10/c10/targets.bzl +++ b/runtime/core/portable_type/c10/c10/targets.bzl @@ -45,7 +45,7 @@ def define_common_targets(): "-DC10_USING_CUSTOM_GENERATED_MACROS", ], visibility = [ - "//executorch/runtime/core/portable_type/...", + "//executorch/...", ], deps = select({ "DEFAULT": [], diff --git a/runtime/core/portable_type/test/bfloat16_test.cpp b/runtime/core/portable_type/test/bfloat16_test.cpp index 6b42a6e4a5e..505f80e770f 100644 --- a/runtime/core/portable_type/test/bfloat16_test.cpp +++ b/runtime/core/portable_type/test/bfloat16_test.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include @@ -41,7 +42,7 @@ uint16_t bits_from_f32(float src) { TEST(BFloat16Conversion, FloatToBFloat16AndBack) { // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,cppcoreguidelines-avoid-magic-numbers,modernize-avoid-c-arrays) float in[100]; - for (int i = 0; i < 100; ++i) { + for (const auto i : c10::irange(100)) { // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions,cppcoreguidelines-avoid-magic-numbers) in[i] = i + 1.25; } @@ -51,7 +52,7 @@ TEST(BFloat16Conversion, FloatToBFloat16AndBack) { // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,cppcoreguidelines-avoid-magic-numbers,modernize-avoid-c-arrays) float out[100]; - for (int i = 0; i < 100; ++i) { + for (const auto i : c10::irange(100)) { bfloats[i].x = bits_from_f32(in[i]); out[i] = f32_from_bits(bfloats[i].x); @@ -64,7 +65,7 @@ TEST(BFloat16Conversion, FloatToBFloat16AndBack) { TEST(BFloat16Conversion, FloatToBFloat16RNEAndBack) { // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,cppcoreguidelines-avoid-magic-numbers,modernize-avoid-c-arrays) float in[100]; - for (int i = 0; i < 100; ++i) { + for (const auto i : c10::irange(100)) { // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions,cppcoreguidelines-avoid-magic-numbers) in[i] = i + 1.25; } @@ -74,7 +75,7 @@ TEST(BFloat16Conversion, FloatToBFloat16RNEAndBack) { // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,cppcoreguidelines-avoid-magic-numbers,modernize-avoid-c-arrays) float out[100]; - for (int i = 0; i < 100; ++i) { + for (const auto i : c10::irange(100)) { bfloats[i].x = round_to_nearest_even(in[i]); out[i] = f32_from_bits(bfloats[i].x); diff --git a/runtime/core/portable_type/test/targets.bzl b/runtime/core/portable_type/test/targets.bzl index c0b4ef00c78..d8e82a15fba 100644 --- a/runtime/core/portable_type/test/targets.bzl +++ b/runtime/core/portable_type/test/targets.bzl @@ -11,6 +11,7 @@ def define_common_targets(): srcs = ["bfloat16_test.cpp"], deps = [ "//executorch/runtime/core/portable_type:portable_type", + "//executorch/runtime/core/portable_type/c10/c10:c10", ], ) @@ -52,5 +53,6 @@ def define_common_targets(): deps = [ "//executorch/runtime/core/exec_aten/util:tensor_util", "//executorch/runtime/core/portable_type:portable_type", + "//executorch/runtime/core/portable_type/c10/c10:c10", ], ) diff --git a/runtime/core/portable_type/test/tensor_impl_test.cpp b/runtime/core/portable_type/test/tensor_impl_test.cpp index bd5f82c5d1f..0b8ae05f4da 100644 --- a/runtime/core/portable_type/test/tensor_impl_test.cpp +++ b/runtime/core/portable_type/test/tensor_impl_test.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include @@ -76,7 +77,7 @@ TEST_F(TensorImplTest, TestSetSizesContigContract) { SizesType new_sizes[RANK] = {0, 0, 0, 0, 0}; // assign random sizes between 1 and 100 - for (int i = 0; i < RANK; i++) { + for (const auto i : c10::irange(RANK)) { new_sizes[i] = distribution(generator); } Error err = resize_tensor_impl(&t, {new_sizes, RANK}); diff --git a/runtime/core/targets.bzl b/runtime/core/targets.bzl index c3535688f63..d67312beda3 100644 --- a/runtime/core/targets.bzl +++ b/runtime/core/targets.bzl @@ -50,6 +50,7 @@ def define_common_targets(): ], exported_preprocessor_flags = get_core_flags(), exported_deps = [ + "//executorch/runtime/core/portable_type/c10/c10:c10", "//executorch/runtime/platform:platform", ], ) @@ -73,6 +74,7 @@ def define_common_targets(): ], exported_deps = [ ":core", + "//executorch/runtime/core/portable_type/c10/c10:c10", ], visibility = [ "//executorch/...", @@ -145,13 +147,16 @@ def define_common_targets(): ":tensor_layout", ], ) - + runtime.cxx_library( name = "tensor_layout", srcs = ["tensor_layout.cpp"], exported_headers = ["tensor_layout.h"], + deps = [ + "//executorch/runtime/core/portable_type/c10/c10:c10", + ], exported_deps = [ - ":core", + ":core", "//executorch/runtime/core/exec_aten:lib", ], visibility = ["//executorch/..."], diff --git a/runtime/core/tensor_layout.cpp b/runtime/core/tensor_layout.cpp index 748a43fc5d6..f0fac442e20 100644 --- a/runtime/core/tensor_layout.cpp +++ b/runtime/core/tensor_layout.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include #include @@ -43,7 +44,7 @@ Result TensorLayout::create( return Error::InvalidArgument; } - for (size_t i = 0; i < dim_order.size(); i++) { + for (const auto i : c10::irange(dim_order.size())) { if (dim_order[i] >= sizes.size()) { return Error::InvalidArgument; } diff --git a/runtime/core/test/event_tracer_test.cpp b/runtime/core/test/event_tracer_test.cpp index 622de1ff9fa..9591d9c06ee 100644 --- a/runtime/core/test/event_tracer_test.cpp +++ b/runtime/core/test/event_tracer_test.cpp @@ -8,6 +8,7 @@ #include +#include #include #include #include @@ -207,7 +208,7 @@ TEST(TestEventTracer, SimpleEventTracerTest) { // and also with a null pointer (to test that the null case works). DummyEventTracer dummy; std::vector dummy_event_tracer_arr = {&dummy, nullptr}; - for (size_t i = 0; i < dummy_event_tracer_arr.size(); i++) { + for (const auto i : c10::irange(dummy_event_tracer_arr.size())) { RunSimpleTracerTest(&dummy); RunSimpleTracerTest(nullptr); } @@ -234,7 +235,7 @@ TEST(TestEventTracer, SimpleEventTracerTestDelegate) { // and also with a null pointer (to test that the null case works). 
DummyEventTracer dummy; std::vector dummy_event_tracer_arr = {&dummy, nullptr}; - for (size_t i = 0; i < dummy_event_tracer_arr.size(); i++) { + for (const auto i : c10::irange(dummy_event_tracer_arr.size())) { RunSimpleTracerTestDelegate(&dummy); RunSimpleTracerTestDelegate(nullptr); } diff --git a/runtime/core/test/memory_allocator_test.cpp b/runtime/core/test/memory_allocator_test.cpp index dfd2f23a488..f0fa44ae6e7 100644 --- a/runtime/core/test/memory_allocator_test.cpp +++ b/runtime/core/test/memory_allocator_test.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -62,12 +63,12 @@ TEST_F(MemoryAllocatorTest, MemoryAllocatorAlignment) { 128, 2}; - for (int i = 0; i < arr_size; i++) { + for (const auto i : c10::irange(arr_size)) { auto align_size = alignment[i]; constexpr size_t mem_size = 1000; uint8_t mem_pool[mem_size]; MemoryAllocator allocator = MemoryAllocator(mem_size, mem_pool); - for (int j = 0; j < arr_size; j++) { + for (const auto j : c10::irange(arr_size)) { auto size = allocation[j]; void* start = allocator.allocate(size, align_size); EXPECT_ALIGNED(start, align_size); @@ -81,7 +82,7 @@ TEST_F(MemoryAllocatorTest, MemoryAllocatorNonPowerOfTwoAlignment) { MemoryAllocator allocator(mem_size, mem_pool); size_t alignment[5] = {0, 5, 6, 12, 34}; - for (int i = 0; i < 5; i++) { + for (const auto i : c10::irange(5)) { ASSERT_EQ(nullptr, allocator.allocate(8, alignment[i])); } } diff --git a/runtime/core/test/targets.bzl b/runtime/core/test/targets.bzl index 7332aad8a3d..abe52bcadff 100644 --- a/runtime/core/test/targets.bzl +++ b/runtime/core/test/targets.bzl @@ -40,6 +40,7 @@ def define_common_targets(): ], deps = [ "//executorch/runtime/core:event_tracer", + "//executorch/runtime/core/portable_type/c10/c10:c10", ], ) @@ -68,6 +69,7 @@ def define_common_targets(): ], deps = [ "//executorch/runtime/core:memory_allocator", + "//executorch/runtime/core/portable_type/c10/c10:c10", ], ) From f87940dc74598add29640e0f2195b838e3da3dcd Mon Sep 17 00:00:00 2001 From: Dave Bort Date: Fri, 21 Feb 2025 09:46:21 -0800 Subject: [PATCH 050/584] Update buck deps for new replace_scalar_with_tensor transforms Differential Revision: D69883148 Pull Request resolved: https://github.com/pytorch/executorch/pull/8588 --- backends/arm/_passes/TARGETS | 1 + backends/cadence/aot/TARGETS | 1 + backends/cadence/aot/pass_utils.py | 16 ++++++++-------- backends/cadence/aot/passes.py | 6 +++--- backends/cadence/aot/replace_ops.py | 6 +++--- backends/transforms/targets.bzl | 14 ++++++++++++++ 6 files changed, 30 insertions(+), 14 deletions(-) diff --git a/backends/arm/_passes/TARGETS b/backends/arm/_passes/TARGETS index 6ca59cfee27..843d6b159dc 100644 --- a/backends/arm/_passes/TARGETS +++ b/backends/arm/_passes/TARGETS @@ -7,6 +7,7 @@ python_library( deps = [ "//executorch/backends/arm:tosa_quant_utils", "//executorch/backends/arm:tosa_utils", + "//executorch/backends/transforms:replace_scalar_with_tensor", "//executorch/backends/xnnpack/_passes:xnnpack_passes", "//executorch/exir:lib", ], diff --git a/backends/cadence/aot/TARGETS b/backends/cadence/aot/TARGETS index 78a78bbda30..2dd3c4dc49d 100644 --- a/backends/cadence/aot/TARGETS +++ b/backends/cadence/aot/TARGETS @@ -256,6 +256,7 @@ python_library( "//executorch/backends/cadence/aot:pass_utils", "//executorch/backends/cadence/aot:remove_ops", "//executorch/backends/cadence/aot:utils", + "//executorch/backends/transforms:replace_scalar_with_tensor", "//executorch/exir:pass_base", "//executorch/exir/dialects:lib", 
"//executorch/exir/dialects/edge:lib", diff --git a/backends/cadence/aot/pass_utils.py b/backends/cadence/aot/pass_utils.py index d0166061c7f..3d73e7f8c1e 100644 --- a/backends/cadence/aot/pass_utils.py +++ b/backends/cadence/aot/pass_utils.py @@ -7,7 +7,7 @@ # pyre-strict from dataclasses import dataclass -from typing import Callable, List, Optional, Set, Union +from typing import Callable, List, Optional, Set, Type, Union import torch from executorch.backends.cadence.aot.utils import get_edge_overload_packet @@ -32,33 +32,33 @@ class CadencePassAttribute: # A dictionary that maps an ExportPass to its attributes. -ALL_CADENCE_PASSES: dict[ExportPass, CadencePassAttribute] = {} +ALL_CADENCE_PASSES: dict[Type[ExportPass], CadencePassAttribute] = {} -def get_cadence_pass_attribute(p: ExportPass) -> CadencePassAttribute: +def get_cadence_pass_attribute(p: Type[ExportPass]) -> CadencePassAttribute: return ALL_CADENCE_PASSES[p] # A decorator that registers a pass. def register_cadence_pass( pass_attribute: CadencePassAttribute, -) -> Callable[[ExportPass], ExportPass]: - def wrapper(cls: ExportPass) -> ExportPass: +) -> Callable[[Type[ExportPass]], Type[ExportPass]]: + def wrapper(cls: Type[ExportPass]) -> Type[ExportPass]: ALL_CADENCE_PASSES[cls] = pass_attribute return cls return wrapper -def get_all_available_cadence_passes() -> Set[ExportPass]: +def get_all_available_cadence_passes() -> Set[Type[ExportPass]]: return set(ALL_CADENCE_PASSES.keys()) # Create a new filter to filter out relevant passes from all passes. def create_cadence_pass_filter( opt_level: int, debug: bool = False -) -> Callable[[ExportPass], bool]: - def _filter(p: ExportPass) -> bool: +) -> Callable[[Type[ExportPass]], bool]: + def _filter(p: Type[ExportPass]) -> bool: pass_attribute = get_cadence_pass_attribute(p) return ( pass_attribute.opt_level is not None diff --git a/backends/cadence/aot/passes.py b/backends/cadence/aot/passes.py index ab23149e60d..4e27f83c13e 100644 --- a/backends/cadence/aot/passes.py +++ b/backends/cadence/aot/passes.py @@ -6,7 +6,7 @@ # pyre-strict -from typing import Any, List, Optional, Type +from typing import Any, cast, List, Optional, Type import torch import torch.fx @@ -95,9 +95,9 @@ def get_cadence_passes( passes = get_passes_in_default_order() pass_filter = create_cadence_pass_filter(opt_level) filtered_passes = [ - # pyre-fixme[20]: Call `torch.fx.passes.infra.pass_base.PassBase.__call__` expects argument `graph_module`. filtered_pass() # pyre-fixme[6]: In call `filter.__new__` ... got `List[Type[typing.Callable[[GraphModule], Optional[PassResult]]]]`. for filtered_pass in list(filter(pass_filter, passes)) ] - return filtered_passes + # The type checker can't infer the proper type of the list comprehension. 
+ return cast(List[Optional[PassResult]], filtered_passes) diff --git a/backends/cadence/aot/replace_ops.py b/backends/cadence/aot/replace_ops.py index 120f69008c1..f91fb26ddc8 100644 --- a/backends/cadence/aot/replace_ops.py +++ b/backends/cadence/aot/replace_ops.py @@ -1719,9 +1719,9 @@ def call_operator(self, op, args, kwargs, meta): ) -@register_cadence_pass(CadencePassAttribute(opt_level=0))( - ReplaceScalarWithTensorArgPass() -) +register_cadence_pass(CadencePassAttribute(opt_level=0))(ReplaceScalarWithTensorArgPass) + + @register_cadence_pass(CadencePassAttribute(opt_level=0)) class ReplaceScalarTensorWithFullPass(ExportPass): """ diff --git a/backends/transforms/targets.bzl b/backends/transforms/targets.bzl index c532798546d..ec4e1412862 100644 --- a/backends/transforms/targets.bzl +++ b/backends/transforms/targets.bzl @@ -201,6 +201,20 @@ def define_common_targets(): ], ) + runtime.python_library( + name = "replace_scalar_with_tensor", + srcs = [ + "replace_scalar_with_tensor.py", + ], + visibility = [ + "//executorch/backends/...", + ], + deps = [ + "//caffe2:torch", + "//executorch/exir:pass_base", + ], + ) + runtime.python_test( name = "test_duplicate_dynamic_quant_chain", srcs = [ From 52a3a9a21fb9858d7e91d651246005ad49cf81eb Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Fri, 21 Feb 2025 10:31:24 -0800 Subject: [PATCH 051/584] Use faster method to find the torch Python module's __path__ (#8611) I noticed that the previous method took over 1 second on my M1 Mac Pro, so I found this method, which takes under 100 ms. I believe the root cause is that actually importing torch takes a little time. --- build/Utils.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/Utils.cmake b/build/Utils.cmake index a27edf33669..113f4829b86 100644 --- a/build/Utils.cmake +++ b/build/Utils.cmake @@ -357,7 +357,7 @@ function(add_torch_to_cmake_prefix_path) endif() execute_process( COMMAND "${PYTHON_EXECUTABLE}" -c - "import torch as _; print(_.__path__[0], end='')" + "import importlib.util; print(importlib.util.find_spec('torch').submodule_search_locations[0])" OUTPUT_VARIABLE _tmp_torch_path ERROR_VARIABLE _tmp_torch_path_error RESULT_VARIABLE _tmp_torch_path_result COMMAND_ECHO STDERR From 95caa8ae26bcbb088def200e687e3e48a4a27ac8 Mon Sep 17 00:00:00 2001 From: Kimish Patel Date: Fri, 21 Feb 2025 11:13:23 -0800 Subject: [PATCH 052/584] [Executorch] Add broadcasting support to optimized op_sub (#8256) * [Executorch] Refactor op_mul's broadcasting utils Summary: Refactoring broadcast handling utils that were added for op_mul. This is in preparation to use these utils to handle broadcast for other ops such as add, sub, div.
Plus remove a redundant test Test Plan: optimized_kernels_test in CI Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] * [ExecuTorch] Add broadcast support for optimized add op Summary: This brings add op to feature parity, wrt, broadcasting, to mul op in optimized kernels lib Test Plan: tests added Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] * Update on "[ExecuTorch] Add broadcast support for optimized add op" Summary: This brings add op to feature parity, wrt, broadcasting, to mul op in optimized kernels lib Test Plan: tests added Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] * [Executorch] Refactor op_add to support op_sub broadcasting Summary: Refactor op_add to conslidate commong broadcasting related improvements Test Plan: Previously added tests Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] * [Executorch] Add broadcasting support to optimized op_sub Summary: This diff builds on top of previous one to add support for limited handling of broadcasting for sub Test Plan: tests added Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] * Update base for Update on "[Executorch] Add broadcasting support to optimized op_sub" Summary: This diff builds on top of previous one to add support for limited handling of broadcasting for sub Test Plan: tests added Reviewers: Subscribers: Tasks: Tags: cc larryliu0820 manuelcandales [ghstack-poisoned] * Update base for Update on "[Executorch] Add broadcasting support to optimized op_sub" Summary: This diff builds on top of previous one to add support for limited handling of broadcasting for sub Test Plan: tests added Reviewers: Subscribers: Tasks: Tags: cc larryliu0820 manuelcandales [ghstack-poisoned] * Update base for Update on "[Executorch] Add broadcasting support to optimized op_sub" Summary: This diff builds on top of previous one to add support for limited handling of broadcasting for sub Test Plan: tests added Reviewers: Subscribers: Tasks: Tags: cc larryliu0820 manuelcandales [ghstack-poisoned] * Update base for Update on "[Executorch] Add broadcasting support to optimized op_sub" Summary: This diff builds on top of previous one to add support for limited handling of broadcasting for sub Test Plan: tests added Reviewers: Subscribers: Tasks: Tags: cc larryliu0820 manuelcandales Differential Revision: [D69491818](https://our.internmc.facebook.com/intern/diff/D69491818) [ghstack-poisoned] * Update base for Update on "[Executorch] Add broadcasting support to optimized op_sub" Summary: This diff builds on top of previous one to add support for limited handling of broadcasting for sub Test Plan: tests added Reviewers: Subscribers: Tasks: Tags: cc larryliu0820 manuelcandales Differential Revision: [D69491818](https://our.internmc.facebook.com/intern/diff/D69491818) [ghstack-poisoned] --- kernels/optimized/cpu/op_sub.cpp | 109 ++-------------------------- kernels/optimized/cpu/targets.bzl | 1 + kernels/test/op_sub_test.cpp | 116 ++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 104 deletions(-) diff --git a/kernels/optimized/cpu/op_sub.cpp b/kernels/optimized/cpu/op_sub.cpp index 7ee880d9977..489421f1b2d 100644 --- a/kernels/optimized/cpu/op_sub.cpp +++ b/kernels/optimized/cpu/op_sub.cpp @@ -15,6 +15,8 @@ #include #include +#include + namespace torch { namespace executor { namespace native { @@ -138,110 +140,9 @@ Tensor& opt_sub_out( } } - auto selected_optimized_path = select_optimized_path(a, b, out); - if (selected_optimized_path == ElementwiseOptimizedPath::kTreatAs1d) { - // Resize for 
dynamic shape - auto error = resize_tensor(out, a.sizes()); - ET_KERNEL_CHECK_MSG( - ctx, - error == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - - ET_SWITCH_REAL_TYPES(a_type, ctx, "sub.out", CTYPE, [&]() { - CTYPE alpha_val; - ET_KERNEL_CHECK( - ctx, utils::extract_scalar(alpha, &alpha_val), InvalidArgument, ); - - using Vec = executorch::vec::Vectorized; - executorch::vec::map2( - [alpha_val](Vec x, Vec y) { return x - Vec(alpha_val) * y; }, - out.mutable_data_ptr(), - a.const_data_ptr(), - b.const_data_ptr(), - out.numel()); - }); - } else if (selected_optimized_path != ElementwiseOptimizedPath::kNone) { - const Tensor* lhs; - const Tensor* rhs; - if (selected_optimized_path == - ElementwiseOptimizedPath::kBroadcast2dBy1dReverseArguments) { - lhs = &b; - rhs = &a; - } else { - // Catch failure to update logic when subing new broadcasting possibility. - ET_DCHECK( - selected_optimized_path == - ElementwiseOptimizedPath::kBroadcast2dBy1d); - lhs = &a; - rhs = &b; - } - auto error = resize_tensor(out, lhs->sizes()); - ET_KERNEL_CHECK_MSG( - ctx, - error == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - ET_SWITCH_REAL_TYPES(out_type, ctx, "sub.out", CTYPE, [&]() { - CTYPE alpha_val; - ET_KERNEL_CHECK( - ctx, utils::extract_scalar(alpha, &alpha_val), InvalidArgument, ); - - using Vec = executorch::vec::Vectorized; - if (selected_optimized_path == - ElementwiseOptimizedPath::kBroadcast2dBy1dReverseArguments) { - executorch::vec::broadcasting_map_2d_by_1d( - [alpha_val](Vec x, Vec y) { return y - Vec(alpha_val) * x; }, - out.mutable_data_ptr(), - lhs->const_data_ptr(), - rhs->const_data_ptr(), - lhs->sizes()[lhs->dim() - 2], - lhs->sizes()[lhs->dim() - 1]); - } else { - executorch::vec::broadcasting_map_2d_by_1d( - [alpha_val](Vec x, Vec y) { return x - Vec(alpha_val) * y; }, - out.mutable_data_ptr(), - lhs->const_data_ptr(), - rhs->const_data_ptr(), - lhs->sizes()[lhs->dim() - 2], - lhs->sizes()[lhs->dim() - 1]); - } - }); - } else { - ScalarType common_type = - promoteTypes(a_type, b_type, /*half_to_float*/ true); - ET_KERNEL_CHECK(ctx, canCast(common_type, out_type), InvalidArgument, out); - - ET_KERNEL_CHECK( - ctx, - resize_to_broadcast_target_size(a, b, out) == Error::Ok, - InvalidArgument, - out); - - ET_SWITCH_REALH_TYPES(a_type, ctx, "sub.out", CTYPE_A, [&]() { - ET_SWITCH_REALH_TYPES(b_type, ctx, "sub.out", CTYPE_B, [&]() { - using CTYPE_IN = typename torch::executor:: - promote_types::type; - ET_DCHECK(CppTypeToScalarType::value == common_type); - ET_SWITCH_REALH_TYPES(out_type, ctx, "sub.out", CTYPE_OUT, [&]() { - CTYPE_IN alpha_val; - ET_KERNEL_CHECK( - ctx, utils::extract_scalar(alpha, &alpha_val), InvalidArgument, ); - - SubInner< - can_cast::value, - CTYPE_A, - CTYPE_B, - CTYPE_IN, - CTYPE_OUT>::run(a, b, alpha_val, out); - }); - }); - }); - } - - return out; + static constexpr const char op_name[] = "sub.out"; + return torch::executor::kernels::impl::opt_add_sub_out_impl( + ctx, a, b, alpha, out); } Tensor& opt_sub_scalar_out( diff --git a/kernels/optimized/cpu/targets.bzl b/kernels/optimized/cpu/targets.bzl index 41dde099290..2a66407a5ce 100644 --- a/kernels/optimized/cpu/targets.bzl +++ b/kernels/optimized/cpu/targets.bzl @@ -90,6 +90,7 @@ _OPTIMIZED_ATEN_OPS = ( name = "op_sub", deps = [ ":binary_ops", + ":add_sub_impl", "//executorch/kernels/portable/cpu:scalar_utils", "//executorch/kernels/portable/cpu/util:broadcast_util", ], diff --git a/kernels/test/op_sub_test.cpp b/kernels/test/op_sub_test.cpp index 
39fc9e14925..aafaf688b0d 100644 --- a/kernels/test/op_sub_test.cpp +++ b/kernels/test/op_sub_test.cpp @@ -99,6 +99,109 @@ class OpSubOutTest : public OperatorTest { EXPECT_TENSOR_CLOSE(out, tf.make(sizes, /*data=*/{0.1, 1.2, 3.4, 7.8})); } + template + void test_broadcast_3D() { + TensorFactory tf_a; + + Tensor a = + tf_a.make({2, 2, 3}, /*data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + Tensor b = tf_a.make({2, 1, 3}, /*data=*/{2, 3, 4, 5, 6, 7}); + + // Destination for output of mul. + Tensor out = + tf_a.make({2, 2, 3}, /*data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + Tensor expected = + tf_a.make({2, 2, 3}, /*data=*/{-1, -1, -1, 2, 2, 2, 2, 2, 2, 5, 5, 5}); + + // Check that it matches the expected output. + EXPECT_TENSOR_CLOSE(op_sub_out(a, b, 1.0, out), expected); + // b - a * 1.5 output should be + expected = tf_a.make( + {2, 2, 3}, + /*data=*/ + {0.5, + 0.0, + -0.5, + -4.0, + -4.5, + -5.0, + -5.5, + -6.0, + -6.5, + -10.0, + -10.5, + -11.0}); + EXPECT_TENSOR_CLOSE(op_sub_out(b, a, 1.5, out), expected); + } + + template + void test_broadcast_4D() { + TensorFactory tf_a; + + Tensor a = tf_a.make( + {2, 2, 3, 5}, + /*data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60}); + Tensor b = tf_a.make( + {2, 1, 3, 5}, + /*data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30}); + + // Destination for output of mul. + Tensor out = tf_a.zeros({2, 2, 3, 5}); + Tensor expected = tf_a.make( + {2, 2, 3, 5}, + /*data=*/{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30}); + + // Check that it matches the expected output. + EXPECT_TENSOR_CLOSE(op_sub_out(a, b, 1.0, out), expected); + expected = tf_a.make( + {2, 2, 3, 5}, + /*data=*/{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, -15, -15, -15, -15, -15, -15, -15, -15, -15, + -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, + -15, -15, -15, -15, -15, -15, -15, -15, -15, -30, -30, -30, + -30, -30, -30, -30, -30, -30, -30, -30, -30, -30, -30, -30}); + EXPECT_TENSOR_CLOSE(op_sub_out(b, a, 1.0, out), expected); + + b = tf_a.make( + {2, 2, 1, 5}, /*data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}); + out = tf_a.zeros({2, 2, 3, 5}); + expected = tf_a.make( + {2, 2, 3, 5}, + /*data=*/{0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 15, 15, 15, 15, 15, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 25, 25, 25, 25, 25, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 35, 35, 35, 35, 35, 40, 40, 40, 40, 40}); + + // Check that it matches the expected output. 
+ EXPECT_TENSOR_CLOSE(op_sub_out(a, b, 1.0, out), expected); + expected = tf_a.make( + {2, 2, 3, 5}, + /*data=*/{-0.5000, -1.0000, -1.5000, -2.0000, -2.5000, + -8.0000, -8.5000, -9.0000, -9.5000, -10.0000, + -15.5000, -16.0000, -16.5000, -17.0000, -17.5000, + + -18.0000, -18.5000, -19.0000, -19.5000, -20.0000, + -25.5000, -26.0000, -26.5000, -27.0000, -27.5000, + -33.0000, -33.5000, -34.0000, -34.5000, -35.0000, + + -35.5000, -36.0000, -36.5000, -37.0000, -37.5000, + -43.0000, -43.5000, -44.0000, -44.5000, -45.0000, + -50.5000, -51.0000, -51.5000, -52.0000, -52.5000, + + -53.0000, -53.5000, -54.0000, -54.5000, -55.0000, + -60.5000, -61.0000, -61.5000, -62.0000, -62.5000, + -68.0000, -68.5000, -69.0000, -69.5000, -70.0000}); + EXPECT_TENSOR_CLOSE(op_sub_out(b, a, 1.5, out), expected); + } + void test_sub_enumerate_a_types() { #define ENUMERATE_TEST_ENTRY(ctype, dtype) \ test_sub_enumerate_b_types(); @@ -237,6 +340,19 @@ TEST_F(OpSubOutTest, BroadcastScalarRank0Supported) { EXPECT_TENSOR_EQ(out, ret); } +TEST_F(OpSubOutTest, BroadcastNDTest) { + // Test 3D tensors + test_broadcast_3D(); + test_broadcast_3D(); + // Sub doesnt yet support BFloat16 + // test_broadcast_3D(); + + // Test 4D tensors + test_broadcast_4D(); + test_broadcast_4D(); + // test_broadcast_4D(); +} + // // Death Tests // From ad4675a1e2d74fd2f26b7c4041dc0dabeeb35410 Mon Sep 17 00:00:00 2001 From: derekxu Date: Fri, 21 Feb 2025 13:50:05 -0800 Subject: [PATCH 053/584] Expose executorchcoreml target to ExecuTorch-clients Differential Revision: D69996302 Pull Request resolved: https://github.com/pytorch/executorch/pull/8620 --- backends/apple/coreml/TARGETS | 1 + 1 file changed, 1 insertion(+) diff --git a/backends/apple/coreml/TARGETS b/backends/apple/coreml/TARGETS index 9d722457e34..d77e33679ab 100644 --- a/backends/apple/coreml/TARGETS +++ b/backends/apple/coreml/TARGETS @@ -76,6 +76,7 @@ runtime.cxx_python_extension( base_module = "", visibility = [ "//executorch/examples/apple/coreml/...", + "@EXECUTORCH_CLIENTS", ], external_deps = [ "pybind11", From 79580a726620ccea7f0655f26ff0f21ed936467e Mon Sep 17 00:00:00 2001 From: cccclai Date: Fri, 21 Feb 2025 15:46:13 -0800 Subject: [PATCH 054/584] fix export llama to qnn Differential Revision: D69942429 Pull Request resolved: https://github.com/pytorch/executorch/pull/8608 --- examples/models/llama/TARGETS | 3 +++ examples/qualcomm/oss_scripts/llama/TARGETS | 1 + examples/qualcomm/oss_scripts/llama/llama.py | 13 ++++++++----- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/examples/models/llama/TARGETS b/examples/models/llama/TARGETS index 810b7f550df..489d42c29c4 100644 --- a/examples/models/llama/TARGETS +++ b/examples/models/llama/TARGETS @@ -96,6 +96,9 @@ runtime.command_alias( runtime.python_library( name = "source_transformation", + visibility = [ + "//executorch/examples/...", + ], srcs = [ "source_transformation/apply_spin_quant_r1_r2.py", "source_transformation/attention.py", diff --git a/examples/qualcomm/oss_scripts/llama/TARGETS b/examples/qualcomm/oss_scripts/llama/TARGETS index d49253c5668..e4bad10a234 100644 --- a/examples/qualcomm/oss_scripts/llama/TARGETS +++ b/examples/qualcomm/oss_scripts/llama/TARGETS @@ -19,6 +19,7 @@ python_library( name = "llama_lib", srcs = ["llama.py"], deps = [ + "//executorch/examples/models/llama:source_transformation", "//caffe2:torch", "//executorch/backends/qualcomm/partition:partition", "//executorch/backends/qualcomm/quantizer:quantizer", diff --git a/examples/qualcomm/oss_scripts/llama/llama.py 
b/examples/qualcomm/oss_scripts/llama/llama.py index 48353d3ee6b..e853812a949 100755 --- a/examples/qualcomm/oss_scripts/llama/llama.py +++ b/examples/qualcomm/oss_scripts/llama/llama.py @@ -1039,10 +1039,7 @@ def _build_parser(): return parser -def main(args) -> None: - parser = _build_parser() - - args = parser.parse_args(args) +def export_llama(args) -> None: if args.compile_only and args.pre_gen_pte: exit("Cannot set both compile_only and pre_gen_pte as true") @@ -1143,6 +1140,12 @@ def main(args) -> None: raise Exception(e) +def main(): + parser = _build_parser() + args = parser.parse_args() + export_llama(args) + + # flake8: noqa: C901 if __name__ == "__main__": - main(sys.argv[1:]) + main() From cae89c5a35f75298c9c1b735bb952ccff12e5098 Mon Sep 17 00:00:00 2001 From: Nathanael See Date: Fri, 21 Feb 2025 15:46:21 -0800 Subject: [PATCH 055/584] Convolution 1D optimized axis map Differential Revision: D70009298 Pull Request resolved: https://github.com/pytorch/executorch/pull/8628 --- backends/vulkan/runtime/graph/ops/impl/Convolution.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp index 71b7ce80cc0..18599ed4ba6 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp @@ -475,7 +475,12 @@ void add_conv1d_node( const ValueRef out, const bool clamp_out) { ValueRef arg_weight = prepack_standard( - graph, weight, graph.storage_type_of(out), utils::kChannelsPacked); + graph, + weight, + graph.storage_type_of(out), + utils::kChannelsPacked, + /* passthrough = */ false, + utils::kOptimizedAxisMap); ValueRef arg_bias = prepack_biases( graph, bias, From 54dccc9c6b20f734be5c19a86baf2b73ea45f4b7 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Fri, 21 Feb 2025 16:01:30 -0800 Subject: [PATCH 056/584] ANE-friendly static llama (#8436) * init * up * up * up * up * up * lint * up * up * up * up * lint --- examples/apple/coreml/llama/export.py | 285 +++++++++ .../apple/coreml/llama/llama_transformer.py | 570 ++++++++++++++++++ examples/apple/coreml/llama/readme.md | 39 ++ examples/apple/coreml/llama/run.py | 134 ++++ 4 files changed, 1028 insertions(+) create mode 100644 examples/apple/coreml/llama/export.py create mode 100644 examples/apple/coreml/llama/llama_transformer.py create mode 100644 examples/apple/coreml/llama/readme.md create mode 100644 examples/apple/coreml/llama/run.py diff --git a/examples/apple/coreml/llama/export.py b/examples/apple/coreml/llama/export.py new file mode 100644 index 00000000000..58bc0859c79 --- /dev/null +++ b/examples/apple/coreml/llama/export.py @@ -0,0 +1,285 @@ +# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
+ +# pyre-strict + +import argparse +import json + +import sys + +import coremltools as ct +import torch +from executorch.backends.apple.coreml.compiler import CoreMLBackend # pyre-ignore +from executorch.backends.apple.coreml.partition import CoreMLPartitioner # pyre-ignore +from executorch.examples.models.llama.source_transformation.quantize import ( + EmbeddingQuantHandler, +) + +from executorch.exir.backend.utils import format_delegated_graph +from executorch.exir.capture._config import EdgeCompileConfig, ExecutorchBackendConfig +from executorch.exir.passes import MemoryPlanningPass +from executorch.exir.passes.quant_fusion_pass import QuantFusionPass +from executorch.exir.passes.sym_shape_eval_pass import ConstraintBasedSymShapeEvalPass +from executorch.extension.export_util.utils import export_to_edge, save_pte_program + +sys.path.insert(0, ".") +from llama_transformer import InputManager, ModelArgs, Transformer + + +class SplitLinearModule(torch.nn.Module): + def __init__(self, in_features, out_features, target_split_size, max_splits): + super(SplitLinearModule, self).__init__() + num_splits = max(out_features // target_split_size, 1) + if num_splits > max_splits: + num_splits = max_splits + + self.split_size = out_features // num_splits + self.split_remainder = out_features % num_splits + self.splits = torch.nn.ModuleList( + [torch.nn.Linear(in_features, self.split_size) for _ in range(num_splits)] + ) + print( + f"Splitting out_features={out_features} into {num_splits} of size {self.split_size}" + ) + if self.split_remainder > 0: + print( + f"Warning: remainder {self.split_remainder} after splitting out_features={out_features} into {num_splits} of size {self.split_size}" + ) + self.splits.append(torch.nn.Linear(in_features, self.split_remainder)) + + def split_sizes(self): + return [split.out_features for split in self.splits] + + def forward(self, x): + return torch.cat([split(x) for split in self.splits], dim=-1) + + +def replace_linear_with_split_linear(model, target_split_size, max_splits): + for name, module in model.named_children(): + if isinstance(module, torch.nn.Linear): + new_module = SplitLinearModule( + module.in_features, module.out_features, target_split_size, max_splits + ) + split_sizes = new_module.split_sizes() + if module.bias is not None: + split_bias = module.bias.split(split_sizes) + split_weights = module.weight.split(split_sizes, dim=0) + for i, split in enumerate(new_module.splits): + split.weight = torch.nn.Parameter(split_weights[i]) + if module.bias is not None: + split.bias = torch.nn.Parameter(split_bias[i]) + else: + split.bias = None + setattr(model, name, new_module) + else: + replace_linear_with_split_linear(module, target_split_size, max_splits) + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument( + "-n", + "--output_name", + default="model.pte", + help="Override the output filename of the saved pte model file.", + ) + parser.add_argument( + "-p", + "--params", + help="config.json", + ) + parser.add_argument( + "-c", + "--checkpoint", + help="checkpoint path", + ) + parser.add_argument( + "--seq_length", + type=int, + default=1, + help="length sequence to evaluate", + ) + parser.add_argument( + "--max_seq_length", + type=int, + default=128, + help="maximum length sequence to evaluate", + ) + parser.add_argument( + "--cache_size", + type=int, + default=None, + help="Cache size. 
Old items are evicted from cache", + ) + parser.add_argument( + "-E", + "--embedding-quantize", + default=None, + type=str, + help="type of embedding quantization, ',', e.g., '8,1024'.", + ) + parser.add_argument( + "--coreml-quantize", + default=None, + choices=["b4w", "c4w"], + help="This option is only for coreml: Use coreml quantization, e.g. b4w (for blockwise 4 bit weight), c4w (for channelwise 4 bit weight)", + ) + parser.add_argument( + "--use_cache_list", + action="store_true", + help="Use cache list to speed up model computation (does not work in pybindings)", + ) + parser.add_argument( + "--target_split_size", + type=int, + default=None, + help="Split linear layers into smaller chunks of target_split_size.", + ) + parser.add_argument( + "--max_splits", + type=int, + default=8, + help="Maximum number of splits to divide linear layers", + ) + + export_args = parser.parse_args() + params_path = export_args.params + checkpoint_path = export_args.checkpoint + + # Load model args + with open(params_path, "r") as f: + params = json.loads(f.read()) + + args = ModelArgs( + max_seq_len=export_args.max_seq_length, + generate_full_logits=False, + use_cache_list=export_args.use_cache_list, + **params, + ) + + with torch.device("meta"): + model = Transformer(args) + + checkpoint = torch.load( + checkpoint_path, map_location="cpu", mmap=True, weights_only=True + ) + if "model" in checkpoint: + checkpoint = checkpoint["model"] + + missing, unexpected = model.load_state_dict( + checkpoint, + strict=False, + assign=True, + ) + print("Missing keys: ", missing) + print("Unexpected keys: ", unexpected) + + float_dtype = torch.float16 # dtype for model/inputs + model.eval() + model.to(float_dtype) + + if export_args.embedding_quantize: + bitwidth, group_size = export_args.embedding_quantize.split(",") + if group_size == "none" or group_size == "None" or group_size == "0": + group_size = None + else: + group_size = int(group_size) + bitwidth = int(bitwidth) + model = EmbeddingQuantHandler( + model, + bitwidth=bitwidth, + group_size=group_size, + packed=(bitwidth in [2, 4]), + ).quantized_model() + + if export_args.target_split_size is not None: + replace_linear_with_split_linear( + model, export_args.target_split_size, export_args.max_splits + ) + + model = model.to(float_dtype) + + op_linear_quantizer_config = None + if export_args.coreml_quantize == "b4w": + op_linear_quantizer_config = { + "mode": "linear_symmetric", + "dtype": "int4", + "granularity": "per_block", + "block_size": 32, + "weight_threshold": 512, + } + elif export_args.coreml_quantize == "c4w": + op_linear_quantizer_config = { + "mode": "linear_symmetric", + "dtype": "int4", + "granularity": "per_channel", + } + + compile_specs = CoreMLBackend.generate_compile_specs( # pyre-fixme[16] + minimum_deployment_target=ct.target.iOS18, + compute_precision=ct.precision(ct.precision.FLOAT16.value), + compute_unit=ct.ComputeUnit.CPU_AND_NE, + model_type=CoreMLBackend.MODEL_TYPE.MODEL, # pyre-fixme[16] + op_linear_quantizer_config=op_linear_quantizer_config, + ) + partitioner = CoreMLPartitioner( # pyre-fixme[16] + compile_specs=compile_specs, + take_over_mutable_buffer=False, + skip_ops_for_coreml_delegation=[ + "quantized_decomposed.embedding_4bit.dtype", + "aten.embedding.default", + ], + ) + + input_manager = InputManager( + n_layers=args.n_layers, + max_batch_size=args.max_batch_size, + n_kv_heads=args.n_kv_heads, + max_seq_length=args.max_seq_len, + head_dim=args.head_dim, + use_cache_list=export_args.use_cache_list, + 
seq_length=export_args.seq_length, + dtype=float_dtype, + minus_infinity=-30000, + cache_size=export_args.cache_size, + ) + example_inputs = input_manager.get_inputs(tokens=[0]) + + edge_manager = export_to_edge( + model, + example_inputs, + edge_compile_config=EdgeCompileConfig( + _check_ir_validity=False, + _skip_type_promotion=(float_dtype == torch.float16), + _skip_dim_order=True, + ), + ) + print("Edge program") + print(edge_manager.exported_program()) + + for node in edge_manager.exported_program().graph_module.graph.nodes: + print(node.name, node.target, node.args, node.kwargs) + + edge_manager = edge_manager.to_backend(partitioner) + + print("Delegated program") + + print(format_delegated_graph(edge_manager.exported_program().graph_module)) + + executorch_program = edge_manager.to_executorch( + ExecutorchBackendConfig( + extract_delegate_segments=True, + passes=[ + QuantFusionPass(), + ], + memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False), + sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(), + ) + ) + + filename = save_pte_program(executorch_program, export_args.output_name) + print(f"Saved Executorch program to local {filename}") + + +if __name__ == "__main__": + main() # pragma: no cover diff --git a/examples/apple/coreml/llama/llama_transformer.py b/examples/apple/coreml/llama/llama_transformer.py new file mode 100644 index 00000000000..5788bcd5e5a --- /dev/null +++ b/examples/apple/coreml/llama/llama_transformer.py @@ -0,0 +1,570 @@ +# @lint-ignore-every LICENSELINT +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# Copyright (c) Meta Platforms, Inc. All Rights Reserved. + +# Please refer to README.md in the same folder for more information. + +from dataclasses import dataclass +from functools import partial +from typing import Dict, List, Optional, Tuple + +import torch +import torch.nn.functional as F + +from executorch.examples.models.llama.llama_transformer import RMSNorm + +from executorch.examples.models.llama.rope import ( + hf_apply_rotary_emb, + hf_precompute_freqs_cis, + precompute_freqs_cis, + RotaryEmbedding, +) + +from torch import nn + + +# These are just to prevent to_edge from decomposing SDPA +# A better method is to use the to_edge_transform_and_lower API for CoreML +# and not decompose SDPA +@torch.library.custom_op("coreml::sdpa", mutates_args=()) +def sdpa( + q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, attn_mask: torch.Tensor +) -> torch.Tensor: + """Same as F.scaled_dot_product_attention, but with custom op to avoid lowering during dialect conversion.""" + return torch.ops.aten.scaled_dot_product_attention.default( + q, k, v, attn_mask=attn_mask + ) + + +@torch.library.register_fake("coreml::sdpa") +def _( + q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, attn_mask: torch.Tensor +) -> torch.Tensor: + """Fake implementation with the right output shape, which is required for torch.compile/export/fx tracing.""" + expected_shape = list(q.shape) + expected_shape[-1] = v.shape[-1] + return q.new_empty(expected_shape) + + +def find_multiple(n: int, k: int) -> int: + if n % k == 0: + return n + return n + k - (n % k) + + +@dataclass +class ModelArgs: + dim: int = 2048 + n_layers: int = 16 + n_heads: int = 32 + n_kv_heads: Optional[int] = None + vocab_size: int = 128256 + hidden_dim: Optional[int] = None + head_dim: Optional[int] = None # Optional customized head_dim + multiple_of: int = 256 + ffn_dim_multiplier: Optional[float] = None + norm_eps: float = 1e-5 + max_batch_size: int = 1 + max_seq_len: 
int = 128 + max_context_len: int = 2048 + moe: bool = False # True to enable the MoE (Mixture of Experts) + num_experts: int = 8 # Number of experts + num_activated_experts: int = 2 # Number of experts to activate + + # Generate logits for all inputs. When it's True, it would take big memory usage + # at runtime. Enable it only necessary (e.g., use perplexity tools that requires + # logits for all input tokens.) + generate_full_logits: bool = False + # A dictionary mapping from pruned token-id to original token-id + input_prune_map: Optional[Dict[int, int]] = None + # A dictionary mapping from pruned token-id to original token-id + output_prune_map: Optional[Dict[int, int]] = None + use_hf_rope: bool = False # Use HuggingFace's RoPE implementation + rope_theta: Optional[float] = ( + None # The official name to override self.rope_freq_base. + ) + rope_freq_base: float = 10000.0 # The base frequency for RoPE. Keep it for BC. + use_scaled_rope: bool = True # Use scaled RoPE, introduced in llama3.1. + # Additional Model Metadata needed at runtime + rope_scale_factor: int = 8 + bos_idx: int = 1 + eos_idx: int = 3 + bos_count: int = -1 # i.e., a single EOS is used as BOS + eos_count: int = 2 + + quantization_args: Optional[dict] = None + lora_args: Optional[dict] = None + + use_cache_list: bool = True + + def __post_init__(self): + if self.n_kv_heads is None: + self.n_kv_heads = self.n_heads + + # rope_theta overrides rope_freq_base since it's the official name. + if self.rope_theta is not None: + self.rope_freq_base = self.rope_theta + + if self.hidden_dim is None: + # If hidden_dim is not explicitly set in the ModelArgs, + # then calculate implicitly based on dim and also multiple of `args.multiple_of` + multiple_of = self.multiple_of + hidden_dim = 4 * self.dim + hidden_dim = int(2 * hidden_dim / 3) + if self.ffn_dim_multiplier is not None: + hidden_dim = int(self.ffn_dim_multiplier * hidden_dim) + self.hidden_dim = find_multiple(hidden_dim, multiple_of) + + if self.head_dim is None: + self.head_dim = self.dim // self.n_heads + + +class Rope(torch.nn.Module): + def __init__(self, params: ModelArgs): + super().__init__() + self.params = params + if self.params.use_hf_rope: + self.precompute_freqs_cis = hf_precompute_freqs_cis + else: + self.precompute_freqs_cis = partial( + precompute_freqs_cis, use_scaled=self.params.use_scaled_rope + ) + freqs_cos, freqs_sin = self.precompute_freqs_cis( + self.params.head_dim, + ( + self.params.max_context_len # Normal llama2. + if self.params.ffn_dim_multiplier is None + else self.params.max_context_len * 2 # Sharded checkpoint. + ), + self.params.rope_freq_base, + scale_factor=8, + ) + self.register_buffer("freqs_cos", freqs_cos, persistent=False) + self.register_buffer("freqs_sin", freqs_sin, persistent=False) + if self.params.use_hf_rope: + self.apply_rotary_emb = hf_apply_rotary_emb + else: + self.apply_rotary_emb = RotaryEmbedding() + + def forward( + self, + q: torch.Tensor, + k: torch.Tensor, + freqs_cos: torch.Tensor, + freqs_sin: torch.Tensor, + ): + return self.apply_rotary_emb(q, k, freqs_cos, freqs_sin) + + def get_freqs(self, input_pos: Optional[torch.Tensor], seq_len: int): + """ + Get the precomputed frequencies for the given input position and sequence length. + + Args: + input_pos (torch.Tensor): The input position tensor. + seq_len (int): The sequence length. + + Returns: + Tuple[torch.Tensor, torch.Tensor]: The precomputed frequencies for the given input position and sequence length. 
+ """ + assert ( + input_pos is not None + ), "input_pos must be provided when use_kv_cache is True" + input_pos_item = input_pos[-1].item() + + # CoreML partitioner is not picking up _check_is_size + # So instead use _check as workaround. Should be easy fix for partitioner + # torch._check_is_size(input_pos_item) + torch._check(input_pos_item >= 0) + torch._check(input_pos_item + seq_len <= self.params.max_seq_len) + # pyre-ignore: Incompatible parameter type [6]: torch.narrow does expect int or Tensor + freqs_cos = self.freqs_cos.narrow(0, input_pos_item, seq_len) + # pyre-ignore: Incompatible parameter type [6] + freqs_sin = self.freqs_sin.narrow(0, input_pos_item, seq_len) + + return freqs_cos, freqs_sin + + +class FeedForward(nn.Module): + def __init__(self, args: ModelArgs): + super().__init__() + assert args.hidden_dim is not None + hidden_dim: int = args.hidden_dim + self.w1 = nn.Linear(args.dim, hidden_dim, bias=False) + self.w2 = nn.Linear(hidden_dim, args.dim, bias=False) + self.w3 = nn.Linear(args.dim, hidden_dim, bias=False) + + def forward(self, x): + return self.w2(F.silu(self.w1(x)) * self.w3(x)) + + +class ConditionalFeedForward(nn.Module): + def __init__(self, args: ModelArgs): + super().__init__() + self.dim = args.dim + hidden_dim = args.hidden_dim + if hidden_dim is None: + # If hidden_dim is not explicitly set in the ModelArgs, + # then calculate implicitly based on dim and also multiple of `args.multiple_of` + multiple_of = args.multiple_of + hidden_dim = 4 * self.dim + hidden_dim = int(2 * hidden_dim / 3) + hidden_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of) + + self.w1 = nn.Parameter(torch.randn(args.num_experts, hidden_dim, self.dim)) + self.w2 = nn.Parameter(torch.randn(args.num_experts, hidden_dim, self.dim)) + self.w3 = nn.Parameter(torch.randn(args.num_experts, hidden_dim, self.dim)) + self.num_experts = args.num_experts + + def forward(self, x: torch.Tensor, expert_indices: torch.Tensor) -> torch.Tensor: + w1_weights = self.w1[expert_indices].transpose(-1, -2) # [T, A, D, D] + w3_weights = self.w3[expert_indices].transpose(-1, -2) # [T, A, D, D] + w2_weights = self.w2[expert_indices] # [T, A, D, D] + x1 = F.silu(torch.einsum("ti,taio -> tao", x, w1_weights)) + x3 = torch.einsum("ti, taio -> tao", x, w3_weights) + expert_outs = torch.einsum("tao, taoi -> tai", (x1 * x3), w2_weights) + return expert_outs + + +class MOEFeedForward(nn.Module): + def __init__(self, config) -> None: + super().__init__() + self.gate = nn.Linear(config.dim, config.num_experts, bias=False) + self.cond_ffn = ConditionalFeedForward(config) + self.dim = config.dim + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = x.view(-1, self.dim) + # T = num_tokens, E = num_experts, D = hidden dim, A = activated experts + # x: [T, D] + scores = self.gate(x) # [T, E] + expert_weights, expert_indices = torch.topk(scores, 2, dim=-1) # [T, A], [T, A] + expert_weights = expert_weights.softmax(dim=-1) # [T, A] + expert_outs = self.cond_ffn(x, expert_indices) + return torch.einsum("tai,ta -> ti", expert_outs, expert_weights) + + +class Attention(nn.Module): + def __init__(self, args: ModelArgs, layer_id: int, rope: Rope): + super().__init__() + self.n_heads = args.n_heads + self.n_kv_heads = self.n_heads if args.n_kv_heads is None else args.n_kv_heads + + assert self.n_heads % self.n_kv_heads == 0 + model_parallel_size = 1 + self.n_local_heads = self.n_heads // model_parallel_size + self.n_local_kv_heads = self.n_kv_heads // model_parallel_size + self.n_rep = 
self.n_local_heads // self.n_local_kv_heads + self.head_dim = args.head_dim + self.max_batch_size = args.max_batch_size + self.max_seq_len = args.max_seq_len + self.dim = args.dim + self.wq = nn.Linear(self.dim, self.n_heads * self.head_dim, bias=False) + self.wk = nn.Linear(self.dim, self.n_kv_heads * self.head_dim, bias=False) + self.wv = nn.Linear(self.dim, self.n_kv_heads * self.head_dim, bias=False) + self.wo = nn.Linear(self.n_heads * self.head_dim, self.dim, bias=False) + + self.layer_id = layer_id + + self.rope = rope + + def forward( + self, + x: torch.Tensor, + freqs_cos: torch.Tensor, + freqs_sin: torch.Tensor, + k_cache: torch.Tensor, + v_cache: torch.Tensor, + attn_mask: torch.Tensor, + ): + bsz, seqlen, _ = x.shape + # QKV + q, k, v = self.wq(x), self.wk(x), self.wv(x) + # We need view_copy elimination + q = q.view(bsz, seqlen, self.n_local_heads, self.head_dim) + k = k.view(bsz, seqlen, self.n_local_kv_heads, self.head_dim) + v = v.view(bsz, seqlen, self.n_local_kv_heads, self.head_dim) + + # RoPE relative positional embeddings + q, k = self.rope.forward(q, k, freqs_cos, freqs_sin) + + q = q.transpose(1, 2) # (bs, n_local_heads, seqlen, head_dim) + k = k.transpose(1, 2) + v = v.transpose(1, 2) + + new_k = k + new_v = v + + k = torch.concat([k_cache, k], dim=2) + v = torch.concat([v_cache, v], dim=2) + + # grouped multiquery attention: expand out keys and values + if self.n_rep > 1: + k = k.repeat_interleave(self.n_rep, dim=1) + v = v.repeat_interleave(self.n_rep, dim=1) + + output = torch.ops.coreml.sdpa(q, k, v, attn_mask) + + output = output.transpose(1, 2).contiguous().view(bsz, seqlen, -1) + + output = self.wo(output) + + return output, new_k, new_v + + +class TransformerBlock(nn.Module): + def __init__(self, layer_id: int, args: ModelArgs, rope: Rope): + super().__init__() + self.n_heads = args.n_heads + self.dim = args.dim + self.head_dim = args.head_dim + self.attention = Attention(args, layer_id, rope) + if args.moe: + self.block_sparse_moe = MOEFeedForward(args) + else: + self.feed_forward = FeedForward(args) + self.attention_norm = RMSNorm(args.dim, eps=args.norm_eps) + self.ffn_norm = RMSNorm(args.dim, eps=args.norm_eps) + + def forward( + self, + x, + freqs_cos, + freqs_sin, + k_cache, + v_cache, + attn_mask, + ): # x: 1xN + norm_emb = self.attention_norm(x) + h, new_k, new_v = self.attention.forward( + norm_emb, freqs_cos, freqs_sin, k_cache, v_cache, attn_mask + ) + + h = x + h + out = h + self.feed_forward(self.ffn_norm(h)) + return out, new_k, new_v + + +class Transformer(nn.Module): + def __init__(self, params: ModelArgs): + super().__init__() + self.params = params + self.vocab_size = params.vocab_size + self.n_layers = params.n_layers + + self.tok_embeddings = nn.Embedding(params.vocab_size, params.dim) + self.rope = Rope(params) + self.layers = torch.nn.ModuleList() + for layer_id in range(params.n_layers): + self.layers.append(TransformerBlock(layer_id, params, self.rope)) + self.norm = RMSNorm(params.dim, eps=params.norm_eps) + self.output = nn.Linear(params.dim, params.vocab_size, bias=False) + self.generate_full_logits = params.generate_full_logits + self.max_seq_len = params.max_seq_len + self.input_prune_map = params.input_prune_map + self.output_prune_map = params.output_prune_map + self.use_cache_list = params.use_cache_list + + def forward( + self, + tokens: torch.LongTensor, # tokens + input_pos: torch.LongTensor, + input_length: torch.LongTensor, # input_length + k_caches: List[torch.FloatTensor], + v_caches: List[torch.FloatTensor], + 
attn_mask: torch.LongTensor, + h: Optional[torch.FloatTensor] = None, # embeddings + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + if (tokens is None) ^ (h is not None): + raise ValueError( + "You cannot specify both tokens and h at the same time, and must specify either one" + ) + if tokens is not None and h is None: + h = self.tok_embeddings(tokens) + seqlen = h.shape[1] + freqs_cos, freqs_sin = self.rope.get_freqs(input_pos, seqlen) + + k_out = [] + v_out = [] + for i, layer in enumerate(self.layers): + h, new_k, new_v = layer( + h, + freqs_cos, + freqs_sin, + k_caches[i] if self.use_cache_list else k_caches[i, :, :, :, :], + v_caches[i] if self.use_cache_list else v_caches[i, :, :, :, :], + attn_mask, + ) + k_out.append(new_k) + v_out.append(new_v) + + if not self.generate_full_logits: + # Only the last logit is used for the new generated token + h = h[:, input_length - 1, :].squeeze(1) + + h = self.norm(h) + + logits = self.output(h) + + if not self.use_cache_list: + k_out = torch.stack(k_out, dim=0) + v_out = torch.stack(v_out, dim=0) + return logits, k_out, v_out + + +class InputManager: + def __init__( + self, + n_layers: int, + max_batch_size: int, + n_kv_heads: int, + max_seq_length: int, + head_dim: int, + use_cache_list: bool, + seq_length: int, + dtype=torch.float16, + minus_infinity=-torch.inf, + cache_size=None, + ): + if cache_size is None: + cache_size = max_seq_length - seq_length + self.cache_size = cache_size + assert self.cache_size + seq_length <= max_seq_length + + self.n_layers = n_layers + self.max_batch_size = max_batch_size + self.n_kv_heads = n_kv_heads + self.head_dim = head_dim + + self.seq_length = seq_length + self.use_cache_list = use_cache_list + + if self.use_cache_list: + self.k_caches = [ + torch.zeros(self.get_cache_shape(self.cache_size)).to(dtype) + for _ in range(self.n_layers) + ] + self.v_caches = [ + torch.zeros(self.get_cache_shape(self.cache_size)).to(dtype) + for _ in range(self.n_layers) + ] + else: + self.k_caches = torch.zeros(self.get_cache_shape(self.cache_size)).to(dtype) + self.v_caches = torch.zeros(self.get_cache_shape(self.cache_size)).to(dtype) + + attn_cache = minus_infinity * torch.ones( + seq_length, self.cache_size + ) # attn for past tokens + attn_seq = torch.triu( + minus_infinity * torch.ones(self.seq_length, self.seq_length), diagonal=1 + ) # attn for current tokens + self.attn_mask = torch.concat([attn_cache, attn_seq], dim=-1).to(dtype) + assert self.attn_mask.shape == ( + self.seq_length, + self.cache_size + self.seq_length, + ) + + self.input_pos = 0 + self.cache_pos = 0 + + def get_cache_shape(self, length): + if self.use_cache_list: + return ( + self.max_batch_size, + self.n_kv_heads, + length, + self.head_dim, + ) + return ( + self.n_layers, + self.max_batch_size, + self.n_kv_heads, + length, + self.head_dim, + ) + + def _update_cache(self, start, length, new_k_caches, new_v_caches): + """ + Copies new cache data from start to start + length to cache + """ + assert self.cache_pos + length <= self.cache_size + assert start + length <= self.seq_length + + if self.use_cache_list: + for i in range(self.n_layers): + assert new_k_caches[i].shape == self.get_cache_shape(self.seq_length) + assert new_v_caches[i].shape == self.get_cache_shape(self.seq_length) + + self.k_caches[i][ + :, :, (self.cache_pos) : (self.cache_pos + length), : + ] = new_k_caches[i][:, :, start : (start + length), :] + self.v_caches[i][ + :, :, (self.cache_pos) : (self.cache_pos + length), : + ] = new_v_caches[i][:, :, start : (start + 
length), :] + else: + assert new_k_caches.shape == self.get_cache_shape(self.seq_length) + assert new_v_caches.shape == self.get_cache_shape(self.seq_length) + self.k_caches[:, :, :, (self.cache_pos) : (self.cache_pos + length), :] = ( + new_k_caches[:, :, :, start : (start + length), :] + ) + self.v_caches[:, :, :, (self.cache_pos) : (self.cache_pos + length), :] = ( + new_v_caches[:, :, :, start : (start + length), :] + ) + + self.cache_pos += length + if self.cache_pos == self.cache_size: + self.cache_pos = 0 + + def update(self, input_length, new_k_caches, new_v_caches): + # Copy as much new cache data into cache as possible without wrapping + amount_to_copy = min(input_length, self.cache_size - self.cache_pos) + self._update_cache(0, amount_to_copy, new_k_caches, new_v_caches) + if self.input_pos <= self.cache_size: + self.attn_mask[:, (self.input_pos) : (self.input_pos + amount_to_copy)] = ( + 0.0 + ) + + # Copy remainder (cache is now wrapped around and has more room) + # Attention mask needs no further updates. Attention is paid to the whole cache + remaining_to_copy = min( + input_length - amount_to_copy, self.cache_size - self.cache_pos + ) + if remaining_to_copy > 0: + self._update_cache( + amount_to_copy, remaining_to_copy, new_k_caches, new_v_caches + ) + + self.input_pos += input_length + + def get_inputs(self, tokens: List[int]): + input_length = len(tokens) + assert input_length <= self.seq_length + + return ( + # tokens + torch.concat( + [ + torch.tensor(tokens, dtype=torch.int64), + torch.zeros(self.seq_length - input_length, dtype=torch.int64), + ], + axis=-1, + ).reshape(1, -1), + # input_pos + torch.tensor([self.input_pos], dtype=torch.long), + # input_length + torch.tensor([input_length], dtype=torch.long), + # k_cache + self.k_caches, + # v_cache + self.v_caches, + # attn_mask + self.attn_mask, + ) + + def get_inputs_and_remaining_tokens(self, tokens: List[int]): + processed_tokens = min(self.seq_length, len(tokens)) + return ( + self.get_inputs(tokens[0:processed_tokens]), + tokens[processed_tokens:], + ) diff --git a/examples/apple/coreml/llama/readme.md b/examples/apple/coreml/llama/readme.md new file mode 100644 index 00000000000..353f0b56307 --- /dev/null +++ b/examples/apple/coreml/llama/readme.md @@ -0,0 +1,39 @@ +# ANE-friendly Llama models + +This directory contains ANE-friendly Llama models. + +Export model with: +``` +python export.py -n /path/to/output/model.pte -p /path/to/params.json -c /path/to/model.pth --seq_length 64 --max_seq_length 1024 --coreml-quantize c4w +``` + +(Note the script should be run from the executorch/examples/apple/coreml/llama directory.) + +The runner is written in python and is only intended to serve as an example for how the model inputs should be processed; it is not performant. + + +Run model with: +``` +python run.py -m /path/to/model.pte -t /path/to/tokenizer.model --prompt "Once upon a time," +``` + +(Note the script should be run from the executorch/examples/apple/coreml/llama directory.) + + +## Export args +* seq_length: the number of tokens processed by the model. Sequences shorter than seq_length must be padded, and sequences longer than it must be chunked. +* max_seq_length: the maximum context tokens that can be processed. +* cache_size: the size of the KV cache sequences. This parameter is optional, and defaults to max_seq_length - seq_length. If a smaller cache_size is used, older tokens are evicted from the cache and no longer play a role in attention. 
For example, if max_seq_length=1024, but cache_size is 512, the model can generate up to 1024 tokens, but only the current tokens and the previous 512 will participate in attention. In terms of computation, cache_size plays a similar role to max_seq_length in models without cache eviction. +* use_cache_list: boolean option that controls whether KV caches are passed as a list of 4D tensors, one per layer, or if they are passed as one 5D tensor. (Note that use_cache_list does not work with ExecuTorch pybindings.) +* target_split_size: this option splits linear layers into chunks of target size. For example, if target_split_size is 1024, a linear layer with (in_features=512, out_features=8096) will be split into 8 linear layers with (in_features=512, out_features=1024) and the results concatted. If not specified, the default is no splitting. +* max_splits: this controls the maximum number of splits for linear layers. It is only relevant if target_size is passed and defaults to 8. + +## Llama1B on iPhone 15 + +We are actively experimenting with different settings. But here are ones that we've found work well for Llama1B on iPhone 15 Pro: + +* Set use_cache_list +* Split linear layers with target_split_size=1024, max_splits=8 +* Use seq_length=32 or seq_length=64, both of which offer reasonable tradeoffs for prefill and decode performance. seq_length=32 is better at decode and seq_length=64 is better at prefill. + +In our tests, we set max_seq_length=1024, but if your application allows for it, performance can improve with max_seq_length=512 or by keeping max_seq_length=1024 and setting cache_size=512-seq_length. diff --git a/examples/apple/coreml/llama/run.py b/examples/apple/coreml/llama/run.py new file mode 100644 index 00000000000..65026e1f6bc --- /dev/null +++ b/examples/apple/coreml/llama/run.py @@ -0,0 +1,134 @@ +import argparse +import sys + +import sentencepiece as spm + +import torch + +from executorch.runtime import Runtime + + +sys.path.insert(0, ".") +from executorch.examples.models.llama.runner.generation import next_token +from executorch.examples.models.llama.tokenizer import tiktoken +from llama_transformer import InputManager + + +class Tokenizer: + def __init__(self, model_path: str): + # Try sentence piece + try: + print("Trying to load sentencepiece") + sp = spm.SentencePieceProcessor() + sp.load(model_path) + self.tokenizer = sp + except: + print("Trying to load tiktoken") + self.tokenizer = tiktoken.Tokenizer(model_path) + + def encode(self, text, bos, eos): + if isinstance(self.tokenizer, spm.SentencePieceProcessor): + bos_string = "" if bos else "" + eos_string = "" if eos else "" + return self.tokenizer.encode(f"{bos_string}{text}{eos_string}") + return self.tokenizer.encode(text, bos=bos, eos=eos) + + def decode_token(self, token): + if isinstance(self.tokenizer, spm.SentencePieceProcessor): + return f"{self.tokenizer.decode(token)} " + return self.tokenizer.decode_token(token) + + def stop_tokens(self): + if isinstance(self.tokenizer, spm.SentencePieceProcessor): + return [self.tokenizer.eos_id()] + return self.tokenizer.stop_tokens + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument( + "-m", + "--model", + help="model.pte", + ) + parser.add_argument( + "-t", + "--tokenizer", + help="tokenizer.model path", + ) + parser.add_argument( + "--prompt", + type=str, + default="Once upon a time,", + ) + parser.add_argument( + "--temperature", + type=float, + default=0.6, + ) + parser.add_argument( + "--top_p", + type=float, + default=0.9, + ) + 
+ args = parser.parse_args() + + tokenizer = Tokenizer(args.tokenizer) + + runtime = Runtime.get() + program = runtime.load_program(args.model) + method = program.load_method("forward") + + metadata = method.metadata + print("Method metadata: ", metadata, "\n\n") + + assert ( + metadata.num_inputs() == 6 + ), "Do not export with --use_cache_list for use in pybindings" + # k_cache input + n_layers, max_batch_size, n_kv_heads, cache_size, head_dim = ( + metadata.input_tensor_meta(3).sizes() + ) + + # mask input + seq_length, max_seq_length = metadata.input_tensor_meta(5).sizes() + + input_manager = InputManager( + n_layers=n_layers, + max_batch_size=max_batch_size, + n_kv_heads=n_kv_heads, + max_seq_length=max_seq_length, + head_dim=head_dim, + use_cache_list=False, + seq_length=seq_length, + dtype=torch.float16, + minus_infinity=-30000.0, + cache_size=cache_size, + ) + + print(args.prompt, end="") + tokens = tokenizer.encode(args.prompt, bos=True, eos=False) + while input_manager.input_pos + seq_length < max_seq_length: + while len(tokens) > 0 and ( + input_manager.input_pos + seq_length < max_seq_length + ): + inputs, remaining_tokens = input_manager.get_inputs_and_remaining_tokens( + tokens + ) + processed_tokens = len(tokens) - len(remaining_tokens) + logits, k, v = method.execute(inputs) + input_manager.update( + input_length=processed_tokens, new_k_caches=k, new_v_caches=v + ) + tokens = remaining_tokens + + tokens = [next_token(logits, args.temperature, args.top_p)] + + if tokens[-1] in tokenizer.stop_tokens(): + break + print(tokenizer.decode_token(tokens[-1]), end="", flush=True) + + +if __name__ == "__main__": + main() From f72c187d4f7718a77780249748dae871148d75ae Mon Sep 17 00:00:00 2001 From: Hansong <107070759+kirklandsign@users.noreply.github.com> Date: Fri, 21 Feb 2025 17:06:43 -0800 Subject: [PATCH 057/584] [Android benchmark] introduce warm up iterations Differential Revision: D70017241 Pull Request resolved: https://github.com/pytorch/executorch/pull/8626 --- .../main/java/org/pytorch/minibench/BenchmarkActivity.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java index 15f527475bc..afa8fca3233 100644 --- a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java +++ b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java @@ -44,6 +44,7 @@ protected void onCreate(Bundle savedInstanceState) { .get(); int numIter = intent.getIntExtra("num_iter", 50); + int numWarmupIter = intent.getIntExtra("num_warm_up_iter", 5); // TODO: Format the string with a parsable format Stats stats = new Stats(); @@ -58,6 +59,10 @@ protected Void doInBackground(Void... 
voids) { stats.errorCode = module.loadMethod("forward"); stats.loadEnd = System.nanoTime(); + for (int i = 0; i < numWarmupIter; i++) { + module.forward(); + } + for (int i = 0; i < numIter; i++) { long start = System.nanoTime(); module.forward(); From 5cf0106d71db8c1d6e43288fb939d2ee57f68616 Mon Sep 17 00:00:00 2001 From: Dave Bort Date: Fri, 21 Feb 2025 17:51:46 -0800 Subject: [PATCH 058/584] Arm backend: add passes buck dep for #8593 Differential Revision: D70003345 Pull Request resolved: https://github.com/pytorch/executorch/pull/8625 --- backends/arm/operator_support/TARGETS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backends/arm/operator_support/TARGETS b/backends/arm/operator_support/TARGETS index eb8c78bcf29..0de9f060bf5 100644 --- a/backends/arm/operator_support/TARGETS +++ b/backends/arm/operator_support/TARGETS @@ -5,8 +5,9 @@ python_library( srcs = glob(["*.py"]), typing = True, deps = [ + "//executorch/backends/arm/_passes:passes", + "//executorch/backends/arm:tosa_specification", "//executorch/backends/xnnpack/_passes:xnnpack_passes", "//executorch/exir:lib", - "//executorch/backends/arm:tosa_specification" ], ) From 54b371f75cae1b8226e07ff443ce8405df84871d Mon Sep 17 00:00:00 2001 From: skrtskrtfb <72409736+skrtskrtfb@users.noreply.github.com> Date: Fri, 21 Feb 2025 19:51:14 -0800 Subject: [PATCH 059/584] Fix memory_planning API to use run() Differential Revision: D68939461 Pull Request resolved: https://github.com/pytorch/executorch/pull/8622 --- backends/cadence/aot/memory_planning.py | 28 +++++++++++++++---- .../cadence/aot/tests/test_memory_passes.py | 23 ++++++++------- 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/backends/cadence/aot/memory_planning.py b/backends/cadence/aot/memory_planning.py index 77ae7eb7995..8c64fab61c1 100644 --- a/backends/cadence/aot/memory_planning.py +++ b/backends/cadence/aot/memory_planning.py @@ -46,6 +46,7 @@ def get_aligned_offset(pre_aligned_offset: int, alignment: int) -> int: def collect_specs_from_graph_module( graph_module: torch.fx.GraphModule, + graph_signature: ExportGraphSignature, alloc_graph_input: bool, alloc_graph_output: bool, ) -> Iterable[TensorSpec]: @@ -56,6 +57,7 @@ def collect_specs_from_graph_module( # Collect the specs from all the nodes in the graph module, and return it return collect_specs_from_nodes( graph_module.graph.nodes, + graph_signature, ignore_graph_input=not alloc_graph_input, ignore_graph_output=not alloc_graph_output, ) @@ -107,7 +109,7 @@ def memory_available(spec: TensorSpec) -> bool: # Iterate over all the specs in sorted order for spec in sorted( collect_specs_from_graph_module( - graph_module, alloc_graph_input, alloc_graph_output + graph_module, graph_signature, alloc_graph_input, alloc_graph_output ), key=lambda spec: spec.allocated_memory, reverse=True, @@ -182,7 +184,7 @@ def greedy_by_size_for_offset_calculation_with_hierarchy( # Iterate over all the specs in sorted order for spec in sorted( collect_specs_from_graph_module( - graph_module, alloc_graph_input, alloc_graph_output + graph_module, graph_signature, alloc_graph_input, alloc_graph_output ), key=lambda spec: spec.allocated_memory, reverse=True, @@ -250,6 +252,7 @@ def greedy_by_size_for_offset_calculation_with_hierarchy( def find_peak_memory_usages_per_memory( graph_module: torch.fx.GraphModule, + graph_signature: ExportGraphSignature, alloc_graph_input: bool, alloc_graph_output: bool, mem_constraints: Optional[MemConstraints] = None, @@ -265,7 +268,7 @@ def 
find_peak_memory_usages_per_memory( # go through all nodes in the graph, collect memory usage per spec.mem_id for spec in collect_specs_from_graph_module( - graph_module, alloc_graph_input, alloc_graph_output + graph_module, graph_signature, alloc_graph_input, alloc_graph_output ): if mem_constraints is not None and mem_constraints.skipped_spec(spec): continue @@ -288,6 +291,7 @@ def find_peak_memory_usages_per_memory( def find_peak_memory_usage( graph_module: torch.fx.GraphModule, + graph_signature: ExportGraphSignature, alloc_graph_input: bool, alloc_graph_output: bool, mem_constraints: Optional[MemConstraints] = None, @@ -303,7 +307,7 @@ def find_peak_memory_usage( # Iterate over all the node specs for spec in collect_specs_from_graph_module( - graph_module, alloc_graph_input, alloc_graph_output + graph_module, graph_signature, alloc_graph_input, alloc_graph_output ): if spec.lifetime[0] is None or ( mem_constraints is not None and mem_constraints.skipped_spec(spec) @@ -358,6 +362,7 @@ def print_memory_planning_info( # Get the peak memory usages per memory space peak_memory_usages_per_memory = find_peak_memory_usages_per_memory( executorch_prog.exported_program().graph_module, + executorch_prog.exported_program().graph_signature, alloc_graph_input, alloc_graph_output, mem_constraints, @@ -393,6 +398,7 @@ def print_memory_planning_info( # Get the total peak memory usage across all memory spaces total_peak_memory_usage = find_peak_memory_usage( executorch_prog.exported_program().graph_module, + executorch_prog.exported_program().graph_signature, alloc_graph_input, alloc_graph_output, mem_constraints, @@ -453,7 +459,17 @@ def _init_mem_algos(self) -> None: greedy_by_size_for_offset_calculation_with_hierarchy, ] - def __call__(self, graph_module: torch.fx.GraphModule) -> PassResult: + def __call__( + self, + graph_module: torch.fx.GraphModule, + ) -> PassResult: + return self.run(graph_module) + + def run( + self, + graph_module: torch.fx.GraphModule, + graph_signature: Optional[ExportGraphSignature] = None, + ) -> PassResult: mem_constraints = MemConstraints( opt_level=self.opt_level, alloc_graph_input=self.alloc_graph_input, @@ -475,6 +491,6 @@ def __call__(self, graph_module: torch.fx.GraphModule) -> PassResult: alloc_graph_output=self.alloc_graph_output, alignment=self.mem_alignment, ) - mem_planning(graph_module) + mem_planning.run(graph_module, graph_signature) return PassResult(graph_module, True) diff --git a/backends/cadence/aot/tests/test_memory_passes.py b/backends/cadence/aot/tests/test_memory_passes.py index d50456796c9..1844a3b4d80 100644 --- a/backends/cadence/aot/tests/test_memory_passes.py +++ b/backends/cadence/aot/tests/test_memory_passes.py @@ -46,14 +46,13 @@ def calculate_aligned_num_bytes(num: int, alignment: int = 16) -> int: inputs = (torch.ones(batch_size, input_dim),) model = PeakMemoryTestModel(input_dim, hidden_dim, output_dim) - graph_module = ( - compiler.export_to_executorch_gen_etrecord(model, inputs) - .exported_program() - .graph_module - ) + exported_program = compiler.export_to_executorch_gen_etrecord( + model, inputs + ).exported_program() peak_usage, _ = find_peak_memory_usage( - graph_module, + exported_program.graph_module, + exported_program.graph_signature, mem_constraints=None, alloc_graph_input=True, alloc_graph_output=True, @@ -73,14 +72,13 @@ def calculate_aligned_num_bytes(num: int, alignment: int = 16) -> int: input_dim, hidden_dim, hidden_dim, hidden_dim, output_dim ) - graph_module = ( - compiler.export_to_executorch_gen_etrecord(model, 
inputs) - .exported_program() - .graph_module - ) + exported_program = compiler.export_to_executorch_gen_etrecord( + model, inputs + ).exported_program() peak_usage, _ = find_peak_memory_usage( - graph_module, + exported_program.graph_module, + exported_program.graph_signature, mem_constraints=None, alloc_graph_input=True, alloc_graph_output=True, @@ -111,6 +109,7 @@ def forward(self, x): graph_module.graph.eliminate_dead_code() peak_usage, _ = find_peak_memory_usage( graph_module, + executorch_prog.exported_program().graph_signature, alloc_graph_input=False, alloc_graph_output=False, mem_constraints=None, From 366d87ea894b5f34a78f1c3ec41392eec52cfc25 Mon Sep 17 00:00:00 2001 From: Guang Yang <42389959+guangy10@users.noreply.github.com> Date: Sat, 22 Feb 2025 06:42:35 -0800 Subject: [PATCH 060/584] Run Hugging Face models via Optimum on CI (#8630) Co-authored-by: Guang Yang --- .github/workflows/trunk.yml | 85 +++++++--------- extension/export_util/export_hf_model.py | 117 ----------------------- 2 files changed, 32 insertions(+), 170 deletions(-) delete mode 100644 extension/export_util/export_hf_model.py diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 64e26847874..7f66474bdba 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -374,7 +374,13 @@ jobs: secrets: inherit strategy: matrix: - hf_model_repo: [google/gemma-2-2b] + hf_model_id: [ + google/gemma-2-2b, + Qwen/Qwen2.5-0.5B, + HuggingFaceTB/SmolLM2-135M, + meta-llama/Llama-3.2-1B, + allenai/OLMo-1B-hf + ] fail-fast: false with: secrets-env: EXECUTORCH_HF_TOKEN @@ -389,66 +395,39 @@ jobs: CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake - - echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a" - rm -rf cmake-out - cmake \ - -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DCMAKE_BUILD_TYPE=Release \ - -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ - -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ - -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ - -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ - -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ - -DEXECUTORCH_BUILD_XNNPACK=ON \ - -DPYTHON_EXECUTABLE=python \ - -Bcmake-out . - cmake --build cmake-out -j9 --target install --config Release - - echo "Build llama runner" - dir="examples/models/llama" - cmake \ - -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DCMAKE_BUILD_TYPE=Release \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ - -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ - -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ - -DEXECUTORCH_BUILD_XNNPACK=ON \ - -DPYTHON_EXECUTABLE=python \ - -Bcmake-out/${dir} \ - ${dir} - cmake --build cmake-out/${dir} -j9 --config Release echo "::endgroup::" - echo "::group::Set up HuggingFace Dependencies" - if [ -z "$SECRET_EXECUTORCH_HF_TOKEN" ]; then - echo "::error::SECRET_EXECUTORCH_HF_TOKEN is empty. For security reason secrets won't be accessible on forked PRs. Please make sure you submit a non-forked PR." - exit 1 - fi + echo "::group::Set up Hugging Face" pip install -U "huggingface_hub[cli]" huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN + git clone https://github.com/huggingface/optimum-executorch + cd optimum-executorch + # There is no release yet, for CI stability, always test from the same commit on main + git checkout 6a7e83f3eee2976fa809335bfb78a45b1ea1cb25 + pip install . 
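
For reference, the CI steps above can be reproduced outside of GitHub Actions with a short standalone script. This is an editorial sketch, not part of the workflow: it assumes optimum-executorch has been installed from source as in the step above, and it borrows the model id, the 'xnnpack' recipe, and the prompt from the workflow itself.

    # Standalone sketch of the export-and-generate flow used by the CI job above.
    # Assumes optimum-executorch is installed from source, as in the workflow.
    from optimum.executorch import ExecuTorchModelForCausalLM
    from transformers import AutoTokenizer

    model_id = "HuggingFaceTB/SmolLM2-135M"  # any entry from the hf_model_id matrix
    # Export the checkpoint to an ExecuTorch program lowered through XNNPACK.
    model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    print(
        model.text_generation(
            tokenizer=tokenizer,
            prompt="Simply put, the theory of relativity states that",
            max_seq_len=64,
        )
    )
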
pip install accelerate sentencepiece pip list echo "::endgroup::" - echo "::group::Export to ExecuTorch" - TOKENIZER_FILE=tokenizer.model - TOKENIZER_BIN_FILE=tokenizer.bin - ET_MODEL_NAME=et_model - DOWNLOADED_TOKENIZER_FILE_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${{ matrix.hf_model_repo }}" --files "${TOKENIZER_FILE}") - if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH/$TOKENIZER_FILE" ]; then - echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH" - python -m extension.llm.tokenizer.tokenizer -t "$DOWNLOADED_TOKENIZER_FILE_PATH/$TOKENIZER_FILE" -o ./${TOKENIZER_BIN_FILE} - ls ./tokenizer.bin - else - echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}." - exit 1 - fi - - python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME} - - cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is" + echo "::group::Export and Run ${{ matrix.hf_model_id }}" + # Pass matrix variable as environment variable + export MODEL_ID="${{ matrix.hf_model_id }}" + python -c " + import os + from optimum.executorch import ExecuTorchModelForCausalLM + from transformers import AutoTokenizer + + model_id = os.getenv('MODEL_ID') + print(f'Loading model: {model_id}') + model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack') + tokenizer = AutoTokenizer.from_pretrained(model_id) + generated_text = model.text_generation( + tokenizer=tokenizer, + prompt='Simply put, the theory of relativity states that', + max_seq_len=64 + ) + print(generated_text) + " echo "::endgroup::" diff --git a/extension/export_util/export_hf_model.py b/extension/export_util/export_hf_model.py deleted file mode 100644 index 929773fa4d3..00000000000 --- a/extension/export_util/export_hf_model.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -import argparse -import os - -import torch -import torch.export._trace -from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner -from executorch.exir import EdgeCompileConfig, ExecutorchBackendConfig, to_edge -from torch.nn.attention import SDPBackend -from transformers import AutoModelForCausalLM -from transformers.generation.configuration_utils import GenerationConfig -from transformers.integrations.executorch import convert_and_export_with_cache -from transformers.modeling_utils import PreTrainedModel - - -def main() -> None: - parser = argparse.ArgumentParser() - parser.add_argument( - "-hfm", - "--hf_model_repo", - required=True, - default=None, - help="a valid huggingface model repo name", - ) - parser.add_argument( - "-d", - "--dtype", - type=str, - choices=["float32", "float16", "bfloat16"], - default="float32", - help="specify the dtype for loading the model", - ) - parser.add_argument( - "-o", - "--output_name", - required=False, - default=None, - help="output name of the exported model", - ) - - args = parser.parse_args() - - # Configs to HF model - device = "cpu" - # TODO: remove getattr once https://github.com/huggingface/transformers/pull/33741 is merged - dtype = getattr(torch, args.dtype) - batch_size = 1 - max_length = 123 - cache_implementation = "static" - attn_implementation = "sdpa" - - # Load and configure a HF model - model = AutoModelForCausalLM.from_pretrained( - args.hf_model_repo, - attn_implementation=attn_implementation, - device_map=device, - torch_dtype=dtype, - generation_config=GenerationConfig( - use_cache=True, - cache_implementation=cache_implementation, - max_length=max_length, - cache_config={ - "batch_size": batch_size, - "max_cache_len": max_length, - }, - ), - ) - print(f"{model.config}") - print(f"{model.generation_config}") - - input_ids = torch.tensor([[1]], dtype=torch.long) - cache_position = torch.tensor([0], dtype=torch.long) - - def _get_constant_methods(model: PreTrainedModel): - metadata = { - "get_dtype": 5 if model.config.torch_dtype == torch.float16 else 6, - "get_bos_id": model.config.bos_token_id, - "get_eos_id": model.config.eos_token_id, - "get_head_dim": model.config.hidden_size / model.config.num_attention_heads, - "get_max_batch_size": model.generation_config.cache_config.batch_size, - "get_max_seq_len": model.generation_config.cache_config.max_cache_len, - "get_n_kv_heads": model.config.num_key_value_heads, - "get_n_layers": model.config.num_hidden_layers, - "get_vocab_size": model.config.vocab_size, - "use_kv_cache": model.generation_config.use_cache, - } - return {k: v for k, v in metadata.items() if v is not None} - - with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad(): - - exported_prog = convert_and_export_with_cache(model, input_ids, cache_position) - prog = ( - to_edge( - exported_prog, - compile_config=EdgeCompileConfig( - _check_ir_validity=False, - _skip_dim_order=True, - ), - constant_methods=_get_constant_methods(model), - ) - .to_backend(XnnpackPartitioner()) - .to_executorch(ExecutorchBackendConfig(extract_delegate_segments=True)) - ) - out_name = args.output_name if args.output_name else model.config.model_type - filename = os.path.join("./", f"{out_name}.pte") - with open(filename, "wb") as f: - prog.write_to_file(f) - print(f"Saved exported program to {filename}") - - -if __name__ == "__main__": - main() From abe8834aff0d54577c7c8ebba142f271645b9bff Mon Sep 17 00:00:00 2001 From: YIWENX14 <164585414+YIWENX14@users.noreply.github.com> Date: Sun, 23 Feb 2025 
00:23:06 -0800 Subject: [PATCH 061/584] Update visibility of target examples/models/llama:source_transformation Differential Revision: D69945779 Pull Request resolved: https://github.com/pytorch/executorch/pull/8635 --- examples/models/llama/TARGETS | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/models/llama/TARGETS b/examples/models/llama/TARGETS index 489d42c29c4..46875b34124 100644 --- a/examples/models/llama/TARGETS +++ b/examples/models/llama/TARGETS @@ -98,6 +98,7 @@ runtime.python_library( name = "source_transformation", visibility = [ "//executorch/examples/...", + "@EXECUTORCH_CLIENTS", ], srcs = [ "source_transformation/apply_spin_quant_r1_r2.py", From b6bd89df5cc9c4d36fa1f7e6cb9d85b1899ce1fc Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Sun, 23 Feb 2025 22:03:05 -0600 Subject: [PATCH 062/584] [ExecuTorch] Add broadcasting support to optimized op_div (#8643) Summary: Similar to broadcast support in op_mul Test Plan: Tests added Reviewers: Subscribers: Tasks: Tags: ghstack-source-id: d7a4ae168774f9cadd44c6c532526df50fba9e30 Pull Request resolved: https://github.com/pytorch/executorch/pull/8257 Co-authored-by: Kimish Patel --- kernels/optimized/cpu/op_div.cpp | 52 +++++++++--------------------- kernels/test/op_div_test.cpp | 54 ++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 38 deletions(-) diff --git a/kernels/optimized/cpu/op_div.cpp b/kernels/optimized/cpu/op_div.cpp index 4d7b8efe9e3..e630f1c03bd 100644 --- a/kernels/optimized/cpu/op_div.cpp +++ b/kernels/optimized/cpu/op_div.cpp @@ -120,46 +120,22 @@ Tensor& opt_div_out( out.numel()); }); } else if (selected_optimized_path != ElementwiseOptimizedPath::kNone) { - const Tensor* lhs; - const Tensor* rhs; - if (selected_optimized_path == - ElementwiseOptimizedPath::kBroadcast2dBy1dReverseArguments) { - lhs = &b; - rhs = &a; - } else { - // Catch failure to update logic when subing new broadcasting possibility. 
- ET_DCHECK(
- selected_optimized_path ==
- ElementwiseOptimizedPath::kBroadcast2dBy1d);
- lhs = &a;
- rhs = &b;
- }
- auto error = resize_tensor(out, lhs->sizes());
- ET_KERNEL_CHECK_MSG(
- ctx,
- error == Error::Ok,
- InvalidArgument,
- out,
- "Failed to resize output tensor.");
- ET_SWITCH_REALB_TYPES(out_type, ctx, "sub.out", CTYPE, [&]() {
- using Vec = executorch::vec::Vectorized<CTYPE>;
+ // Reason for using alpha is because handle_broadcast_elementwise
+ // is used for add and sub as well:
+ ET_SWITCH_REALB_TYPES(out_type, ctx, "div.out", CTYPE, [&]() {
 if (selected_optimized_path ==
- ElementwiseOptimizedPath::kBroadcast2dBy1dReverseArguments) {
- executorch::vec::broadcasting_map_2d_by_1d<CTYPE>(
- [](Vec x, Vec y) { return y / x; },
- out.mutable_data_ptr<CTYPE>(),
- lhs->const_data_ptr<CTYPE>(),
- rhs->const_data_ptr<CTYPE>(),
- lhs->sizes()[lhs->dim() - 2],
- lhs->sizes()[lhs->dim() - 1]);
+ ElementwiseOptimizedPath::kBroadcast2dBy1dReverseArguments ||
+ selected_optimized_path ==
+ ElementwiseOptimizedPath::kBroadcastLastDimReverseArguments ||
+ selected_optimized_path ==
+ ElementwiseOptimizedPath::kBroadcastNdByNdReverseArguments) {
+ auto div_lambda = [](auto x, auto y) { return y / x; };
+ return torch::executor::handle_broadcast_elementwise<CTYPE>(
+ ctx, div_lambda, a, b, out, selected_optimized_path);
 } else {
- executorch::vec::broadcasting_map_2d_by_1d<CTYPE>(
- [](Vec x, Vec y) { return x / y; },
- out.mutable_data_ptr<CTYPE>(),
- lhs->const_data_ptr<CTYPE>(),
- rhs->const_data_ptr<CTYPE>(),
- lhs->sizes()[lhs->dim() - 2],
- lhs->sizes()[lhs->dim() - 1]);
+ auto div_lambda = [](auto x, auto y) { return x / y; };
+ return torch::executor::handle_broadcast_elementwise<CTYPE>(
+ ctx, div_lambda, a, b, out, selected_optimized_path);
 }
 });
 } else {
diff --git a/kernels/test/op_div_test.cpp b/kernels/test/op_div_test.cpp
index 97d538971c5..8f41419a8e0 100644
--- a/kernels/test/op_div_test.cpp
+++ b/kernels/test/op_div_test.cpp
@@ -83,6 +83,52 @@ class OpDivOutTest : public OperatorTest {
 ET_EXPECT_KERNEL_FAILURE(context_, op_div_out(a, b, out));
 }
+ template <ScalarType DTYPE>
+ void test_broadcast_3D() {
+ TensorFactory<DTYPE> tf_a;
+
+ Tensor a =
+ tf_a.make({2, 2, 3}, /*data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+ Tensor b = tf_a.make({2, 1, 3}, /*data=*/{2, 3, 4, 5, 6, 7});
+
+ // Destination for output of div.
+ Tensor out =
+ tf_a.make({2, 2, 3}, /*data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+ Tensor expected = tf_a.make(
+ {2, 2, 3},
+ /*data=*/
+ {0.5000,
+ 0.6667,
+ 0.75002,
+ 2.0000,
+ 1.6667,
+ 1.5000,
+ 1.4000,
+ 1.3333,
+ 1.2857,
+ 2.0000,
+ 1.8333,
+ 1.7143});
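
As a quick editorial cross-check of the hand-written expectations above (and the reversed-arguments block just below), the same quotients can be reproduced with torch; this sketch is not part of the patch:

    # Editorial sketch: recompute the broadcast expectations with torch.
    import torch

    a = torch.arange(1.0, 13.0).reshape(2, 2, 3)
    b = torch.tensor([2.0, 3.0, 4.0, 5.0, 6.0, 7.0]).reshape(2, 1, 3)
    print(a / b)  # e.g. 1/2 = 0.5000, 2/3 = 0.6667, 3/4 = 0.7500, ...
    print(b / a)  # the kBroadcast*ReverseArguments case checked further below
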
+ // Check that it matches the expected output.
+ EXPECT_TENSOR_CLOSE_WITH_TOL(op_div_out(a, b, out), expected, 1e-4, 1e-4);
+ expected = tf_a.make(
+ {2, 2, 3},
+ /*data=*/
+ {2.0000,
+ 1.5000,
+ 1.3333,
+ 0.5000,
+ 0.6000,
+ 0.6667,
+ 0.7143,
+ 0.7500,
+ 0.7778,
+ 0.5000,
+ 0.5455,
+ 0.5833});
+ EXPECT_TENSOR_CLOSE_WITH_TOL(op_div_out(b, a, out), expected, 1e-4, 1e-4);
+ }
+
 /**
 * Common testing for div operator, for float output types
 */
@@ -457,6 +503,14 @@ TEST_F(OpDivOutTest, DynamicShapeUpperBoundLargerThanExpected) {
 EXPECT_TENSOR_CLOSE(out, expected_result);
 }

+TEST_F(OpDivOutTest, BroadcastNDTest) {
+ // Test 3D tensors
+ test_broadcast_3D<ScalarType::Float>();
+ // half and bfloat16 are not supported for div quite yet
+ // test_broadcast_3D<ScalarType::Half>();
+ // test_broadcast_3D<ScalarType::BFloat16>();
+}
+
 TEST_F(OpDivOutTest, DynamicShapeUnbound) {
 GTEST_SKIP() << "Dynamic shape not supported";
 TensorFactory<ScalarType::Float> tf;

From cc5b3ed70327535afb74445347b1f7647d4dd6f7 Mon Sep 17 00:00:00 2001
From: Adrian Lundell <36153706+AdrianLundell@users.noreply.github.com>
Date: Mon, 24 Feb 2025 13:29:06 +0100
Subject: [PATCH 063/584] Arm backend: test_pipeline improvements (#8644)

- Add OpNotSupportedPipeline for properly checking that ops are not
delegated.
- Make use_to_edge_transform_and_lower default to true since this is the
recommended API.
- Rename TestPassPipeline -> PassPipeline to avoid warnings in the pytest
log, and make exir_ops optional as it's not used then.
- Allow adding the first non-unique stage to a pipeline w/o suffix
(e.g. run_method_and_compare_outputs will rarely be used twice even though
it is theoretically possible, so we don't want to refer to it as
run_method_and_compare_outputs.0 if not necessary).
- Add custom_path option to all pipelines for easily dumping artifacts.
- Typing and documentation fixes.

Signed-off-by: Adrian Lundell
---
 ...test_partition_decomposed_quantized_ops.py | 5 +-
 backends/arm/test/ops/test_conv2d.py | 2 +-
 .../arm/test/passes/test_cast_int64_pass.py | 4 +-
 .../arm/test/passes/test_fold_qdq_pass.py | 4 +-
 .../test/passes/test_fuse_batchnorm_pass.py | 4 +-
 .../test/passes/test_insert_table_ops_pass.py | 4 +-
 .../passes/test_meandim_to_averagepool2d.py | 4 +-
 .../test_unsqueeze_before_repeat_pass.py | 4 +-
 backends/arm/test/tester/test_pipeline.py | 200 ++++++++++++++----
 9 files changed, 175 insertions(+), 56 deletions(-)

diff --git a/backends/arm/test/misc/test_partition_decomposed_quantized_ops.py b/backends/arm/test/misc/test_partition_decomposed_quantized_ops.py
index 4bcae4930a2..3fe339e0f9e 100644
--- a/backends/arm/test/misc/test_partition_decomposed_quantized_ops.py
+++ b/backends/arm/test/misc/test_partition_decomposed_quantized_ops.py
@@ -60,6 +60,9 @@ def test_softplus_tosa_BI(test_data: input_t1):
 pipeline.pop_stage("check_not.exir")
 # check that all ops in exir_op except add are rejected
 pipeline.add_stage_after(
- "partition", pipeline.tester.check, exir_op[1:], suffix="exir_post_partition"
+ "to_edge_transform_and_lower",
+ pipeline.tester.check,
+ exir_op[1:],
+ suffix="exir_post_partition",
 )
 pipeline.run()
diff --git a/backends/arm/test/ops/test_conv2d.py b/backends/arm/test/ops/test_conv2d.py
index 827e6dfffa3..b41738b3e8d 100644
--- a/backends/arm/test/ops/test_conv2d.py
+++ b/backends/arm/test/ops/test_conv2d.py
@@ -370,7 +370,7 @@ def test_conv2d_tosa_BI(test_module):
 pipeline = TosaPipelineBI[input_t](
 test_module, test_module.get_inputs(), aten_op, exir_op
 )
- pipeline.change_args("run_method_and_compare_outputs.0", qtol=1)
+ pipeline.change_args("run_method_and_compare_outputs", qtol=1)
 pipeline.run()


diff --git 
a/backends/arm/test/passes/test_cast_int64_pass.py b/backends/arm/test/passes/test_cast_int64_pass.py index 0465a85deb9..ae1e09f52a2 100644 --- a/backends/arm/test/passes/test_cast_int64_pass.py +++ b/backends/arm/test/passes/test_cast_int64_pass.py @@ -8,7 +8,7 @@ import torch from executorch.backends.arm._passes.cast_int64_pass import CastInt64ToInt32Pass -from executorch.backends.arm.test.tester.test_pipeline import TestPassPipeline +from executorch.backends.arm.test.tester.test_pipeline import PassPipeline input_t = Tuple[torch.Tensor] # Input x @@ -28,7 +28,7 @@ def test_int64_model_tosa_BI(): "executorch_exir_dialects_edge__ops_dim_order_ops__to_dim_order_copy_default": 1, "executorch_exir_dialects_edge__ops_aten_add_Tensor": 1, } - pipeline = TestPassPipeline[input_t]( + pipeline = PassPipeline[input_t]( module, module.get_inputs(), tosa_version="TOSA-0.80+BI", diff --git a/backends/arm/test/passes/test_fold_qdq_pass.py b/backends/arm/test/passes/test_fold_qdq_pass.py index f63fa33bca1..8d0ff90755f 100644 --- a/backends/arm/test/passes/test_fold_qdq_pass.py +++ b/backends/arm/test/passes/test_fold_qdq_pass.py @@ -9,7 +9,7 @@ from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import ( FoldAndAnnotateQParamsPass, ) -from executorch.backends.arm.test.tester.test_pipeline import TestPassPipeline +from executorch.backends.arm.test.tester.test_pipeline import PassPipeline input_t = Tuple[torch.Tensor, torch.Tensor] # Input x, y @@ -32,7 +32,7 @@ def test_fold_qdq_pass_tosa_BI(): is removed from the representation. """ module = SimpleQuantizeModel() - pipeline = TestPassPipeline[input_t]( + pipeline = PassPipeline[input_t]( module, module.get_inputs(), tosa_version="TOSA-0.80+BI", diff --git a/backends/arm/test/passes/test_fuse_batchnorm_pass.py b/backends/arm/test/passes/test_fuse_batchnorm_pass.py index b18e536b155..45b3253f848 100644 --- a/backends/arm/test/passes/test_fuse_batchnorm_pass.py +++ b/backends/arm/test/passes/test_fuse_batchnorm_pass.py @@ -8,7 +8,7 @@ import torch from executorch.backends.arm._passes.fuse_batchnorm2d_pass import FuseBatchnorm2DPass from executorch.backends.arm.test import common -from executorch.backends.arm.test.tester.test_pipeline import TestPassPipeline +from executorch.backends.arm.test.tester.test_pipeline import PassPipeline input_t = Tuple[torch.Tensor] # Input x @@ -138,7 +138,7 @@ def forward(self, x): @common.parametrize("module", modules) def test_fuse_batchnorm_tosa_MI(module): """Test various cases where the batchnorm should and shouldn't be fused.""" - pipeline = TestPassPipeline[input_t]( + pipeline = PassPipeline[input_t]( module, module.get_inputs(), tosa_version="TOSA-0.80+MI", diff --git a/backends/arm/test/passes/test_insert_table_ops_pass.py b/backends/arm/test/passes/test_insert_table_ops_pass.py index 5c761c8bcb4..bdbcef3713d 100644 --- a/backends/arm/test/passes/test_insert_table_ops_pass.py +++ b/backends/arm/test/passes/test_insert_table_ops_pass.py @@ -11,7 +11,7 @@ FoldAndAnnotateQParamsPass, ) from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass -from executorch.backends.arm.test.tester.test_pipeline import TestPassPipeline +from executorch.backends.arm.test.tester.test_pipeline import PassPipeline input_t = Tuple[torch.Tensor] # Input x @@ -27,7 +27,7 @@ def get_inputs(self) -> input_t: def test_insert_table_tosa_BI(): module = Sigmoid() - pipeline = TestPassPipeline[input_t]( + pipeline = PassPipeline[input_t]( module, module.get_inputs(), tosa_version="TOSA-0.80+BI", diff 
--git a/backends/arm/test/passes/test_meandim_to_averagepool2d.py b/backends/arm/test/passes/test_meandim_to_averagepool2d.py
index 935085c66e4..66fdff6e532 100644
--- a/backends/arm/test/passes/test_meandim_to_averagepool2d.py
+++ b/backends/arm/test/passes/test_meandim_to_averagepool2d.py
@@ -11,7 +11,7 @@
 ConvertMeanDimToAveragePoolPass,
 )
 from executorch.backends.arm.test import common
-from executorch.backends.arm.test.tester.test_pipeline import TestPassPipeline
+from executorch.backends.arm.test.tester.test_pipeline import PassPipeline

 input_t = Tuple[torch.Tensor, torch.Tensor] # Input x
@@ -65,7 +65,7 @@ def test_meandim_to_avgpool_tosa_BI(module):
 Tests the MeanDimToAveragePool2dPass which converts mean.dim to average_pool2d
 for the special case where dim is [-1, -2] and keepdim is True.
 """
- pipeline = TestPassPipeline[input_t](
+ pipeline = PassPipeline[input_t](
 module,
 module.get_inputs(),
 tosa_version="TOSA-0.80+BI",
diff --git a/backends/arm/test/passes/test_unsqueeze_before_repeat_pass.py b/backends/arm/test/passes/test_unsqueeze_before_repeat_pass.py
index 8f4a9130cea..942d7decfba 100644
--- a/backends/arm/test/passes/test_unsqueeze_before_repeat_pass.py
+++ b/backends/arm/test/passes/test_unsqueeze_before_repeat_pass.py
@@ -10,7 +10,7 @@
 UnsqueezeBeforeRepeatPass,
 )
 from executorch.backends.arm.test import common
-from executorch.backends.arm.test.tester.test_pipeline import TestPassPipeline
+from executorch.backends.arm.test.tester.test_pipeline import PassPipeline

 input_t = Tuple[
 torch.Tensor, Dict[str, int], list[str]
 ]
@@ -47,7 +47,7 @@ def test_unsqueeze_before_repeat_tosa_MI(test_data):
 """
 module = Repeat()
 data, ops_after_pass, ops_not_after_pass = test_data
- pipeline = TestPassPipeline(
+ pipeline = PassPipeline(
 module,
 data,
 tosa_version="TOSA-0.80+MI",
diff --git a/backends/arm/test/tester/test_pipeline.py b/backends/arm/test/tester/test_pipeline.py
index 0f079b3a6fd..1df2db8c4f1 100644
--- a/backends/arm/test/tester/test_pipeline.py
+++ b/backends/arm/test/tester/test_pipeline.py
@@ -4,7 +4,7 @@
 # LICENSE file in the root directory of this source tree.

 import logging
-from typing import Any, Callable, Dict, Generic, List, Optional, Type, TypeVar
+from typing import Callable, Dict, Generic, List, Optional, Type, TypeVar

 import torch
 from executorch.backends.arm.test import common
@@ -20,16 +20,19 @@ class BasePipelineMaker(Generic[T]):

 """
- The BasePiplineMaker defines a list of stages to be applied to a torch.nn.module for lowering it in the Arm backend. To be inherited and adjusted for particular targets.
- Importantly, the pipeline list can be modified before running the pipeline to support various pipeline extensions and debugging usecases.
+ The BasePipelineMaker defines a list of stages to be applied to a torch.nn.module for lowering it
+ in the Arm backend. To be inherited and adjusted for particular targets. Importantly, the
+ pipeline list can be modified before running the pipeline to support various pipeline extensions
+ and debugging use cases.

 Attributes:
 module: The module which the pipeline is applied to.
 test_data: Data used for quantizing and testing the module.
 aten_ops: Aten dialect ops expected to be found in the graph after export.
- exir_ops: Exir dialect ops expected to be found in the graph after to_edge.
- compile_spec: The compile spec used in the lowering process
- use_edge_to_transform_and_lower: Selects betweeen two possible routes for lowering the module:
+ compile_spec: The compile spec used in the lowering process. 
+ exir_ops: Exir dialect ops expected to be found in the graph after to_edge if not using
+ use_edge_to_transform_and_lower.
+ use_edge_to_transform_and_lower: Selects between two possible routes for lowering the module:

 tester.to_edge_transform_and_lower() or
 tester.to_edge().check(exir_ops).partition()

@@ -40,11 +43,11 @@ class PipelineStage:
 Helper class to store a pipeline stage as a function call + args for calling later on.

 Attributes:
- id: name of the function to be called, used for refering to stages in the pipeline
- func: handle to the function to be called
- args: args used when called
- kwargs: kwargs used when called
- is_called: keeps track of if the function has been called
+ id: name of the function to be called, used for referring to stages in the pipeline.
+ func: handle to the function to be called.
+ args: args used when called.
+ kwargs: kwargs used when called.
+ is_called: keeps track of if the function has been called.
 """

 def __init__(self, func: Callable, id: str, *args, **kwargs):
@@ -73,9 +76,9 @@ def __init__(
 module: torch.nn.Module,
 test_data: T,
 aten_ops: str | List[str],
- exir_ops: str | List[str],
 compile_spec: List[CompileSpec],
- use_to_edge_transform_and_lower: bool = False,
+ exir_ops: Optional[str | List[str]] = None,
+ use_to_edge_transform_and_lower: bool = True,
 ):

 self.tester = ArmTester(
@@ -83,7 +86,12 @@
 )

 self.aten_ops = aten_ops if isinstance(aten_ops, list) else [aten_ops]
- self.exir_ops = exir_ops if isinstance(exir_ops, list) else [exir_ops]
+ if exir_ops is None:
+ self.exir_ops = []
+ elif isinstance(exir_ops, list):
+ self.exir_ops = exir_ops
+ else:
+ self.exir_ops = [exir_ops]
 self.test_data = test_data
 self._stages = []
@@ -152,10 +160,11 @@ def add_stage(self, func: Callable, *args, **kwargs):

 suffix = str(len(stages_containing_stage_id))

- stage_id = stage_id + "." + suffix
+ if not suffix == "0":
+ stage_id = stage_id + "." + suffix

- if stage_id in id_list:
- raise ValueError("Suffix must be unique in pipeline")
+ if stage_id in id_list:
+ raise ValueError("Suffix must be unique in pipeline")

 pipeline_stage = self.PipelineStage(func, stage_id, *args, **kwargs)
 self._stages.insert(pos, pipeline_stage)
@@ -230,26 +239,42 @@ def run(self):

 class TosaPipelineBI(BasePipelineMaker, Generic[T]):
- """Lowers a graph to BI TOSA spec (with quantization) and tests it with the TOSA reference model."""
+ """
+ Lowers a graph to BI TOSA spec (with quantization) and tests it with the TOSA reference model.
+
+ Attributes:
+ module: The module which the pipeline is applied to.
+ test_data: Data used for quantizing and testing the module.
+
+ aten_ops: Aten dialect ops expected to be found in the graph after export.
+ exir_ops: Exir dialect ops expected to be found in the graph after to_edge.
+ if not using use_edge_to_transform_and_lower.
+
+ tosa_version: A string for identifying the TOSA version, see common.get_tosa_compile_spec for
+ options.
+ use_edge_to_transform_and_lower: Selects between two possible ways of lowering the module.
+ custom_path : Path to dump intermediate artifacts such as tosa and pte to.
+ """
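
For intuition on the add_stage change above (the first occurrence of a stage keeps its plain name, repeats get numeric suffixes), here is a small self-contained editorial model of that rule; it is a sketch, not code from the patch:

    # Editorial model of the new add_stage suffix rule; not part of the patch.
    def make_stage_id(stage_id, existing_ids):
        suffix = str(sum(1 for i in existing_ids if i.split(".")[0] == stage_id))
        if not suffix == "0":
            stage_id = stage_id + "." + suffix
        if stage_id in existing_ids:
            raise ValueError("Suffix must be unique in pipeline")
        return stage_id

    assert make_stage_id("run_method_and_compare_outputs", []) == "run_method_and_compare_outputs"
    assert make_stage_id("check", ["check"]) == "check.1"
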
+ """ def __init__( self, module: torch.nn.Module, - test_data: Any, - aten_op: str, - exir_op: str, + test_data: T, + aten_op: str | List[str], + exir_op: Optional[str | List[str]] = None, tosa_version: str = "TOSA-0.80+BI", - use_to_edge_transform_and_lower: bool = False, + use_to_edge_transform_and_lower: bool = True, + custom_path: str = None, ): compile_spec = common.get_tosa_compile_spec( - tosa_version, + tosa_version, custom_path=custom_path ) super().__init__( module, test_data, aten_op, - exir_op, compile_spec, + exir_op, use_to_edge_transform_and_lower, ) self.add_stage(self.tester.quantize, pos=0) @@ -284,26 +309,42 @@ def __init__( class TosaPipelineMI(BasePipelineMaker, Generic[T]): - """Lowers a graph to MI TOSA spec and tests it with the TOSA reference model""" + """ + Lowers a graph to MI TOSA spec and tests it with the TOSA reference model. + + Attributes: + module: The module which the pipeline is applied to. + test_data: Data used for quantizing and testing the module. + + aten_ops: Aten dialect ops expected to be found in the graph after export. + exir_ops: Exir dialect ops expected to be found in the graph after to_edge. + if not using use_edge_to_transform_and_lower. + + tosa_version: A string for identifying the TOSA version, see common.get_tosa_compile_spec for + options. + use_edge_to_transform_and_lower: Selects betweeen two possible ways of lowering the module. + custom_path : Path to dump intermediate artifacts such as tosa and pte to. + """ def __init__( self, module: torch.nn.Module, - test_data: Any, - aten_op: str, - exir_op: str, + test_data: T, + aten_op: str | List[str], + exir_op: Optional[str | List[str]] = None, tosa_version: str = "TOSA-0.80+MI", - use_to_edge_transform_and_lower: bool = False, + use_to_edge_transform_and_lower: bool = True, + custom_path: str = None, ): compile_spec = common.get_tosa_compile_spec( - tosa_version, + tosa_version, custom_path=custom_path ) super().__init__( module, test_data, aten_op, - exir_op, compile_spec, + exir_op, use_to_edge_transform_and_lower, ) self.add_stage_after( @@ -322,24 +363,38 @@ def __init__( class EthosU55PipelineBI(BasePipelineMaker, Generic[T]): - """Lowers a graph to u55 BI TOSA spec and tests it on the Corstone300 FVP, if run_on_fvp is true.""" + """ + Lowers a graph to u55 BI TOSA spec and tests it on the Corstone300 FVP, if run_on_fvp is true. + + Attributes: + module: The module which the pipeline is applied to. + test_data: Data used for quantizing and testing the module. + aten_ops: Aten dialect ops expected to be found in the graph after export. + + exir_ops: Exir dialect ops expected to be found in the graph after to_edge. + if not using use_edge_to_transform_and_lower. + run_on_fvp: Set to true to test the pte fileon a fvp simulator. + use_edge_to_transform_and_lower: Selects betweeen two possible ways of lowering the module. + custom_path : Path to dump intermediate artifacts such as tosa and pte to. 
+ """ def __init__( self, module: torch.nn.Module, test_data: T, aten_ops: str | List[str], - exir_ops: str | List[str], + exir_ops: Optional[str | List[str]] = None, run_on_fvp: bool = False, use_to_edge_transform_and_lower: bool = False, + custom_path: str = None, ): - compile_spec = common.get_u55_compile_spec() + compile_spec = common.get_u55_compile_spec(custom_path=custom_path) super().__init__( module, test_data, aten_ops, - exir_ops, compile_spec, + exir_ops, use_to_edge_transform_and_lower, ) self.add_stage(self.tester.quantize, pos=0) @@ -378,24 +433,38 @@ def __init__( class EthosU85PipelineBI(BasePipelineMaker, Generic[T]): - """Lowers a graph to u85 BI TOSA spec and tests it on the Corstone320 FVP, if run_on_fvp is true.""" + """ + Lowers a graph to u85 BI TOSA spec and tests it on the Corstone320 FVP, if run_on_fvp is true. + + Attributes: + module: The module which the pipeline is applied to. + test_data: Data used for quantizing and testing the module. + aten_ops: Aten dialect ops expected to be found in the graph after export. + + exir_ops: Exir dialect ops expected to be found in the graph after to_edge if not using + use_edge_to_transform_and_lower. + run_on_fvp: Set to true to test the pte fileon a fvp simulator. + use_edge_to_transform_and_lower: Selects betweeen two possible ways of lowering the module. + custom_path : Path to dump intermediate artifacts such as tosa and pte to. + """ def __init__( self, module: torch.nn.Module, test_data: T, aten_ops: str | List[str], - exir_ops: str | List[str], + exir_ops: str | List[str] = None, run_on_fvp: bool = False, use_to_edge_transform_and_lower: bool = False, + custom_path: str = None, ): - compile_spec = common.get_u85_compile_spec() + compile_spec = common.get_u85_compile_spec(custom_path=custom_path) super().__init__( module, test_data, aten_ops, - exir_ops, compile_spec, + exir_ops, use_to_edge_transform_and_lower, ) self.add_stage(self.tester.quantize, pos=0) @@ -433,7 +502,7 @@ def __init__( ) -class TestPassPipeline(BasePipelineMaker, Generic[T]): +class PassPipeline(BasePipelineMaker, Generic[T]): """ Runs single passes directly on an edge_program and checks operators before/after. @@ -450,6 +519,7 @@ class TestPassPipeline(BasePipelineMaker, Generic[T]): pass_list: List of regular passes. pass_functions: List of functions applied directly to the exported program. passes_with_exported_program: List of passes initiated with an exported_program. + custom_path : Path to dump intermediate artifacts such as tosa and pte to. Passes are run in order pass_list -> pass_functions -> passes_with_exported_program. See arm_tester.RunPasses() for more information. @@ -467,16 +537,17 @@ def __init__( pass_list: Optional[List[Type[PassType]]] = None, pass_functions: Optional[List[Callable]] = None, passes_with_exported_program: Optional[List[Type[ExportPass]]] = None, + custom_path: str = None, ): compile_spec = common.get_tosa_compile_spec( - tosa_version, + tosa_version, custom_path=custom_path ) super().__init__( module, test_data, None, - None, compile_spec, + None, use_to_edge_transform_and_lower=False, ) @@ -507,3 +578,48 @@ def __init__( if ops_not_after_pass: self.add_stage(self.tester.check_not, ops_not_after_pass, suffix="after") self.add_stage(self.tester.run_method_and_compare_outputs) + + +class OpNotSupportedPipeline(BasePipelineMaker, Generic[T]): + """ + Runs the partitioner on a module and checks that ops are not delegated to test + SupportedTOSAOperatorChecks. 
+ + Attributes: + module: The module which the pipeline is applied to. + test_data: Data with a representative shape which the operator_check is performed on. + tosa_version: The TOSA-version which to test for. + + non_delegated_ops : Exir ops expected not to be delegated. + n_expected_delegates : Number of delegate calls (0 in the usual case). + custom_path : Path to dump intermediate artifacts such as tosa and pte to. + """ + + def __init__( + self, + module: torch.nn.Module, + test_data: T, + tosa_version: str, + non_delegated_ops: Dict[str, int], + n_expected_delegates: int = 0, + custom_path: str = None, + ): + compile_spec = common.get_tosa_compile_spec( + tosa_version, custom_path=custom_path + ) + super().__init__( + module, + test_data, + [], + compile_spec, + [], + ) + self.change_args("check_not.exir", []) + self.change_args( + "check_count.exir", + { + "torch.ops.higher_order.executorch_call_delegate": n_expected_delegates, + **non_delegated_ops, + }, + ) + self.pop_stage("to_executorch") From 9c51e584cca048dba285759c7f1e7ecad6c87623 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Mon, 24 Feb 2025 12:23:11 -0800 Subject: [PATCH 064/584] Move ExecuTorch-specific stuff in shim to shim_et (try #3) (#8647) In preparation for replacing the checked-in files in shim/ with https://github.com/facebook/buck2-shims-meta as a submodule. The portions of this diff that are not a pure move are: creation of shim_et/BUCK symlink shim/tools to shim_et/tools so that fbsource//tools movement of Android toolchains from shim/BUCK to shim_et/BUCK because they are not present in buck2-shims-meta edits to .buckconfig edit to Utils.cmake to point at shim_et// instead of shim// edit to shim_et/README.md (note that shim/README.md will come back when shim/ is a submodule pointing to buck2-shims-meta) --- .buckconfig | 9 +-- build/Utils.cmake | 4 +- shim/BUCK | 23 -------- shim/README.md | 16 ------ shim_et/BUCK | 57 +++++++++++++++++++ shim_et/README.md | 19 +++++++ shim_et/tools | 1 + .../xplat/executorch/backends/backends.bzl | 0 .../backends/qualcomm/qnn_version.bzl | 0 .../xnnpack/third-party/third_party_libs.bzl | 0 .../xplat/executorch/build/env_interface.bzl | 0 .../executorch/build/runtime_wrapper.bzl | 0 .../xplat/executorch/build/selects.bzl | 0 .../xplat/executorch/build/type_defs.bzl | 0 .../xplat/executorch/codegen/codegen.bzl | 0 .../extension/pybindings/pybindings.bzl | 0 .../executorch/kernels/optimized/lib_defs.bzl | 0 .../optimized/op_registration_util.bzl | 0 .../kernels/portable/op_registration_util.bzl | 0 .../xplat/executorch/kernels/test/util.bzl | 0 20 files changed, 84 insertions(+), 45 deletions(-) delete mode 100644 shim/README.md create mode 100644 shim_et/BUCK create mode 100644 shim_et/README.md create mode 120000 shim_et/tools rename {shim => shim_et}/xplat/executorch/backends/backends.bzl (100%) rename {shim => shim_et}/xplat/executorch/backends/qualcomm/qnn_version.bzl (100%) rename {shim => shim_et}/xplat/executorch/backends/xnnpack/third-party/third_party_libs.bzl (100%) rename {shim => shim_et}/xplat/executorch/build/env_interface.bzl (100%) rename {shim => shim_et}/xplat/executorch/build/runtime_wrapper.bzl (100%) rename {shim => shim_et}/xplat/executorch/build/selects.bzl (100%) rename {shim => shim_et}/xplat/executorch/build/type_defs.bzl (100%) rename {shim => shim_et}/xplat/executorch/codegen/codegen.bzl (100%) rename {shim => shim_et}/xplat/executorch/extension/pybindings/pybindings.bzl (100%) rename {shim => 
shim_et}/xplat/executorch/kernels/optimized/lib_defs.bzl (100%) rename {shim => shim_et}/xplat/executorch/kernels/optimized/op_registration_util.bzl (100%) rename {shim => shim_et}/xplat/executorch/kernels/portable/op_registration_util.bzl (100%) rename {shim => shim_et}/xplat/executorch/kernels/test/util.bzl (100%) diff --git a/.buckconfig b/.buckconfig index 7a4aecd9710..a7c48881bb3 100644 --- a/.buckconfig +++ b/.buckconfig @@ -8,14 +8,15 @@ root = . prelude = third-party/prelude shim = shim + shim_et = shim_et [repository_aliases] config = prelude ovr_config = prelude - toolchains = shim - fbcode = shim - fbcode_macros = shim - fbsource = shim + toolchains = shim_et + fbcode = shim_et + fbcode_macros = shim_et + fbsource = shim_et buck = shim [cxx] diff --git a/build/Utils.cmake b/build/Utils.cmake index 113f4829b86..646ef5ff285 100644 --- a/build/Utils.cmake +++ b/build/Utils.cmake @@ -206,9 +206,9 @@ function(extract_sources sources_file) if(ANDROID_ABI) if("${ANDROID_ABI}" STREQUAL "arm64-v8a") - set(target_platforms_arg "--target-platforms=shim//:android-arm64") + set(target_platforms_arg "--target-platforms=shim_et//:android-arm64") elseif("${ANDROID_ABI}" STREQUAL "x86_64") - set(target_platforms_arg "--target-platforms=shim//:android-x86_64") + set(target_platforms_arg "--target-platforms=shim_et//:android-x86_64") else() message( FATAL_ERROR diff --git a/shim/BUCK b/shim/BUCK index 77f3742e2dc..55fdd5bb03f 100644 --- a/shim/BUCK +++ b/shim/BUCK @@ -61,26 +61,3 @@ remote_test_execution_toolchain( name = "remote_test_execution", visibility = ["PUBLIC"], ) - -execution_platform( - name = "android-arm64", - cpu_configuration = "prelude//cpu:arm64", - os_configuration = "prelude//os:android", - # REVIEW: not sure if this is correct - use_windows_path_separators = host_info().os.is_windows, - visibility = ["PUBLIC"], -) - -execution_platform( - name = "android-x86_64", - cpu_configuration = "prelude//cpu:x86_64", - os_configuration = "prelude//os:android", - # REVIEW: not sure if this is correct - use_windows_path_separators = host_info().os.is_windows, - visibility = ["PUBLIC"], -) - -noop_test_toolchain( - name = "test", - visibility = ["PUBLIC"], -) diff --git a/shim/README.md b/shim/README.md deleted file mode 100644 index 85933e51b2c..00000000000 --- a/shim/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# buck2 shims - -The `shim/` subtree helps the Meta-internal buck2 build system also work in the -open-source repo. - -Shims are how open-source buck2 supports a [line -like](https://github.com/pytorch/executorch/blob/50aa517549d10324147534d91d04a923b76421d6/kernels/optimized/targets.bzl#L1): - -``` -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") -``` - -In the open-source repo, `fbsource//xplat` (a Meta-internal root) doesn't exist. -The `shim = shim` line in `../.buckconfig` tells buck2 to look in -[`shim/xplat/executorch/build/runtime_wrapper.bzl`](https://github.com/pytorch/executorch/blob/main/shim/xplat/executorch/build/runtime_wrapper.bzl) -instead. 
diff --git a/shim_et/BUCK b/shim_et/BUCK new file mode 100644 index 00000000000..a1a9bdaf65d --- /dev/null +++ b/shim_et/BUCK @@ -0,0 +1,57 @@ +load("@prelude//platforms:defs.bzl", "execution_platform") +load("@prelude//tests:test_toolchain.bzl", "noop_test_toolchain") +load("@prelude//toolchains:cxx.bzl", "system_cxx_toolchain") +load("@prelude//toolchains:genrule.bzl", "system_genrule_toolchain") +load("@prelude//toolchains:python.bzl", "system_python_bootstrap_toolchain", "system_python_toolchain") +load("@prelude//toolchains:remote_test_execution.bzl", "remote_test_execution_toolchain") + +# Although the non-Android toolchains below are present in shim/BUCK, it appears that we +# have to duplicate them here or builds won't work. +system_cxx_toolchain( + name = "cxx", + cxx_flags = ["-std=c++20"], + visibility = ["PUBLIC"], +) + +system_genrule_toolchain( + name = "genrule", + visibility = ["PUBLIC"], +) + +system_python_toolchain( + name = "python", + visibility = ["PUBLIC"], +) + +system_python_bootstrap_toolchain( + name = "python_bootstrap", + visibility = ["PUBLIC"], +) + +execution_platform( + name = "android-arm64", + cpu_configuration = "prelude//cpu:arm64", + os_configuration = "prelude//os:android", + # REVIEW: not sure if this is correct + use_windows_path_separators = host_info().os.is_windows, + visibility = ["PUBLIC"], +) + +execution_platform( + name = "android-x86_64", + cpu_configuration = "prelude//cpu:x86_64", + os_configuration = "prelude//os:android", + # REVIEW: not sure if this is correct + use_windows_path_separators = host_info().os.is_windows, + visibility = ["PUBLIC"], +) + +noop_test_toolchain( + name = "test", + visibility = ["PUBLIC"], +) + +remote_test_execution_toolchain( + name = "remote_test_execution", + visibility = ["PUBLIC"], +) diff --git a/shim_et/README.md b/shim_et/README.md new file mode 100644 index 00000000000..a9e55273b0b --- /dev/null +++ b/shim_et/README.md @@ -0,0 +1,19 @@ +# buck2 shims + +The `shim_et/` subtree helps the Meta-internal buck2 build system also work in the +open-source repo. + +Shims are how open-source buck2 supports a [line +like](https://github.com/pytorch/executorch/blob/50aa517549d10324147534d91d04a923b76421d6/kernels/optimized/targets.bzl#L1): + +``` +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +``` + +In the open-source repo, `fbsource//xplat` (a Meta-internal root) doesn't exist. +The `fbsource = shim_et` line in `../.buckconfig` tells buck2 to look in +[`shim_et/xplat/executorch/build/runtime_wrapper.bzl`](https://github.com/pytorch/executorch/blob/main/shim_et/xplat/executorch/build/runtime_wrapper.bzl) +instead. + +NOTE: `tools` is a symlink to `../shim/tools` because `fbsource//` +must refer here, but `fbsource//tools` in particular lives in `shim/`. 
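
To make the resolution concrete: with the `fbsource = shim_et` alias added to `.buckconfig` earlier in this patch, the README's example load line maps into the repo like this (an editorial illustration, spelling out the README's own example):

    # Starlark load as written in ExecuTorch targets files:
    load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
    # cell alias (from .buckconfig):  fbsource -> shim_et
    # resolves to the checked-in file: shim_et/xplat/executorch/build/runtime_wrapper.bzl
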
diff --git a/shim_et/tools b/shim_et/tools new file mode 120000 index 00000000000..0b2cfeed777 --- /dev/null +++ b/shim_et/tools @@ -0,0 +1 @@ +../shim/tools \ No newline at end of file diff --git a/shim/xplat/executorch/backends/backends.bzl b/shim_et/xplat/executorch/backends/backends.bzl similarity index 100% rename from shim/xplat/executorch/backends/backends.bzl rename to shim_et/xplat/executorch/backends/backends.bzl diff --git a/shim/xplat/executorch/backends/qualcomm/qnn_version.bzl b/shim_et/xplat/executorch/backends/qualcomm/qnn_version.bzl similarity index 100% rename from shim/xplat/executorch/backends/qualcomm/qnn_version.bzl rename to shim_et/xplat/executorch/backends/qualcomm/qnn_version.bzl diff --git a/shim/xplat/executorch/backends/xnnpack/third-party/third_party_libs.bzl b/shim_et/xplat/executorch/backends/xnnpack/third-party/third_party_libs.bzl similarity index 100% rename from shim/xplat/executorch/backends/xnnpack/third-party/third_party_libs.bzl rename to shim_et/xplat/executorch/backends/xnnpack/third-party/third_party_libs.bzl diff --git a/shim/xplat/executorch/build/env_interface.bzl b/shim_et/xplat/executorch/build/env_interface.bzl similarity index 100% rename from shim/xplat/executorch/build/env_interface.bzl rename to shim_et/xplat/executorch/build/env_interface.bzl diff --git a/shim/xplat/executorch/build/runtime_wrapper.bzl b/shim_et/xplat/executorch/build/runtime_wrapper.bzl similarity index 100% rename from shim/xplat/executorch/build/runtime_wrapper.bzl rename to shim_et/xplat/executorch/build/runtime_wrapper.bzl diff --git a/shim/xplat/executorch/build/selects.bzl b/shim_et/xplat/executorch/build/selects.bzl similarity index 100% rename from shim/xplat/executorch/build/selects.bzl rename to shim_et/xplat/executorch/build/selects.bzl diff --git a/shim/xplat/executorch/build/type_defs.bzl b/shim_et/xplat/executorch/build/type_defs.bzl similarity index 100% rename from shim/xplat/executorch/build/type_defs.bzl rename to shim_et/xplat/executorch/build/type_defs.bzl diff --git a/shim/xplat/executorch/codegen/codegen.bzl b/shim_et/xplat/executorch/codegen/codegen.bzl similarity index 100% rename from shim/xplat/executorch/codegen/codegen.bzl rename to shim_et/xplat/executorch/codegen/codegen.bzl diff --git a/shim/xplat/executorch/extension/pybindings/pybindings.bzl b/shim_et/xplat/executorch/extension/pybindings/pybindings.bzl similarity index 100% rename from shim/xplat/executorch/extension/pybindings/pybindings.bzl rename to shim_et/xplat/executorch/extension/pybindings/pybindings.bzl diff --git a/shim/xplat/executorch/kernels/optimized/lib_defs.bzl b/shim_et/xplat/executorch/kernels/optimized/lib_defs.bzl similarity index 100% rename from shim/xplat/executorch/kernels/optimized/lib_defs.bzl rename to shim_et/xplat/executorch/kernels/optimized/lib_defs.bzl diff --git a/shim/xplat/executorch/kernels/optimized/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl similarity index 100% rename from shim/xplat/executorch/kernels/optimized/op_registration_util.bzl rename to shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl diff --git a/shim/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl similarity index 100% rename from shim/xplat/executorch/kernels/portable/op_registration_util.bzl rename to shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl diff --git a/shim/xplat/executorch/kernels/test/util.bzl 
b/shim_et/xplat/executorch/kernels/test/util.bzl similarity index 100% rename from shim/xplat/executorch/kernels/test/util.bzl rename to shim_et/xplat/executorch/kernels/test/util.bzl From 728c255fff0e75cfc399b22a7d745627db13e0a1 Mon Sep 17 00:00:00 2001 From: Hansong <107070759+kirklandsign@users.noreply.github.com> Date: Mon, 24 Feb 2025 14:43:22 -0800 Subject: [PATCH 065/584] [executorch][flat_tensor] implement load into and dont hold onto the segment (#8650) * [flat_tensor] Persist FreeableBuffers of external constants in method Pull Request resolved: https://github.com/pytorch/executorch/pull/8437 ## Problem Currently, the FlatTensorDataMap persists tensors, and returns a FreeableBuffer with an empty free function. The NamedDataMap should not persist data, as most cases (eg. delegate) will want it to be freed. Ownership should be on the caller; `get_data` returns a FreeableBuffer that 'owns' the data. The FreeableBuffer in turn is owned by the caller. NOTE: this doesn't support the case where we want to share plain tensors between methods/pte files at runtime. A custom NDM could support that use-case. ## This diff: 1. Introduces a 'NamedData' struct to method.h. This holds a key and a FreeeableBuffer. 2. Iterate over all the flatbuffer tensors to count the constants tagged with EXTERNAL. NOTE: this will increase load time for all users. Potentially allocate chunks of 16 and use a linked list to store external constants, or store this number in PTE file (see D69618283). 3. Allocate space for num_external_constants using the method allocator. 4. Iterate over all flatbuffer tensors and use the named_data_map to resolve EXTERNAL tensors into the array of NamedData. 5. Pass the resolved external constants to tensor_parser, along with NDM (used for mutable external tensors). 6. Resolved external tensors are stored inside method. They are freed when the method is destructed. Some notes: https://docs.google.com/document/d/1_PBi4JgODuClUPD4PCUWrKNjyUH54zOUHGUJ3QHDNes/edit?tab=t.0#heading=h.blsvwraxss7g ghstack-source-id: 267364187 TODO: add test case when two fqns point to the same data buffer. Differential Revision: [D69477027](https://our.internmc.facebook.com/intern/diff/D69477027/) * [executorch][flat_tensor] implement load into and dont hold onto the segment Pull Request resolved: https://github.com/pytorch/executorch/pull/8447 1. Implement load_into in FlatTensorDataMap 2. Do not persist 'data_ro' in the FlatTensorDataMap. From `get_data`, return the FreeableBuffer given by the data loader. TODO: add test for load_into. ghstack-source-id: 267467148 Differential Revision: [D69148652](https://our.internmc.facebook.com/intern/diff/D69148652/) --------- Co-authored-by: lucylq --- .../flat_tensor/flat_tensor_data_map.cpp | 211 +++++++++--------- extension/flat_tensor/flat_tensor_data_map.h | 50 ++++- extension/flat_tensor/test/targets.bzl | 2 +- 3 files changed, 150 insertions(+), 113 deletions(-) diff --git a/extension/flat_tensor/flat_tensor_data_map.cpp b/extension/flat_tensor/flat_tensor_data_map.cpp index 20ebc99994a..ff526e359d4 100644 --- a/extension/flat_tensor/flat_tensor_data_map.cpp +++ b/extension/flat_tensor/flat_tensor_data_map.cpp @@ -52,11 +52,14 @@ Result get_flat_tensor_metadata( for (int i = 0; i < tensors->size(); i++) { if (std::strcmp(tensors->Get(i)->fully_qualified_name()->c_str(), key) == 0) { - // TODO(T214294528): Support multiple segments in FlatTensor. 
- if (tensors->Get(i)->segment_index() != 0) { - return Error::InvalidExternalData; - } - return tensors->Get(i); + const auto* metadata = tensors->Get(i); + ET_CHECK_OR_RETURN_ERROR( + metadata->segment_index() >= 0 && metadata->offset() >= 0, + InvalidExternalData, + "Invalid segment_index %d or offset %" PRIu64 "; malformed PTD file.", + metadata->segment_index(), + metadata->offset()); + return metadata; } } return Error::NotFound; @@ -75,6 +78,23 @@ Result create_tensor_layout( scalar_type); } +Result get_and_check_segment_offset( + const flatbuffers::Vector< + flatbuffers::Offset>* segments, + const flat_tensor_flatbuffer::TensorMetadata* metadata) { + ET_CHECK_OR_RETURN_ERROR( + segments != nullptr, + InvalidExternalData, + "No segments in external data flatbuffer."); + + ET_CHECK_OR_RETURN_ERROR( + metadata->segment_index() < segments->size(), + InvalidExternalData, + "Invalid segment_index %d; malformed PTD file.", + metadata->segment_index()); + return segments->Get(metadata->segment_index())->offset(); +} + } // namespace ET_NODISCARD Result FlatTensorDataMap::get_metadata( @@ -89,39 +109,73 @@ ET_NODISCARD Result FlatTensorDataMap::get_metadata( ET_NODISCARD Result FlatTensorDataMap::get_data( const char* key) const { - auto tensor_metadata = flat_tensor_->tensors(); - - Result metadata_res = - get_flat_tensor_metadata(key, tensor_metadata); - if (!metadata_res.ok()) { - return metadata_res.error(); + Result metadata = + get_flat_tensor_metadata(key, flat_tensor_->tensors()); + if (!metadata.ok()) { + return metadata.error(); } - const auto metadata = metadata_res.get(); - if (metadata->segment_index() < 0 || metadata->offset() < 0) { - // Invalid segment_index/offset; malformed PTD file. - return Error::InvalidExternalData; + Result tensor_layout = + create_tensor_layout(metadata.get()); + if (!tensor_layout.ok()) { + return tensor_layout.error(); } - - Result tensor_layout_res = create_tensor_layout(metadata); - if (!tensor_layout_res.ok()) { - return tensor_layout_res.error(); + Result segment_offset = + get_and_check_segment_offset(flat_tensor_->segments(), metadata.get()); + if (!segment_offset.ok()) { + return segment_offset.error(); } - // This FreeableBuffer doesn't own the underlying data, and will not free it, - // which is why the free function is a nullptr. - // TODO(T214294528): Remove data_ro_ and instead load the data here, letting - // FreeableBuffer own it. - return FreeableBuffer( - static_cast(data_ro_.data()) + metadata->offset(), - tensor_layout_res.get().nbytes(), - nullptr); + // Load constant data. 
+ ET_CHECK_OR_RETURN_ERROR( + segment_offset.get() < + header_.segment_base_offset + header_.segment_data_size, + InvalidExternalData, + "Invalid segment offset %d is larger than the segment_base_offset + segment_data_size %" PRIu64 + "; malformed PTD file.", + segment_offset.get(), + header_.segment_base_offset + header_.segment_data_size); + return loader_->load( + header_.segment_base_offset + segment_offset.get() + + metadata.get()->offset(), + tensor_layout.get().nbytes(), + DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::External)); } ET_NODISCARD Result FlatTensorDataMap::load_data_into( ET_UNUSED const char* key, ET_UNUSED void* buffer, ET_UNUSED size_t size) const { - return Error::NotImplemented; + Result metadata = + get_flat_tensor_metadata(key, flat_tensor_->tensors()); + if (!metadata.ok()) { + return metadata.error(); + } + Result tensor_layout = + create_tensor_layout(metadata.get()); + if (!tensor_layout.ok()) { + return tensor_layout.error(); + } + ET_CHECK_OR_RETURN_ERROR( + size < tensor_layout.get().nbytes(), + InvalidArgument, + "Buffer size %zu is smaller than tensor size %zu", + size, + tensor_layout.get().nbytes()); + + Result segment_offset = + get_and_check_segment_offset(flat_tensor_->segments(), metadata.get()); + if (!segment_offset.ok()) { + return segment_offset.error(); + } + // Load mutable data. + DataLoader::SegmentInfo info = DataLoader::SegmentInfo( + DataLoader::SegmentInfo::Type::Mutable, 0, nullptr); + return loader_->load_into( + header_.segment_base_offset + segment_offset.get() + + metadata.get()->offset(), + tensor_layout.get().nbytes(), + info, + buffer); } ET_NODISCARD Result FlatTensorDataMap::get_num_keys() const { @@ -138,45 +192,34 @@ ET_NODISCARD Result FlatTensorDataMap::get_key( /* static */ Result FlatTensorDataMap::load( DataLoader* loader) { - // Load data map. - size_t flatbuffer_offset = 0; - size_t flatbuffer_size = 0; - size_t segment_base_offset = 0; - size_t segment_data_size = 0; - { - // Check header. - Result header = loader->load( - /*offset=*/0, - FlatTensorHeader::kNumHeadBytes, - DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::External)); - if (!header.ok()) { - return header.error(); - } - Result fh = - FlatTensorHeader::Parse(header->data(), header->size()); - if (fh.ok()) { - // The header has the data map size. - flatbuffer_offset = fh->flatbuffer_offset; - flatbuffer_size = fh->flatbuffer_size; - segment_base_offset = fh->segment_base_offset; - segment_data_size = fh->segment_data_size; - } else if (fh.error() == Error::NotFound) { - // No header, throw error. - ET_LOG(Error, "No FlatTensorHeader found."); - return fh.error(); - } else { - // corruption, throw error. - ET_LOG(Error, "Flat tensor header may be corrupt."); - return fh.error(); - } + // Check header. + Result header = loader->load( + /*offset=*/0, + FlatTensorHeader::kNumHeadBytes, + DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::External)); + if (!header.ok()) { + ET_LOG(Error, "Failed to load header."); + return header.error(); + } + Result fh = + FlatTensorHeader::Parse(header->data(), header->size()); + if (fh.error() == Error::NotFound) { + // No header, throw error. + ET_LOG(Error, "No FlatTensorHeader found."); + return fh.error(); + } else if (fh.error() != Error::Ok) { + // corruption, throw error. + ET_LOG(Error, "Flat tensor header may be corrupt."); + return fh.error(); } // Load flatbuffer data as a segment. 
Result flat_tensor_data = loader->load( /*offset=*/0, - flatbuffer_offset + flatbuffer_size, + fh->flatbuffer_offset + fh->flatbuffer_size, DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::External)); if (!flat_tensor_data.ok()) { + ET_LOG(Error, "Failed to load flat_tensor data."); return flat_tensor_data.error(); } @@ -204,54 +247,8 @@ ET_NODISCARD Result FlatTensorDataMap::get_key( const flat_tensor_flatbuffer::FlatTensor* flat_tensor = flat_tensor_flatbuffer::GetFlatTensor(flat_tensor_data->data()); - // Validate flatbuffer data. - flatbuffers::Verifier verifier( - reinterpret_cast(flat_tensor_data->data()), - flat_tensor_data->size()); - bool ok = flat_tensor_flatbuffer::VerifyFlatTensorBuffer(verifier); - ET_CHECK_OR_RETURN_ERROR( - ok, - InvalidExternalData, - "Verification failed; data may be truncated or corrupt"); - - // Get pointer to tensor metadata. - const auto* s_tensor_metadata = flat_tensor->tensors(); - if (s_tensor_metadata == nullptr) { - ET_LOG(Error, "FlatTensor has no tensor metadata."); - return Error::InvalidExternalData; - } - - // Load constant data. - const auto* s_data_segment = flat_tensor->segments(); - - // TODO(T214294528): Support multiple segments in FlatTensor. - if (s_data_segment->size() != 1) { - ET_LOG( - Error, - "FlatTensor has %u segments, only 1 supported.", - s_data_segment->size()); - } - // First segment size should be <= the total segment data size. - int segment_size = s_data_segment->Get(0)->size(); - int segment_offset = s_data_segment->Get(0)->offset(); - if (segment_size > segment_data_size) { - ET_LOG( - Error, - "FlatTensor segment size %d > segment data size %zu", - segment_size, - segment_data_size); - } - - Result data_ro = loader->load( - /*offset=*/segment_base_offset + segment_offset, - segment_size, - DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::External)); - if (!data_ro.ok()) { - return data_ro.error(); - } - return FlatTensorDataMap( - std::move(flat_tensor_data.get()), flat_tensor, std::move(data_ro.get())); + fh.get(), std::move(flat_tensor_data.get()), flat_tensor, loader); } } // namespace extension diff --git a/extension/flat_tensor/flat_tensor_data_map.h b/extension/flat_tensor/flat_tensor_data_map.h index 7bd33e68927..00f4bf07d19 100644 --- a/extension/flat_tensor/flat_tensor_data_map.h +++ b/extension/flat_tensor/flat_tensor_data_map.h @@ -10,6 +10,8 @@ #include +#include + #include #include #include @@ -41,17 +43,50 @@ class FlatTensorDataMap final : public executorch::runtime::NamedDataMap { static executorch::runtime::Result load( executorch::runtime::DataLoader* loader); + /** + * Retrieve the metadata for the specified key. + * + * @param[in] key The name of the tensor to get metadata on. + * + * @return Error::NotFound if the key is not present. + */ ET_NODISCARD executorch::runtime::Result get_metadata(const char* key) const override; + + /** + * Retrieve read-only data for the specified key. + * + * @param[in] key The name of the tensor to get data on. + * + * @return error if the key is not present or data cannot be loaded. + */ ET_NODISCARD executorch::runtime::Result get_data( const char* key) const override; + + /** + * Loads the data of the specified tensor into the provided buffer. + * + * @param[in] key The name of the tensor to get the data of. + * @param[in] buffer The buffer to load data into. Must point to at least + * `size` bytes of memory. + * @param[in] size The number of bytes to load. + * + * @returns an Error indicating if the load was successful. 
+ */ ET_NODISCARD executorch::runtime::Result load_data_into(const char* key, void* buffer, size_t size) const override; + /** + * @returns The number of keys in the map. + */ ET_NODISCARD executorch::runtime::Result get_num_keys() const override; + + /** + * @returns The key at the specified index, error if index out of bounds. + */ ET_NODISCARD executorch::runtime::Result get_key( size_t index) const override; @@ -61,26 +96,31 @@ class FlatTensorDataMap final : public executorch::runtime::NamedDataMap { private: FlatTensorDataMap( + const FlatTensorHeader& header, executorch::runtime::FreeableBuffer&& flat_tensor_data, const flat_tensor_flatbuffer::FlatTensor* flat_tensor, - executorch::runtime::FreeableBuffer&& data_ro) - : flat_tensor_data_(std::move(flat_tensor_data)), + executorch::runtime::DataLoader* loader) + : header_(header), + flat_tensor_data_(std::move(flat_tensor_data)), flat_tensor_(flat_tensor), - data_ro_(std::move(data_ro)) {} + loader_(loader) {} // Not copyable or assignable. FlatTensorDataMap(const FlatTensorDataMap& rhs) = delete; FlatTensorDataMap& operator=(FlatTensorDataMap&& rhs) noexcept = delete; FlatTensorDataMap& operator=(const FlatTensorDataMap& rhs) = delete; + // FlatTensor header, containing segment_base_offset and segment_data_size. + const FlatTensorHeader header_; + // Serialized flat_tensor flatbuffer data. executorch::runtime::FreeableBuffer flat_tensor_data_; // Flatbuffer representation of the flat_tensor. const flat_tensor_flatbuffer::FlatTensor* flat_tensor_; - // Loaded read-only tensor data. - executorch::runtime::FreeableBuffer data_ro_; + // Data loader, used to load segment data. + executorch::runtime::DataLoader* loader_; }; } // namespace extension diff --git a/extension/flat_tensor/test/targets.bzl b/extension/flat_tensor/test/targets.bzl index bc04edfbe1e..28baace3eeb 100644 --- a/extension/flat_tensor/test/targets.bzl +++ b/extension/flat_tensor/test/targets.bzl @@ -40,7 +40,7 @@ def define_common_targets(is_fbcode=False): } runtime.cxx_test( - name = "flat_tensor_data_map", + name = "flat_tensor_data_map_test", srcs = [ "flat_tensor_data_map_test.cpp", ], From 2859e4767edd72df8d941b85a24e185eb9dc3bd4 Mon Sep 17 00:00:00 2001 From: Jack <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 24 Feb 2025 14:43:54 -0800 Subject: [PATCH 066/584] Add qwen 2.5 (#8355) --- examples/models/llama/attention.py | 13 +++- examples/models/llama/model_args.py | 1 + examples/models/llama/rope.py | 11 ++- examples/models/llama/static_attention.py | 7 +- examples/models/qwen2_5/1_5b_config.json | 14 ++++ examples/models/qwen2_5/README.md | 63 +++++++++++++++ examples/models/qwen2_5/convert_weights.py | 90 ++++++++++++++++++++++ 7 files changed, 189 insertions(+), 10 deletions(-) create mode 100644 examples/models/qwen2_5/1_5b_config.json create mode 100644 examples/models/qwen2_5/README.md create mode 100644 examples/models/qwen2_5/convert_weights.py diff --git a/examples/models/llama/attention.py b/examples/models/llama/attention.py index 91168a388d3..66eeb10989f 100644 --- a/examples/models/llama/attention.py +++ b/examples/models/llama/attention.py @@ -175,9 +175,16 @@ def __init__(self, args: ModelArgs, layer_id: int, rope: Rope): self.max_batch_size = args.max_batch_size self.max_context_len = args.max_context_len self.dim = args.dim - self.wq = nn.Linear(self.dim, self.n_heads * self.head_dim, bias=False) - self.wk = nn.Linear(self.dim, self.n_kv_heads * self.head_dim, bias=False) - self.wv = nn.Linear(self.dim, self.n_kv_heads * self.head_dim, 
bias=False) + self.attention_qkv_bias = args.attention_qkv_bias + self.wq = nn.Linear( + self.dim, self.n_heads * self.head_dim, bias=self.attention_qkv_bias + ) + self.wk = nn.Linear( + self.dim, self.n_kv_heads * self.head_dim, bias=self.attention_qkv_bias + ) + self.wv = nn.Linear( + self.dim, self.n_kv_heads * self.head_dim, bias=self.attention_qkv_bias + ) self.wo = nn.Linear(self.n_heads * self.head_dim, self.dim, bias=False) self.layer_id = layer_id diff --git a/examples/models/llama/model_args.py b/examples/models/llama/model_args.py index e1c4edb8e93..28804839815 100644 --- a/examples/models/llama/model_args.py +++ b/examples/models/llama/model_args.py @@ -21,6 +21,7 @@ class ModelArgs: num_experts: int = 8 # Number of experts num_activated_experts: int = 2 # Number of experts to activate attention_type: str = "mha" # Attention type, registered in attention.py + attention_qkv_bias: bool = False use_kv_cache: bool = False # Use key/value cache use_sdpa_with_kv_cache_op: bool = ( False # Use custom sdpa op that updates kv cache in-place diff --git a/examples/models/llama/rope.py b/examples/models/llama/rope.py index 01352f404df..e081c442032 100644 --- a/examples/models/llama/rope.py +++ b/examples/models/llama/rope.py @@ -114,6 +114,7 @@ def apply_rotary_emb_to_k( return xk_out.type_as(xk) +# Wrap apply_rotary_emb in a module to enable it to be module swapped out. class RotaryEmbedding(torch.nn.Module): def __init__(self): super().__init__() @@ -213,14 +214,20 @@ class Rope(torch.nn.Module): def __init__(self, params: ModelArgs): super().__init__() self.params = params + + # Choose the appropriate RoPE implementation if self.params.use_hf_rope: self.precompute_freqs_cis = hf_precompute_freqs_cis + self.apply_rotary_emb = hf_apply_rotary_emb else: self.precompute_freqs_cis = partial( precompute_freqs_cis, use_scaled=self.params.use_scaled_rope, scale_factor=self.params.rope_scale_factor, ) + self.apply_rotary_emb = RotaryEmbedding() + + # Precompute frequencies freqs_cos, freqs_sin = self.precompute_freqs_cis( self.params.head_dim, ( @@ -232,10 +239,6 @@ def __init__(self, params: ModelArgs): ) self.register_buffer("freqs_cos", freqs_cos, persistent=False) self.register_buffer("freqs_sin", freqs_sin, persistent=False) - if self.params.use_hf_rope: - self.apply_rotary_emb = hf_apply_rotary_emb - else: - self.apply_rotary_emb = RotaryEmbedding() def forward( self, diff --git a/examples/models/llama/static_attention.py b/examples/models/llama/static_attention.py index 3a5f88ad3f3..43db873fb65 100644 --- a/examples/models/llama/static_attention.py +++ b/examples/models/llama/static_attention.py @@ -207,22 +207,23 @@ def __init__(self, config: ModelArgs, layer_id: int, rope: Rope): self.dim = config.dim self.head_dim = config.head_dim self.inv_scale = 1.0 / (float(self.head_dim) ** 0.5) + self.attention_qkv_bias = config.attention_qkv_bias self.wqs = nn.ModuleList( [ - nn.Linear(self.dim, self.head_dim, bias=False) + nn.Linear(self.dim, self.head_dim, bias=self.attention_qkv_bias) for _ in range(self.n_heads) ] ) self.wks = nn.ModuleList( [ - nn.Linear(self.dim, self.head_dim, bias=False) + nn.Linear(self.dim, self.head_dim, bias=self.attention_qkv_bias) for _ in range(self.n_kv_heads) ] ) self.wvs = nn.ModuleList( [ - nn.Linear(self.dim, self.head_dim, bias=False) + nn.Linear(self.dim, self.head_dim, bias=self.attention_qkv_bias) for _ in range(self.n_kv_heads) ] ) diff --git a/examples/models/qwen2_5/1_5b_config.json b/examples/models/qwen2_5/1_5b_config.json new file mode 100644 index 
00000000000..64daca5a7cd
--- /dev/null
+++ b/examples/models/qwen2_5/1_5b_config.json
@@ -0,0 +1,14 @@
+{
+  "dim": 1536,
+  "ffn_dim_multiplier": 1,
+  "hidden_dim": 8960,
+  "n_heads": 12,
+  "n_kv_heads": 2,
+  "n_layers": 28,
+  "norm_eps": 1e-06,
+  "rope_theta": 1000000.0,
+  "use_scaled_rope": false,
+  "vocab_size": 151936,
+  "use_hf_rope": true,
+  "attention_qkv_bias": true
+}
diff --git a/examples/models/qwen2_5/README.md b/examples/models/qwen2_5/README.md
new file mode 100644
index 00000000000..9bf791a35ed
--- /dev/null
+++ b/examples/models/qwen2_5/README.md
@@ -0,0 +1,63 @@
+## Summary
+Qwen 2.5 is the latest iteration of the Qwen series of large language models (LLMs) developed by Alibaba. At the moment, only the 1.5b variant is supported, with plans to add the 0.5b and 3b versions in the future.
+
+## Instructions
+
+Qwen 2.5 uses the same example code as Llama, while the checkpoint, model params, and tokenizer are different. Please see the [Llama README page](../llama/README.md) for details.
+
+All commands for exporting and running Llama on various backends should also be applicable to Qwen 2.5, by swapping the following args:
+```
+--model qwen2_5
+--params examples/models/qwen2_5/1_5b_config.json
+--checkpoint <path-to-meta-checkpoint>
+```
+
+### Generate the Checkpoint
+The original checkpoint can be obtained from HuggingFace:
+```
+huggingface-cli download Qwen/Qwen2.5-1.5B
+```
+
+We then convert it to Meta's checkpoint format:
+```
+python examples/models/qwen2_5/convert_weights.py <path-to-checkpoint-dir> <output-path>
+```
+
+### Example export and run
+Here is a basic example of exporting and running Qwen 2.5; please refer to the [Llama README page](../llama/README.md) for more advanced usage.
+
+Export to XNNPACK, no quantization:
+```
+# No quantization
+# Set these paths to point to the downloaded files
+QWEN_CHECKPOINT=path/to/checkpoint.pth
+
+python -m examples.models.llama.export_llama \
+    --model "qwen2_5" \
+    --checkpoint "${QWEN_CHECKPOINT:?}" \
+    --params examples/models/qwen2_5/1_5b_config.json \
+    -kv \
+    --use_sdpa_with_kv_cache \
+    -d fp32 \
+    -X \
+    --metadata '{"get_bos_id":151643, "get_eos_ids":[151643]}' \
+    --output_name="qwen2_5-1_5b.pte" \
+    --verbose
+```
+
+Run using the executor runner:
+```
+# Currently a work in progress, just need to enable HuggingFace json tokenizer in C++.
+# In the meantime, can run with an example Python runner with pybindings:
+
+python -m examples.models.llama.runner.native \
+  --model qwen2_5 \
+  --pte <path-to-exported-pte> \
+  -kv \
+  --tokenizer <path-to-hf-download>/tokenizer.json \
+  --tokenizer_config <path-to-hf-download>/tokenizer_config.json \
+  --prompt "Who is the founder of Meta?" \
+  --params examples/models/qwen2_5/1_5b_config.json \
+  --max_len 64 \
+  --temperature 0
+```
diff --git a/examples/models/qwen2_5/convert_weights.py b/examples/models/qwen2_5/convert_weights.py
new file mode 100644
index 00000000000..6b6c0bbdfe2
--- /dev/null
+++ b/examples/models/qwen2_5/convert_weights.py
@@ -0,0 +1,90 @@
+import argparse
+from typing import Dict
+
+import torch
+
+from torchtune.models.convert_weights import get_mapped_key
+
+from torchtune.training import FullModelHFCheckpointer
+
+# Standard _FROM_META weight mapping of Meta weights to TorchTune + additional bias weight mappings.
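+# Keys are Meta/Llama-style parameter names; values are the torchtune names.
+# qwen_2_tune_to_meta() below inverts this mapping to convert a torchtune
+# checkpoint back into Meta format.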
+_QWEN_2_FROM_META = { + "tok_embeddings.weight": "tok_embeddings.weight", + "norm.weight": "norm.scale", + "layers.{}.attention.wk.weight": "layers.{}.attn.k_proj.weight", + "layers.{}.attention.wk.bias": "layers.{}.attn.k_proj.bias", + "layers.{}.attention.wq.weight": "layers.{}.attn.q_proj.weight", + "layers.{}.attention.wq.bias": "layers.{}.attn.q_proj.bias", + "layers.{}.attention.wv.weight": "layers.{}.attn.v_proj.weight", + "layers.{}.attention.wv.bias": "layers.{}.attn.v_proj.bias", + "layers.{}.attention.wo.weight": "layers.{}.attn.output_proj.weight", + "layers.{}.attention_norm.weight": "layers.{}.sa_norm.scale", + "layers.{}.ffn_norm.weight": "layers.{}.mlp_norm.scale", + "layers.{}.feed_forward.w1.weight": "layers.{}.mlp.w1.weight", + "layers.{}.feed_forward.w2.weight": "layers.{}.mlp.w2.weight", + "layers.{}.feed_forward.w3.weight": "layers.{}.mlp.w3.weight", +} + + +def qwen_2_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: + """ + Convert a state dict from torchtune's format to Meta's format. This function + doesn't handle any sharding or splitting of state dicts. It follows the + state_dict IN -> state_dict OUT pattern. + + Args: + state_dict (Dict[str, torch.Tensor]): State dict in torchtune's format. + + Returns: + Dict[str, torch.Tensor]: State dict in Meta's format. + """ + converted_state_dict = {} + inverted_mapping_dict = {v: k for k, v in _QWEN_2_FROM_META.items()} + + for key, value in state_dict.items(): + new_key = get_mapped_key(key, inverted_mapping_dict) + converted_state_dict[new_key] = value + + # 0.5b and 1.5b models share the same weights for tok_embeddings and output embeddings, see https://github.com/QwenLM/Qwen2.5/issues/733. + converted_state_dict["output.weight"] = converted_state_dict[ + "tok_embeddings.weight" + ] + + return converted_state_dict + + +def main(): + parser = argparse.ArgumentParser( + description="Convert Qwen2 weights to Meta format." + ) + parser.add_argument( + "input_dir", + type=str, + help="Path to directory containing checkpoint files", + ) + parser.add_argument("output", type=str, help="Path to the output checkpoint") + + args = parser.parse_args() + + # Don't necessarily need to use TorchTune checkpointer, can just aggregate checkpoint files by ourselves. + checkpointer = FullModelHFCheckpointer( + # checkpoint_dir="/home/jackzhxng/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B/snapshots/8faed761d45a263340a0528343f099c05c9a4323/", + checkpoint_dir=args.input_dir, + checkpoint_files=["model.safetensors"], + output_dir=".", + model_type="QWEN2", + ) + + print("Loading checkpoint...") + sd = checkpointer.load_checkpoint() + + print("Converting checkpoint...") + sd = qwen_2_tune_to_meta(sd["model"]) + # torch.save(sd, "/home/jackzhxng/models/qwen2_5-1_5b.pth") + + torch.save(sd, args.output) + print(f"Checkpoint saved to {args.output}") + + +if __name__ == "__main__": + main() From c69a1ba7f294e69ab18a68269121a5633c20fcb6 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Mon, 24 Feb 2025 16:00:01 -0800 Subject: [PATCH 067/584] Sync constexpr irange patch from PyTorch core (#8610) Specifically, pytorch/pytorch#147633 . 
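For readers outside PyTorch core, the practical effect of the added `constexpr` qualifiers is that the one-argument `irange` overload and its iterators can now run during constant evaluation. A minimal sketch (not part of the patch; the function below is hypothetical):

```cpp
#include <c10/util/irange.h>

// With the constexpr iterators from this change, a range-for over
// c10::irange can execute at compile time inside a constexpr function.
constexpr int sum_below(int n) {
  int total = 0;
  for (const auto i : c10::irange(n)) {
    total += i;
  }
  return total;
}
static_assert(sum_below(5) == 0 + 1 + 2 + 3 + 4, "evaluated at compile time");
```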
--- .../core/portable_type/c10/c10/util/irange.h | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/runtime/core/portable_type/c10/c10/util/irange.h b/runtime/core/portable_type/c10/c10/util/irange.h index 2719a82075c..3249bdfa5cf 100644 --- a/runtime/core/portable_type/c10/c10/util/irange.h +++ b/runtime/core/portable_type/c10/c10/util/irange.h @@ -24,28 +24,28 @@ struct integer_iterator { using pointer = I*; using reference = I&; - explicit integer_iterator(I value) : value(value) {} + explicit constexpr integer_iterator(I value) : value(value) {} - I operator*() const { + constexpr I operator*() const { return value; } - I const* operator->() const { + constexpr I const* operator->() const { return &value; } - integer_iterator& operator++() { + constexpr integer_iterator& operator++() { ++value; return *this; } - integer_iterator operator++(int) { + constexpr integer_iterator operator++(int) { const auto copy = *this; ++*this; return copy; } - bool operator==(const integer_iterator& other) const { + constexpr bool operator==(const integer_iterator& other) const { if constexpr (one_sided) { // Range-for loops' end test is `begin != end`, not `begin < // end`. To handle `c10::irange(n)` where n < 0 (which should be @@ -64,7 +64,7 @@ struct integer_iterator { return false; // Horrible hack } - bool operator!=(const integer_iterator& other) const { + constexpr bool operator!=(const integer_iterator& other) const { return !(*this == other); } @@ -80,12 +80,12 @@ template < std::enable_if_t, bool> = true> struct integer_range { public: - integer_range(I begin, I end) : begin_(begin), end_(end) {} + constexpr integer_range(I begin, I end) : begin_(begin), end_(end) {} using iterator = detail::integer_iterator; - iterator begin() const { + constexpr iterator begin() const { return begin_; } - iterator end() const { + constexpr iterator end() const { return end_; } @@ -116,7 +116,7 @@ integer_range irange(Integer1 begin, Integer2 end) { template < typename Integer, std::enable_if_t, bool> = true> -integer_range irange(Integer end) { +constexpr integer_range irange(Integer end) { return {Integer(), end}; } From 7103bb309058dc7e2ce08c1174eb8f08ef2b5ac1 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Mon, 24 Feb 2025 16:10:37 -0800 Subject: [PATCH 068/584] Replace shim/ with facebook/buck2-shims-meta submodule (#8656) After the recent move of most things in shim to shim_et, shim/ was mostly an outdated version of buck2-shims-meta. Now it is buck2-shims-meta. 
Test Plan: regular GitHub CI on this rev (cmake is still dependent on buck right now) --- .gitmodules | 3 + shim | 1 + shim/.buckconfig | 0 shim/.gitignore | 5 -- shim/BUCK | 63 ------------------- shim/TARGETS | 0 shim/build_defs/native_rules.bzl | 26 -------- .../build_defs/default_platform_defs.bzl | 8 --- shim/tools/build_defs/fb_native_wrapper.bzl | 10 --- 9 files changed, 4 insertions(+), 112 deletions(-) create mode 160000 shim delete mode 100644 shim/.buckconfig delete mode 100644 shim/.gitignore delete mode 100644 shim/BUCK delete mode 100644 shim/TARGETS delete mode 100644 shim/build_defs/native_rules.bzl delete mode 100644 shim/tools/build_defs/default_platform_defs.bzl delete mode 100644 shim/tools/build_defs/fb_native_wrapper.bzl diff --git a/.gitmodules b/.gitmodules index f7da7e771fb..f1a5efee931 100644 --- a/.gitmodules +++ b/.gitmodules @@ -70,3 +70,6 @@ [submodule "third-party/pocketfft"] path = third-party/pocketfft url = https://github.com/mreineck/pocketfft +[submodule "shim"] + path = shim + url = https://github.com/facebook/buck2-shims-meta diff --git a/shim b/shim new file mode 160000 index 00000000000..0b32ea082a2 --- /dev/null +++ b/shim @@ -0,0 +1 @@ +Subproject commit 0b32ea082a2e79807b4015ed1e75308404e69e23 diff --git a/shim/.buckconfig b/shim/.buckconfig deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/shim/.gitignore b/shim/.gitignore deleted file mode 100644 index a1412f7fa8e..00000000000 --- a/shim/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -# We currently expect end users to run reindeer vendor themselves -# so mark these things as to ignore -/third-party/rust/.cargo/ -/third-party/rust/BUCK -/third-party/rust/vendor/ diff --git a/shim/BUCK b/shim/BUCK deleted file mode 100644 index 55fdd5bb03f..00000000000 --- a/shim/BUCK +++ /dev/null @@ -1,63 +0,0 @@ -load("@prelude//platforms:defs.bzl", "execution_platform") -load("@prelude//tests:test_toolchain.bzl", "noop_test_toolchain") -load("@prelude//toolchains:cxx.bzl", "system_cxx_toolchain") -load("@prelude//toolchains:genrule.bzl", "system_genrule_toolchain") -load("@prelude//toolchains:go.bzl", "system_go_toolchain") -load("@prelude//toolchains:haskell.bzl", "system_haskell_toolchain") -load("@prelude//toolchains:ocaml.bzl", "system_ocaml_toolchain") -load("@prelude//toolchains:python.bzl", "system_python_bootstrap_toolchain", "system_python_toolchain") -load("@prelude//toolchains:remote_test_execution.bzl", "remote_test_execution_toolchain") -load("@prelude//toolchains:rust.bzl", "system_rust_toolchain") - -# TODO: sync this directory with https://github.com/facebook/buck2-shims-meta. 
-# Internal context: -# https://fb.workplace.com/groups/222849770514616/posts/600883896044533/ - -oncall("executorch") - -system_cxx_toolchain( - name = "cxx", - cxx_flags = ["-std=c++20"], - visibility = ["PUBLIC"], -) - -system_genrule_toolchain( - name = "genrule", - visibility = ["PUBLIC"], -) - -system_go_toolchain( - name = "go", - visibility = ["PUBLIC"], -) - -system_haskell_toolchain( - name = "haskell", - visibility = ["PUBLIC"], -) - -system_ocaml_toolchain( - name = "ocaml", - visibility = ["PUBLIC"], -) - -system_python_toolchain( - name = "python", - visibility = ["PUBLIC"], -) - -system_python_bootstrap_toolchain( - name = "python_bootstrap", - visibility = ["PUBLIC"], -) - -system_rust_toolchain( - name = "rust", - default_edition = "2021", - visibility = ["PUBLIC"], -) - -remote_test_execution_toolchain( - name = "remote_test_execution", - visibility = ["PUBLIC"], -) diff --git a/shim/TARGETS b/shim/TARGETS deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/shim/build_defs/native_rules.bzl b/shim/build_defs/native_rules.bzl deleted file mode 100644 index a3e3a7039b0..00000000000 --- a/shim/build_defs/native_rules.bzl +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under both the MIT license found in the -# LICENSE-MIT file in the root directory of this source tree and the Apache -# License, Version 2.0 found in the LICENSE-APACHE file in the root directory -# of this source tree. - -def buck_genrule(visibility = ["PUBLIC"], **kwargs): - # @lint-ignore BUCKLINT: avoid "native is forbidden in fbcode" - native.genrule(visibility = visibility, **kwargs) - -def buck_command_alias(**_): - pass - -def buck_filegroup(visibility = ["PUBLIC"], **kwargs): - # @lint-ignore BUCKLINT: avoid "native is forbidden in fbcode" - native.filegroup(visibility = visibility, **kwargs) - -def alias(actual, visibility = ["PUBLIC"], **kwargs): - if actual.startswith("//buck2/"): - actual = "root//" + actual.removeprefix("//buck2/") - native.alias(actual = actual, visibility = visibility, **kwargs) - -def buck_sh_binary(visibility = ["PUBLIC"], **kwargs): - # @lint-ignore BUCKLINT: avoid "native is forbidden in fbcode" - native.sh_binary(visibility = visibility, **kwargs) diff --git a/shim/tools/build_defs/default_platform_defs.bzl b/shim/tools/build_defs/default_platform_defs.bzl deleted file mode 100644 index 3f860ae49df..00000000000 --- a/shim/tools/build_defs/default_platform_defs.bzl +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under both the MIT license found in the -# LICENSE-MIT file in the root directory of this source tree and the Apache -# License, Version 2.0 found in the LICENSE-APACHE file in the root directory -# of this source tree. - -DEVSERVER_PLATFORM_REGEX = "UNUSED" diff --git a/shim/tools/build_defs/fb_native_wrapper.bzl b/shim/tools/build_defs/fb_native_wrapper.bzl deleted file mode 100644 index d67b9384fe9..00000000000 --- a/shim/tools/build_defs/fb_native_wrapper.bzl +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under both the MIT license found in the -# LICENSE-MIT file in the root directory of this source tree and the Apache -# License, Version 2.0 found in the LICENSE-APACHE file in the root directory -# of this source tree. 
-
-fb_native = struct(
-    config_setting = native.config_setting,
-)

From 745be4e6e8c3a5532d0940229bc9a5e108b63a89 Mon Sep 17 00:00:00 2001
From: Diego Palma
Date: Mon, 24 Feb 2025 16:17:49 -0800
Subject: [PATCH 069/584] Use portable lib for finetuning demo

Differential Revision: D69954671

Pull Request resolved: https://github.com/pytorch/executorch/pull/8613
---
 examples/llm_pte_finetuning/README.md         | 68 +++++++++++++------
 examples/llm_pte_finetuning/TARGETS           |  5 +-
 examples/llm_pte_finetuning/__init__.py       | 19 ++++++
 .../llm_pte_finetuning/model_loading_lib.py   | 59 +++++++++++++++-
 .../llm_pte_finetuning/qwen_05b_config.yaml   |  2 +-
 examples/llm_pte_finetuning/runner.py         | 25 +++++--
 examples/llm_pte_finetuning/training_lib.py   |  2 +-
 7 files changed, 148 insertions(+), 32 deletions(-)
 create mode 100644 examples/llm_pte_finetuning/__init__.py

diff --git a/examples/llm_pte_finetuning/README.md b/examples/llm_pte_finetuning/README.md
index f3e946f28ca..bdd317109e5 100644
--- a/examples/llm_pte_finetuning/README.md
+++ b/examples/llm_pte_finetuning/README.md
@@ -6,21 +6,43 @@ In this tutorial, we show how to fine-tune an LLM using executorch.
 
 You will need to have a model's checkpoint, in the Hugging Face format. For example:
 
-```
-git clone https://huggingface.co/microsoft/Phi-3-mini-4k-instruct
+```console
+git clone https://huggingface.co/Qwen/Qwen2-0.5B-Instruct
 ```
 
 You will need to install [torchtune](https://github.com/pytorch/torchtune) following [its installation instructions](https://github.com/pytorch/torchtune?tab=readme-ov-file#installation).
 
+You might run into an issue with the `triton` package when installing `torchtune`. You can build `triton` locally following the [instructions in their repo](https://github.com/triton-lang/triton?tab=readme-ov-file#install-from-source).
+
 ## Config Files
 
+The directory structure of the `llm_pte_finetuning` example is:
+
+```console
+examples/llm_pte_finetuning
+├── README.md
+├── TARGETS
+├── __init__.py
+├── model_exporter.py
+├── model_loading_lib.py
+├── phi3_alpaca_code_config.yaml
+├── phi3_config.yaml
+├── qwen_05b_config.yaml
+├── runner.py
+└── training_lib.py
+```
+
+We already provide configs out of the box. The following sections explain how you can set up the config for your own model or dataset.
+
 As mentioned in the previous section, we internally use `torchtune` APIs, and thus, we use config files that follow `torchtune`'s structure. Typically, in the following sections we go through a working example which can be found in the `phi3_config.yaml` config file.
 
 ### Tokenizer
 We need to define the tokenizer. Let's suppose we would like to use [PHI3 Mini Instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) model from Microsoft. We need to define the tokenizer component:
 
-```
+```yaml
 tokenizer:
   _component_: torchtune.models.phi3.phi3_mini_tokenizer
   path: /tmp/Phi-3-mini-4k-instruct/tokenizer.model
@@ -33,7 +55,7 @@ This will load the tokenizer, and set the max sequence length to 1024. The class
 
 In this example we use the [Alpaca-Cleaned dataset](https://huggingface.co/datasets/yahma/alpaca-cleaned). We need to define the following parameters:
 
-```
+```yaml
 dataset:
   _component_: torchtune.datasets.alpaca_cleaned_dataset
   seed: null
@@ -47,7 +69,7 @@ Torchtune supports datasets using huggingface dataloaders, so custom datasets co
 
 For the loss function, we use PyTorch losses.
In this example we use the `CrossEntropyLoss`: -``` +```yaml loss: _component_: torch.nn.CrossEntropyLoss ``` @@ -56,7 +78,7 @@ loss: Model parameters can be set, in this example we replicate the configuration for phi3 mini instruct benchmarks: -``` +```yaml model: _component_: torchtune.models.phi3.lora_phi3_mini lora_attn_modules: ['q_proj', 'v_proj'] @@ -70,7 +92,7 @@ model: Depending on how your model is defined, you will need to instantiate different components. In these examples we use checkpoints from HF (hugging face format), and thus we will need to instantiate a `FullModelHFCheckpointer` object. We need to pass the checkpoint directory, the files with the tensors, the output directory for training and the model type: -``` +```yaml checkpointer: _component_: torchtune.training.FullModelHFCheckpointer checkpoint_dir: /tmp/Phi-3-mini-4k-instruct @@ -87,7 +109,7 @@ checkpointer: Torchtune supports `cuda` and `bf16` tensors. However, for ExecuTorch training we only support `cpu` and `fp32`: -``` +```yaml device: cpu dtype: fp32 ``` @@ -101,28 +123,34 @@ The `model_exporter.py` exports the LLM checkpoint into an ExecuTorch checkpoint * `cfg`: Configuration file * `output_file`: The `.pte` output path -``` -python model_exporter.py --cfg=phi3_config.yaml --output_file=phi3_mini_lora.pte +```console +python model_exporter.py \ + --cfg=qwen_05b_config.yaml \ + --output_file=qwen2_0_5B.pte ``` ### Step 2: Run the fine-tuning job To run the fine-tuning job: -``` -python runner.py --cfg=phi3_config.yaml --model_file=phi3_mini_lora.pte +```console +python runner.py \ + --cfg=qwen_05b_config.yaml \ + --model_file=qwen2_0_5B.pte \ + --num_training_steps=10 \ + --num_eval_steps=5 ``` You need to use **the same** config file from the previous step. The `model_file` arg is the `.pte` model from the previous step. Example output: -``` -Evaluating the model before training... 
-100%|██████████████████████████████████████████████████████████████████████████████████████| 3/3 [31:23<00:00, 627.98s/it] -Eval loss: tensor(2.3778) -100%|██████████████████████████████████████████████████████████████████████████████████████| 5/5 [52:29<00:00, 629.84s/it] -Losses: [2.7152762413024902, 0.7890686988830566, 2.249271869659424, 1.4777560234069824, 0.8378427624702454] -100%|██████████████████████████████████████████████████████████████████████████████████████| 3/3 [30:35<00:00, 611.90s/it] -Eval loss: tensor(0.8464) +```console +Evaluating the model before training +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:47<00:00, 9.45s/it] +Eval loss: tensor(0.9441) +100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [01:30<00:00, 9.09s/it] +Losses: [0.5646533966064453, 1.3464953899383545, 1.297974705696106, 1.2249481678009033, 0.6750457286834717, 0.7721152901649475, 1.0774847269058228, 0.7962403893470764, 0.8448256850242615, 0.8731598854064941] +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:45<00:00, 9.18s/it] +Eval loss: tensor(0.7679) ``` diff --git a/examples/llm_pte_finetuning/TARGETS b/examples/llm_pte_finetuning/TARGETS index fee67914909..5ba24c11203 100644 --- a/examples/llm_pte_finetuning/TARGETS +++ b/examples/llm_pte_finetuning/TARGETS @@ -12,7 +12,7 @@ python_library( "fbcode//caffe2:torch", "fbcode//executorch/examples/llm_pte_finetuning:training_lib", "fbcode//executorch/exir:lib", - "fbcode//executorch/extension/pybindings:aten_lib", # @manual For PTE loader + "fbcode//executorch/extension/pybindings:portable_lib", # @manual For PTE loader "fbcode//pytorch/torchtune:lib", "fbsource//third-party/pypi/blobfile:blobfile", # @manual For tokenizer "fbsource//third-party/pypi/omegaconf:omegaconf", @@ -27,11 +27,12 @@ python_library( ], deps = [ "fbcode//caffe2:torch", - "fbcode//executorch/extension/pybindings:aten_lib", # @manual For PTE loader + "fbcode//executorch/extension/pybindings:portable_lib", # @manual For PTE loader "fbcode//pytorch/torchtune:lib", "fbsource//third-party/pypi/blobfile:blobfile", # @manual For tokenizer "fbsource//third-party/pypi/tiktoken:tiktoken", # @manual For tokenizer "fbsource//third-party/pypi/tqdm:tqdm", + "fbcode//executorch/backends/xnnpack/partition:xnnpack_partitioner", ], ) diff --git a/examples/llm_pte_finetuning/__init__.py b/examples/llm_pte_finetuning/__init__.py new file mode 100644 index 00000000000..1db9dd0e3be --- /dev/null +++ b/examples/llm_pte_finetuning/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# Copyright 2024 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +from .model_loading_lib import export_model_lora_training, load_checkpoint, setup_model +from .training_lib import eval_model, get_dataloader, TrainingModule, update_function + +__all__ = [ + "eval_model", + "get_dataloader", + "update_function", + "TrainingModule", + "export_model_lora_training", + "load_checkpoint", + "setup_model", +] diff --git a/examples/llm_pte_finetuning/model_loading_lib.py b/examples/llm_pte_finetuning/model_loading_lib.py index 3372a97e269..2c42a0e7635 100644 --- a/examples/llm_pte_finetuning/model_loading_lib.py +++ b/examples/llm_pte_finetuning/model_loading_lib.py @@ -9,8 +9,9 @@ from typing import Any, Dict, Tuple import torch +from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner from executorch.examples.llm_pte_finetuning.training_lib import TrainingModule -from executorch.exir import to_edge +from executorch.exir import EdgeCompileConfig, to_edge from omegaconf import DictConfig from torch.export import export, ExportedProgram @@ -72,16 +73,70 @@ def export_model_lora_training( exported_graph: ExportedProgram = export(model, example_args, strict=False) print("Creating a joint forward-backwards graph for training") joint_graph = _export_forward_backward(exported_graph) + ep = joint_graph + + # Currently there is no implementation of empty_permuted for edge dialect. + # We manually make a pass to rewrite the empty_permuted to empty and permute. + for node in ep.graph.nodes: + if ( + node.op == "call_function" + and node.target == torch.ops.aten.empty_permuted.out + ): + print("found empty_permute: ", node) + empty_permuted_node = node + with ep.graph.inserting_before(empty_permuted_node): + empty_node = ep.graph.create_node( + "call_function", + torch.ops.aten.empty.memory_format, + (node.args[0],), + empty_permuted_node.kwargs, + ) + permute_node = ep.graph.create_node( + "call_function", + torch.ops.aten.permute, + (empty_node, node.args[1]), + ) + for user in empty_permuted_node.users.copy(): + user.replace_input_with(empty_permuted_node, permute_node) + if ( + node.op == "call_function" + and node.target == torch.ops.aten.empty_permuted.default + ): + print("found empty_permute default: ", node) + empty_permuted_node = node + with ep.graph.inserting_before(empty_permuted_node): + empty_node = ep.graph.create_node( + "call_function", + torch.ops.aten.empty.memory_format, + (node.args[0],), + empty_permuted_node.kwargs, + ) + permute_node = ep.graph.create_node( + "call_function", + torch.ops.aten.permute.default, + (empty_node, node.args[1]), + ) + for user in empty_permuted_node.users.copy(): + user.replace_input_with(empty_permuted_node, permute_node) # 2. to_edge: Make optimizations for Edge devices. print("Lowering to edge dialect") - edge_program = to_edge(joint_graph) + edge_program = to_edge( + joint_graph, + compile_config=EdgeCompileConfig( + _core_aten_ops_exception_list=[torch.ops.aten.empty_permuted.default] + ), + ) print(edge_program._edge_programs["forward"].graph_module) # 3. to_executorch: Convert the graph to an ExecuTorch program. 
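+    # The to_backend() call below delegates supported subgraphs to XNNPACK
+    # before serialization; force_fp32_dynamic_linear keeps dynamically
+    # shaped linear layers in fp32.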
print("Exporting to executorch") + edge_program = edge_program.to_backend( + XnnpackPartitioner(force_fp32_dynamic_linear=True) + ) executorch_program = edge_program.to_executorch() + print(executorch_program.exported_program().graph_signature) print(f"Saving to {output_file}") with open(output_file, "wb") as file: diff --git a/examples/llm_pte_finetuning/qwen_05b_config.yaml b/examples/llm_pte_finetuning/qwen_05b_config.yaml index b93517b8fda..f5ab2dbad68 100644 --- a/examples/llm_pte_finetuning/qwen_05b_config.yaml +++ b/examples/llm_pte_finetuning/qwen_05b_config.yaml @@ -27,7 +27,7 @@ checkpointer: model.safetensors ] recipe_checkpoint: null - output_dir: /tmp/Qwen2-0.5B-Instruct + output_dir: /tmp/qwen_0.5B_ft-output model_type: QWEN2 resume_from_checkpoint: False save_adapter_weights_only: False diff --git a/examples/llm_pte_finetuning/runner.py b/examples/llm_pte_finetuning/runner.py index 0deebcf010d..0baf160a56b 100644 --- a/examples/llm_pte_finetuning/runner.py +++ b/examples/llm_pte_finetuning/runner.py @@ -15,7 +15,7 @@ update_function, ) -from executorch.extension.pybindings.aten_lib import ( # @manual +from executorch.extension.pybindings.portable_lib import ( # @manual _load_for_executorch_from_buffer, ) from omegaconf import OmegaConf @@ -30,6 +30,18 @@ ) parser.add_argument("--cfg", type=str, help="Path to the config file.") parser.add_argument("--model_file", type=str, help="Path to the ET model file.") +parser.add_argument( + "--num_training_steps", + type=int, + help="Number of training steps, assuming 1 epoch.", + default=100, +) +parser.add_argument( + "--num_eval_steps", + type=int, + help="Number of eval steps, assuming 1 epoch.", + default=5, +) def main() -> None: @@ -47,10 +59,11 @@ def main() -> None: train_set, val_set = torch.utils.data.random_split(ds, [0.8, 0.2]) train_dataloader = get_dataloader(cfg, train_set, tokenizer, loss_fn) val_dataloader = get_dataloader(cfg, val_set, tokenizer, loss_fn) + num_training_steps = args.num_training_steps + num_eval_steps = args.num_eval_steps max_seq_len = cfg.tokenizer.max_seq_len # Num of steps to run training. Assume 1 epoch - num_steps = 100 with open(file, "rb") as f: model_bytes = f.read() et_mod = _load_for_executorch_from_buffer(model_bytes) @@ -62,7 +75,7 @@ def main() -> None: dataloader=val_dataloader, loss_fn=loss_fn, max_seq_len=max_seq_len, - num_eval_steps=10, + num_eval_steps=num_eval_steps, ) print("Eval loss: ", eval_loss) @@ -74,9 +87,9 @@ def main() -> None: learning_rate = 5e-3 f.seek(0) losses = [] - for i, batch in tqdm(enumerate(train_dataloader), total=num_steps): + for i, batch in tqdm(enumerate(train_dataloader), total=num_training_steps): # Run for a limited number of steps. 
-        if i >= num_steps:
+        if i >= num_training_steps:
             break
         tokens, labels = batch["tokens"], batch["labels"]
         token_size = tokens.shape[1]
@@ -113,7 +126,7 @@ def main() -> None:
         dataloader=val_dataloader,
         loss_fn=loss_fn,
         max_seq_len=max_seq_len,
-        num_eval_steps=10,
+        num_eval_steps=num_eval_steps,
     )
     print("Eval loss: ", eval_loss)
 
diff --git a/examples/llm_pte_finetuning/training_lib.py b/examples/llm_pte_finetuning/training_lib.py
index dfdaf9b115a..f8cae70d39c 100644
--- a/examples/llm_pte_finetuning/training_lib.py
+++ b/examples/llm_pte_finetuning/training_lib.py
@@ -10,7 +10,7 @@ from typing import Any
 
 import torch
-from executorch.extension.pybindings.aten_lib import ExecuTorchModule  # @manual
+from executorch.extension.pybindings.portable_lib import ExecuTorchModule  # @manual
 from torch.nn import functional as F
 from torch.utils.data import DataLoader, Dataset, DistributedSampler
 

From 0add08096af742c167ed28f417615f05a22116dd Mon Sep 17 00:00:00 2001
From: Jack <32371937+jackzhxng@users.noreply.github.com>
Date: Mon, 24 Feb 2025 18:01:16 -0800
Subject: [PATCH 070/584] Add tests for qwen + allow uninitialized weights in
 Llama model (#8552)

---
 .ci/scripts/gather_test_models.py          |  2 +-
 .ci/scripts/test_model.sh                  | 12 ++++++++++-
 examples/models/__init__.py                |  1 +
 examples/models/llama/export_llama_lib.py  |  2 ++
 examples/models/llama/model.py             | 25 +++++++++++++++-------
 examples/models/qwen2_5/__init__.py        | 14 ++++++++++++
 examples/models/qwen2_5/convert_weights.py |  2 --
 7 files changed, 46 insertions(+), 12 deletions(-)
 create mode 100644 examples/models/qwen2_5/__init__.py

diff --git a/.ci/scripts/gather_test_models.py b/.ci/scripts/gather_test_models.py
index d02213b9faf..515bc97cca3 100755
--- a/.ci/scripts/gather_test_models.py
+++ b/.ci/scripts/gather_test_models.py
@@ -90,7 +90,7 @@ def model_should_run_on_event(model: str, event: str) -> bool:
     We put higher priority and fast models to pull request and rest to push.
     """
     if event == "pull_request":
-        return model in ["mv3", "vit"]
+        return model in ["mv3", "vit", "qwen2_5"]  # TODO: remove, just to test the ci
     elif event == "push":
         # These are super slow. Only run it periodically
         return model not in ["dl3", "edsr", "emformer_predict"]
diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh
index 157449c0717..054ac02bc07 100755
--- a/.ci/scripts/test_model.sh
+++ b/.ci/scripts/test_model.sh
@@ -91,7 +91,17 @@ test_model() {
         # Install requirements for llama vision.
         bash examples/models/llama3_2_vision/install_requirements.sh
     fi
-    # python3 -m examples.portable.scripts.export --model_name="llama2" should works too
+    if [[ "${MODEL_NAME}" == "qwen2_5" ]]; then
+        # Install requirements for export_llama
+        bash examples/models/llama/install_requirements.sh
+        # Test export_llama script: python3 -m examples.models.llama.export_llama.
+        # Use Llama random checkpoint with Qwen 2.5 1.5b model configuration.
+        "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/qwen2_5/1_5b_config.json
+        rm "./${MODEL_NAME}.pte"
+        return # Skip running with portable executor runner since portable doesn't support Qwen's biased linears.
+    fi
+
+    # Export a basic .pte and run the model.
"${PYTHON_EXECUTABLE}" -m examples.portable.scripts.export --model_name="${MODEL_NAME}" "${STRICT}" run_portable_executor_runner } diff --git a/examples/models/__init__.py b/examples/models/__init__.py index 822d55fc09d..55f5c449ca2 100644 --- a/examples/models/__init__.py +++ b/examples/models/__init__.py @@ -34,6 +34,7 @@ "resnet50": ("resnet", "ResNet50Model"), "llava": ("llava", "LlavaModel"), "efficient_sam": ("efficient_sam", "EfficientSAM"), + "qwen2_5": ("qwen2_5", "Qwen2_5Model"), } __all__ = [ diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py index 4ad92903534..6d9ba750431 100644 --- a/examples/models/llama/export_llama_lib.py +++ b/examples/models/llama/export_llama_lib.py @@ -84,6 +84,7 @@ verbosity_setting = None +# All models that leverage the transformer architecture defined in llama_transformer.py. EXECUTORCH_DEFINED_MODELS = [ "stories110m", "llama2", @@ -91,6 +92,7 @@ "llama3_1", "llama3_2", "static_llama", + "qwen2_5", ] TORCHTUNE_DEFINED_MODELS = ["llama3_2_vision"] diff --git a/examples/models/llama/model.py b/examples/models/llama/model.py index 90582af4856..bc4fd6ccb11 100644 --- a/examples/models/llama/model.py +++ b/examples/models/llama/model.py @@ -236,14 +236,23 @@ def __init__(self, **kwargs): eviction_batch_size=eviction_batch_size, ) - # assign=True: load params/buffers by assignment instead of performing an in-place copy. - # Because we are using device="meta", tensors do not have memory associated with them - # and an in-place copy is a no-op. Use assign=True in load_state_dict for this scenario. - missing, unexpected = self.model_.load_state_dict( - checkpoint, - strict=False, - assign=True, - ) # self.model_ = Transformer(gptconf) + missing, unexpected = None, None + try: + # assign=True: load params/buffers by assignment instead of performing an in-place copy. + # Because we are using device="meta", tensors do not have memory associated with them + # and an in-place copy is a no-op. Use assign=True in load_state_dict for this scenario. + missing, unexpected = self.model_.load_state_dict( + checkpoint, + strict=False, + assign=True, + ) # self.model_ = Transformer(gptconf) + except RuntimeError as e: + print( + "Could not load checkpoint into mode, defaulting to random uninitialized weights." + ) + print(f"Error: {e}") + # Need to provide concrete (empty) values for meta-initialized tensors for quantization. + self.model_.to_empty(device="cpu") if missing: missing_weights = [fqn for fqn in missing if fqn.endswith(".weight")] diff --git a/examples/models/qwen2_5/__init__.py b/examples/models/qwen2_5/__init__.py new file mode 100644 index 00000000000..d86a97a114d --- /dev/null +++ b/examples/models/qwen2_5/__init__.py @@ -0,0 +1,14 @@ +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from executorch.example.models.llama.model import Llama2Model + + +class Qwen2_5Model(Llama2Model): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + +__all__ = [ + "Qwen2_5Model", +] diff --git a/examples/models/qwen2_5/convert_weights.py b/examples/models/qwen2_5/convert_weights.py index 6b6c0bbdfe2..9aada5b3e90 100644 --- a/examples/models/qwen2_5/convert_weights.py +++ b/examples/models/qwen2_5/convert_weights.py @@ -68,7 +68,6 @@ def main(): # Don't necessarily need to use TorchTune checkpointer, can just aggregate checkpoint files by ourselves. 
     checkpointer = FullModelHFCheckpointer(
-        # checkpoint_dir="/home/jackzhxng/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B/snapshots/8faed761d45a263340a0528343f099c05c9a4323/",
         checkpoint_dir=args.input_dir,
         checkpoint_files=["model.safetensors"],
         output_dir=".",
@@ -80,7 +79,6 @@ def main():
 
     print("Converting checkpoint...")
     sd = qwen_2_tune_to_meta(sd["model"])
-    # torch.save(sd, "/home/jackzhxng/models/qwen2_5-1_5b.pth")
 
     torch.save(sd, args.output)
     print(f"Checkpoint saved to {args.output}")

From 77589c6713c90de5ead5cf8b21f9659c45db05eb Mon Sep 17 00:00:00 2001
From: Mengtao Yuan
Date: Mon, 24 Feb 2025 18:12:56 -0800
Subject: [PATCH 071/584] etLLM: add options to apply embedding or output.
 (#8653)

Co-authored-by: Martin Yuan
---
 examples/models/llama/llama_transformer.py | 19 +++++++++++++++----
 examples/models/llama/model_args.py        |  2 ++
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/examples/models/llama/llama_transformer.py b/examples/models/llama/llama_transformer.py
index 7a0db6adf02..aba55705d20 100644
--- a/examples/models/llama/llama_transformer.py
+++ b/examples/models/llama/llama_transformer.py
@@ -170,14 +170,24 @@ def __init__(self, params: ModelArgs):
         self.params = params
         self.vocab_size = params.vocab_size
         self.n_layers = params.n_layers
+        self.apply_embedding = params.apply_embedding
+        self.apply_output = params.apply_output
 
-        self.tok_embeddings = nn.Embedding(params.vocab_size, params.dim)
+        self.tok_embeddings = (
+            nn.Embedding(params.vocab_size, params.dim)
+            if self.apply_embedding
+            else None
+        )
         self.rope = Rope(params)
         self.layers = torch.nn.ModuleList()
         for layer_id in range(params.n_layers):
             self.layers.append(TransformerBlock(layer_id, params, self.rope))
         self.norm = RMSNorm(params.dim, eps=params.norm_eps)
-        self.output = nn.Linear(params.dim, params.vocab_size, bias=False)
+        self.output = (
+            nn.Linear(params.dim, params.vocab_size, bias=False)
+            if self.apply_output
+            else None
+        )
         self.use_kv_cache = params.use_kv_cache
         self.generate_full_logits = params.generate_full_logits
         self.max_seq_len = params.max_seq_len
@@ -195,7 +205,7 @@ def forward(
             raise ValueError(
                 "You cannot specify both tokens and h at the same time, and must specify either one"
             )
-        if tokens is not None and h is None:
+        if self.apply_embedding and tokens is not None and h is None:
             h = self.tok_embeddings(tokens)
 
         if attn_options is None:
@@ -219,7 +229,8 @@ def forward(
 
         h = self.norm(h)
 
-        logits = self.output(h)
+        if self.apply_output:
+            logits = self.output(h)
+        else:
+            logits = h
 
         if self.output_prune_map is not None:
             # expand to original size so that downstream applications can use the logits as-is.
diff --git a/examples/models/llama/model_args.py b/examples/models/llama/model_args.py
index 28804839815..226e0049803 100644
--- a/examples/models/llama/model_args.py
+++ b/examples/models/llama/model_args.py
@@ -35,6 +35,8 @@ class ModelArgs:
     input_prune_map: Optional[Dict[int, int]] = None
     # A dictionary mapping from pruned token-id to original token-id
     output_prune_map: Optional[Dict[int, int]] = None
+    apply_embedding: bool = True  # Use embedding inside the transformer
+    apply_output: bool = True  # Use output layer (unembedding) inside the transformer
     use_hf_rope: bool = False  # Use HuggingFace's RoPE implementation
     rope_theta: Optional[float] = (
         None  # The official name to override self.rope_freq_base.
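These two flags are meant to compose: with `apply_embedding=False` the caller feeds precomputed embeddings through the `h` argument, and with `apply_output=False` the trunk skips the final projection so the caller can apply its own unembedding. A minimal sketch (field names come from the diff above; the tiny dimensions, import paths, and the assumption that `ModelArgs` fills in derived fields such as `head_dim` are ours):

```python
# Sketch only: a trunk-only transformer where embedding and unembedding
# are handled outside the module.
from executorch.examples.models.llama.llama_transformer import Transformer
from executorch.examples.models.llama.model_args import ModelArgs

args = ModelArgs(
    dim=64,
    n_layers=2,
    n_heads=4,
    vocab_size=128,
    apply_embedding=False,  # caller supplies embeddings via forward(h=...)
    apply_output=False,     # caller applies its own unembedding projection
)
trunk = Transformer(args)
# The skipped submodules are simply absent:
assert trunk.tok_embeddings is None and trunk.output is None
```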
From e36768537f9daef44bd4e6d22a60e033e5ab96c7 Mon Sep 17 00:00:00 2001 From: Jack <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 24 Feb 2025 18:21:39 -0800 Subject: [PATCH 072/584] Update executorch codeowners (#8657) Differential Revision: D70131570 --- CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CODEOWNERS b/CODEOWNERS index 7a9d2a88f88..1edbf41f3aa 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -32,7 +32,7 @@ /examples/llm_manual @larryliu0820 /examples/llm_pte_finetuning @JacobSzwejbka /examples/mediatek @cccclai -/examples/models @lucylq +/examples/models @lucylq @jackzhxng /examples/portable @larryliu0820 @manuelcandales /examples/qualcomm @cccclai /examples/selective_build @lucylq @larryliu0820 @JacobSzwejbka From f1a7d60e823104d1753b114656f798334fc728f3 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Mon, 24 Feb 2025 19:05:23 -0800 Subject: [PATCH 073/584] Don't define aten-mode targets in OSS (#8661) Define get_aten_mode_options in runtime_wrapper and use it. Test Plan: buck2 build //runtime/... has many fewer errors now (aten-mode targets were failing) --- devtools/bundled_program/targets.bzl | 4 ++-- devtools/etdump/targets.bzl | 4 ++-- extension/evalue_util/targets.bzl | 4 ++-- extension/evalue_util/test/targets.bzl | 4 ++-- extension/module/targets.bzl | 4 ++-- extension/module/test/targets.bzl | 4 ++-- extension/parallel/targets.bzl | 4 ++-- extension/runner_util/targets.bzl | 4 ++-- extension/runner_util/test/targets.bzl | 4 ++-- extension/tensor/targets.bzl | 4 ++-- extension/tensor/test/targets.bzl | 4 ++-- extension/training/module/targets.bzl | 4 ++-- extension/training/optimizer/targets.bzl | 4 ++-- extension/training/optimizer/test/targets.bzl | 4 ++-- kernels/prim_ops/targets.bzl | 4 ++-- kernels/quantized/targets.bzl | 4 ++-- runtime/backend/targets.bzl | 4 ++-- runtime/core/exec_aten/targets.bzl | 4 ++-- runtime/core/exec_aten/testing_util/targets.bzl | 4 ++-- runtime/core/exec_aten/util/targets.bzl | 4 ++-- runtime/core/exec_aten/util/test/targets.bzl | 4 ++-- runtime/core/targets.bzl | 4 ++-- runtime/core/test/targets.bzl | 4 ++-- runtime/executor/targets.bzl | 4 ++-- runtime/executor/test/targets.bzl | 4 ++-- runtime/kernel/targets.bzl | 4 ++-- runtime/kernel/test/targets.bzl | 4 ++-- shim_et/xplat/executorch/build/runtime_wrapper.bzl | 3 +++ test/utils/targets.bzl | 4 ++-- third-party/gtest_defs.bzl | 3 ++- 30 files changed, 61 insertions(+), 57 deletions(-) diff --git a/devtools/bundled_program/targets.bzl b/devtools/bundled_program/targets.bzl index 7035b3b31f6..09e9aae11b1 100644 --- a/devtools/bundled_program/targets.bzl +++ b/devtools/bundled_program/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(): TARGETS and BUCK files that call this function. 
""" - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = ("_aten" if aten_mode else "") runtime.cxx_library( name = "runtime" + aten_suffix, diff --git a/devtools/etdump/targets.bzl b/devtools/etdump/targets.bzl index ddbb35eab74..bf4807aa442 100644 --- a/devtools/etdump/targets.bzl +++ b/devtools/etdump/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") SCALAR_TYPE_STEM = "scalar_type" SCALAR_TYPE = SCALAR_TYPE_STEM + ".fbs" @@ -87,7 +87,7 @@ def define_common_targets(): exported_external_deps = ["flatccrt"], ) - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = "_aten" if aten_mode else "" runtime.cxx_library( name = "etdump_flatcc" + aten_suffix, diff --git a/extension/evalue_util/targets.bzl b/extension/evalue_util/targets.bzl index e700ea21467..47934eb78af 100644 --- a/extension/evalue_util/targets.bzl +++ b/extension/evalue_util/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(): TARGETS and BUCK files that call this function. """ - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = ("_aten" if aten_mode else "") runtime.cxx_library( diff --git a/extension/evalue_util/test/targets.bzl b/extension/evalue_util/test/targets.bzl index 9e78f665c97..5d6161d09e9 100644 --- a/extension/evalue_util/test/targets.bzl +++ b/extension/evalue_util/test/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(): TARGETS and BUCK files that call this function. """ - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = "_aten" if aten_mode else "" runtime.cxx_test( diff --git a/extension/module/targets.bzl b/extension/module/targets.bzl index 4cbfa0ca0f5..09a610a1fca 100644 --- a/extension/module/targets.bzl +++ b/extension/module/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(): TARGETS and BUCK files that call this function. 
""" - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = ("_aten" if aten_mode else "") runtime.cxx_library( diff --git a/extension/module/test/targets.bzl b/extension/module/test/targets.bzl index bc4ce2c6af7..19ba09cf4e6 100644 --- a/extension/module/test/targets.bzl +++ b/extension/module/test/targets.bzl @@ -3,7 +3,7 @@ load( "ANDROID", "CXX", ) -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -12,7 +12,7 @@ def define_common_targets(): TARGETS and BUCK files that call this function. """ - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = ("_aten" if aten_mode else "") runtime.cxx_test( diff --git a/extension/parallel/targets.bzl b/extension/parallel/targets.bzl index b1da51b6171..82b3deab129 100644 --- a/extension/parallel/targets.bzl +++ b/extension/parallel/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(): TARGETS and BUCK files that call this function. """ - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = ("_aten" if aten_mode else "") runtime.cxx_library( diff --git a/extension/runner_util/targets.bzl b/extension/runner_util/targets.bzl index bc0fee197d6..3ab0c26cc72 100644 --- a/extension/runner_util/targets.bzl +++ b/extension/runner_util/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(): TARGETS and BUCK files that call this function. """ - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = ("_aten" if aten_mode else "") runtime.cxx_library( diff --git a/extension/runner_util/test/targets.bzl b/extension/runner_util/test/targets.bzl index f55a1ea995f..95d5804ecdf 100644 --- a/extension/runner_util/test/targets.bzl +++ b/extension/runner_util/test/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(is_fbcode = False): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(is_fbcode = False): TARGETS and BUCK files that call this function. """ - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = ("_aten" if aten_mode else "") # TODO(dbort): Find a way to make these run for ANDROID/APPLE in xplat. 
The diff --git a/extension/tensor/targets.bzl b/extension/tensor/targets.bzl index 97654094af6..bf1485aaba5 100644 --- a/extension/tensor/targets.bzl +++ b/extension/tensor/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(): TARGETS and BUCK files that call this function. """ - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = ("_aten" if aten_mode else "") runtime.cxx_library( diff --git a/extension/tensor/test/targets.bzl b/extension/tensor/test/targets.bzl index 29c8bff84bc..5bf8c7019b8 100644 --- a/extension/tensor/test/targets.bzl +++ b/extension/tensor/test/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(): TARGETS and BUCK files that call this function. """ - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = ("_aten" if aten_mode else "") runtime.cxx_test( diff --git a/extension/training/module/targets.bzl b/extension/training/module/targets.bzl index 88da84ed131..cfdd0f9897a 100644 --- a/extension/training/module/targets.bzl +++ b/extension/training/module/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(): TARGETS and BUCK files that call this function. """ - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = ("_aten" if aten_mode else "") runtime.cxx_library( diff --git a/extension/training/optimizer/targets.bzl b/extension/training/optimizer/targets.bzl index 3b00ae0bfdc..fb33f41f1ca 100644 --- a/extension/training/optimizer/targets.bzl +++ b/extension/training/optimizer/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(): TARGETS and BUCK files that call this function. """ - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = "_aten" if aten_mode else "" # if aten_mode: diff --git a/extension/training/optimizer/test/targets.bzl b/extension/training/optimizer/test/targets.bzl index 11269bfa180..7a93337a379 100644 --- a/extension/training/optimizer/test/targets.bzl +++ b/extension/training/optimizer/test/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(): TARGETS and BUCK files that call this function. 
""" - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = "_aten" if aten_mode else "" runtime.cxx_test( name = "sgd_test" + aten_suffix, diff --git a/kernels/prim_ops/targets.bzl b/kernels/prim_ops/targets.bzl index 9a753b50faa..c1af21a7e73 100644 --- a/kernels/prim_ops/targets.bzl +++ b/kernels/prim_ops/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(): TARGETS and BUCK files that call this function. """ - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = ("_aten" if aten_mode else "") runtime.cxx_library( diff --git a/kernels/quantized/targets.bzl b/kernels/quantized/targets.bzl index fde6698099e..a2533cb003a 100644 --- a/kernels/quantized/targets.bzl +++ b/kernels/quantized/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") load("@fbsource//xplat/executorch/codegen:codegen.bzl", "et_operator_library", "executorch_generated_lib", "exir_custom_ops_aot_lib") def define_common_targets(): @@ -77,7 +77,7 @@ def define_common_targets(): ], ) - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = "_aten" if aten_mode else "" runtime.cxx_library( diff --git a/runtime/backend/targets.bzl b/runtime/backend/targets.bzl index fe7ce489b56..12d68396730 100644 --- a/runtime/backend/targets.bzl +++ b/runtime/backend/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(): TARGETS and BUCK files that call this function. """ - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = ("_aten" if aten_mode else "") runtime.cxx_library( name = "interface" + aten_suffix, diff --git a/runtime/core/exec_aten/targets.bzl b/runtime/core/exec_aten/targets.bzl index 5664a2aea41..9114be639c0 100644 --- a/runtime/core/exec_aten/targets.bzl +++ b/runtime/core/exec_aten/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(): TARGETS and BUCK files that call this function. 
""" - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = "_aten" if aten_mode else "" # Depend on this target if your types (Tensor, ArrayRef, etc) should be flexible between ATen and executor diff --git a/runtime/core/exec_aten/testing_util/targets.bzl b/runtime/core/exec_aten/testing_util/targets.bzl index 446d0a97769..ed130c8706c 100644 --- a/runtime/core/exec_aten/testing_util/targets.bzl +++ b/runtime/core/exec_aten/testing_util/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -6,7 +6,7 @@ def define_common_targets(): The directory containing this targets.bzl file should also contain both TARGETS and BUCK files that call this function. """ - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = ("_aten" if aten_mode else "") runtime.cxx_library( diff --git a/runtime/core/exec_aten/util/targets.bzl b/runtime/core/exec_aten/util/targets.bzl index 55e38d882fd..ac46da052ca 100644 --- a/runtime/core/exec_aten/util/targets.bzl +++ b/runtime/core/exec_aten/util/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(): TARGETS and BUCK files that call this function. """ - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = "_aten" if aten_mode else "" exported_preprocessor_flags_ = [] diff --git a/runtime/core/exec_aten/util/test/targets.bzl b/runtime/core/exec_aten/util/test/targets.bzl index 357e91eea3a..1fcf984e034 100644 --- a/runtime/core/exec_aten/util/test/targets.bzl +++ b/runtime/core/exec_aten/util/test/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. 
@@ -35,7 +35,7 @@ def define_common_targets(): ], ) - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = "_aten" if aten_mode else "" runtime.cxx_test( name = "tensor_util_test" + aten_suffix, diff --git a/runtime/core/targets.bzl b/runtime/core/targets.bzl index d67312beda3..3195e727d96 100644 --- a/runtime/core/targets.bzl +++ b/runtime/core/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def event_tracer_enabled(): return native.read_config("executorch", "event_tracer_enabled", "false") == "true" @@ -82,7 +82,7 @@ def define_common_targets(): ], ) - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = ("_aten" if aten_mode else "") runtime.cxx_library( name = "evalue" + aten_suffix, diff --git a/runtime/core/test/targets.bzl b/runtime/core/test/targets.bzl index abe52bcadff..ef09c905674 100644 --- a/runtime/core/test/targets.bzl +++ b/runtime/core/test/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -102,7 +102,7 @@ def define_common_targets(): ], ) - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = "_aten" if aten_mode else "" runtime.cxx_test( diff --git a/runtime/executor/targets.bzl b/runtime/executor/targets.bzl index 67163ed8789..c5d07448a06 100644 --- a/runtime/executor/targets.bzl +++ b/runtime/executor/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def _program_preprocessor_flags(): """Returns the preprocessor_flags to use when building Program.cpp""" @@ -42,7 +42,7 @@ def define_common_targets(): ], ) - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = "_aten" if aten_mode else "" runtime.cxx_library( name = "program" + aten_suffix, diff --git a/runtime/executor/test/targets.bzl b/runtime/executor/test/targets.bzl index 922fa17ba75..1dbb4ea6108 100644 --- a/runtime/executor/test/targets.bzl +++ b/runtime/executor/test/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(is_fbcode = False): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(is_fbcode = False): TARGETS and BUCK files that call this function. 
""" - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = ("_aten" if aten_mode else "") runtime.cxx_library( diff --git a/runtime/kernel/targets.bzl b/runtime/kernel/targets.bzl index e02c1288e55..d49435f2825 100644 --- a/runtime/kernel/targets.bzl +++ b/runtime/kernel/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def _operator_registry_preprocessor_flags(): max_kernel_num = native.read_config("executorch", "max_kernel_num", None) @@ -51,7 +51,7 @@ def define_common_targets(): preprocessor_flags = ["-DMAX_KERNEL_NUM=1"], ) - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = "_aten" if aten_mode else "" runtime.cxx_library( diff --git a/runtime/kernel/test/targets.bzl b/runtime/kernel/test/targets.bzl index 96e0c8c557c..bd66fc05b6f 100644 --- a/runtime/kernel/test/targets.bzl +++ b/runtime/kernel/test/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") load("@fbsource//xplat/executorch/codegen:codegen.bzl", "et_operator_library", "executorch_generated_lib") def define_common_targets(): @@ -88,7 +88,7 @@ def define_common_targets(): ], ) - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = "_aten" if aten_mode else "" runtime.cxx_test( diff --git a/shim_et/xplat/executorch/build/runtime_wrapper.bzl b/shim_et/xplat/executorch/build/runtime_wrapper.bzl index b81aabcd83f..5bfba568423 100644 --- a/shim_et/xplat/executorch/build/runtime_wrapper.bzl +++ b/shim_et/xplat/executorch/build/runtime_wrapper.bzl @@ -333,6 +333,9 @@ def get_oss_build_kwargs(): } return {} +def get_aten_mode_options(): + return (False,) if env.is_oss else (True, False) + # Names in this struct should match the standard Buck rule names if possible: # see the "Build Rules" section in the sidebar of # https://buck.build/concept/build_rule.html. diff --git a/test/utils/targets.bzl b/test/utils/targets.bzl index 93e33daf81f..249e7bdf2be 100644 --- a/test/utils/targets.bzl +++ b/test/utils/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -7,7 +7,7 @@ def define_common_targets(): TARGETS and BUCK files that call this function. 
""" - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = "_aten" if aten_mode else "" runtime.cxx_library( diff --git a/third-party/gtest_defs.bzl b/third-party/gtest_defs.bzl index c1f4778b80c..ac8046e264d 100644 --- a/third-party/gtest_defs.bzl +++ b/third-party/gtest_defs.bzl @@ -1,3 +1,4 @@ +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options") # Copied from fbsource/third-party/googletest COMPILER_FLAGS = [ @@ -17,7 +18,7 @@ def define_gtest_targets(): visibility = ["PUBLIC"], ) - for aten_mode in (True, False): + for aten_mode in get_aten_mode_options(): aten_suffix = "_aten" if aten_mode else "" # # Google Test From f965746a201c5565adf5fbaad8bb5c917069619d Mon Sep 17 00:00:00 2001 From: Wouter Devriendt Date: Mon, 24 Feb 2025 23:37:05 -0800 Subject: [PATCH 074/584] Fix pyre issue following D70084542 Differential Revision: D70136275 Pull Request resolved: https://github.com/pytorch/executorch/pull/8658 --- backends/vulkan/serialization/vulkan_graph_builder.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backends/vulkan/serialization/vulkan_graph_builder.py b/backends/vulkan/serialization/vulkan_graph_builder.py index 81447472123..d01c8b53b35 100644 --- a/backends/vulkan/serialization/vulkan_graph_builder.py +++ b/backends/vulkan/serialization/vulkan_graph_builder.py @@ -267,7 +267,6 @@ def get_or_create_value_for(self, arg: _Argument): elif isinstance(arg, list) and isinstance(arg[0], Node): return self.create_value_list_value(arg) elif isinstance(arg, torch.fx.immutable_collections.immutable_list): - # pyre-ignore[6] return self.create_value_list_value(arg) elif isinstance(arg, str): return self.create_string_value(arg) From 9484c011c441d30af78908fc676f674ed2dcf74f Mon Sep 17 00:00:00 2001 From: shewu-quic <138087975+shewu-quic@users.noreply.github.com> Date: Tue, 25 Feb 2025 16:38:11 +0800 Subject: [PATCH 075/584] Qualcomm AI Engine Direct - Enable AR-N model for prompt processing in hybrid mode (#8210) * Qualcomm AI Engine Direct - Enable AR-N mode to process prompt in hybrid mode Summary: - Add `max_seq_len` to refer to maximum number of tokens that the model can process & consider at once to generate predictions/responses. - Add `prefill_ar_n` to determine the number of tokens to consume and the number of logits to produce for prompt processor in hybrid mode. 
- Remove prefill mode * fixed CI * Add the figure to readme and fixed unused variable * fixed linting --- backends/qualcomm/tests/test_qnn_delegate.py | 8 +- examples/qualcomm/oss_scripts/llama/README.md | 21 +- .../llama/assets/PromptProcessingWithARN.png | Bin 0 -> 48393 bytes examples/qualcomm/oss_scripts/llama/llama.py | 237 +++--- .../oss_scripts/llama/model/static_llama.py | 60 +- .../oss_scripts/llama/qnn_llama_runner.cpp | 6 +- .../oss_scripts/llama/runner/io_manager.cpp | 697 ++++++++++++------ .../oss_scripts/llama/runner/io_manager.h | 74 +- .../oss_scripts/llama/runner/runner.cpp | 123 ++-- .../oss_scripts/llama/runner/runner.h | 7 +- exir/lowered_backend_module.py | 2 +- 11 files changed, 769 insertions(+), 466 deletions(-) create mode 100644 examples/qualcomm/oss_scripts/llama/assets/PromptProcessingWithARN.png diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index 9b05ad871f4..f8552e4fd4b 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -3154,9 +3154,9 @@ def test_llama3_2_1b(self): "llama3_2", "--model_mode", "hybrid", - "--prefill_seq_len", + "--prefill_ar_len", "32", - "--kv_seq_len", + "--max_seq_len", "512", "--num_sharding", "4", @@ -3234,9 +3234,9 @@ def test_llama_stories_110m(self): "stories110m", "--model_mode", "hybrid", - "--prefill_seq_len", + "--prefill_ar_len", "32", - "--kv_seq_len", + "--max_seq_len", "128", ] if self.compile_only: diff --git a/examples/qualcomm/oss_scripts/llama/README.md b/examples/qualcomm/oss_scripts/llama/README.md index 439278cb424..cd468eebb26 100644 --- a/examples/qualcomm/oss_scripts/llama/README.md +++ b/examples/qualcomm/oss_scripts/llama/README.md @@ -8,11 +8,16 @@ This file provides you the instructions to run LLAMA model with different parame We offer the following modes to execute the model: -Prefill Mode: This is also known as batch prefill mode, where the model takes in a list of tokens as input and generates the next token along with the key-value (KV) cache for all tokens. This mode is efficient for encoding the user's prompt. - KV Cache Mode: In KV Cache mode, the model takes in a single previous token and generates the next predicted token along with its KV cache. It is efficient for generating subsequent tokens after the initial prompt. -Hybrid Mode: Hybrid mode leverages the strengths of both batch prefill and KV cache modes to optimize token generation speed. Initially, it uses prefill mode to efficiently generate the prompt's key-value (KV) cache. Then, the mode switches to KV cache mode, which excels at generating subsequent tokens. +Hybrid Mode: Hybrid mode leverages the strengths of both AR-N model and KV cache modes to optimize token generation speed. Initially, it uses AR-N model to efficiently generate the prompt's key-value (KV) cache. Then, the mode switches to KV cache mode, which excels at generating subsequent tokens. + - AR-N model: The auto-regression (AR) length determines the number of tokens to consume and the number of logits to produce. Use it to process the prompt and generate the key-value (kv) cache, which serves as a prompt processor in hybrid mode. + - Prompt processing with AR-N model: +
+ Prompt Processing With AR-N Model +
Prompt processing is done using a for-loop. An N-token block is taken, and the KV cache is updated for that block. This process is repeated until all tokens are consumed, with the last block potentially requiring padding. For flexibility, the AR-N model can handle any input length less than the maximum sequence length. For TTFT, the input length (or number of blocks) will vary depending on the actual input length, rather than always being the same. +
+
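As a rough illustration of the block-wise prompt processing described above, here is a small Python sketch. This is an editor's example only, not code from this patch: `process_prompt`, `run_ar_n_block`, and `PAD_TOKEN` are hypothetical names standing in for one forward pass of the AR-N prompt processor and its padding token.

```python
from typing import Callable, List

PAD_TOKEN = 0  # assumed padding id; the real runner handles padding/masking itself


def process_prompt(
    prompt_tokens: List[int],
    ar_len: int,
    run_ar_n_block: Callable[[List[int], int], None],
) -> int:
    """Consume the prompt in blocks of `ar_len` tokens; each block updates the KV cache."""
    num_blocks = (len(prompt_tokens) + ar_len - 1) // ar_len  # ceiling division
    for block_idx in range(num_blocks):
        block = prompt_tokens[block_idx * ar_len : (block_idx + 1) * ar_len]
        # The last block may be shorter than ar_len; pad it and mask the padded positions.
        block = block + [PAD_TOKEN] * (ar_len - len(block))
        run_ar_n_block(block, block_idx * ar_len)
    return num_blocks
```

For example, with `prefill_ar_len` 32, a 100-token prompt is consumed in ceil(100 / 32) = 4 blocks, the last one padded with 28 tokens, which is why time-to-first-token scales with the number of blocks rather than being fixed.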
## Instructions @@ -50,13 +55,13 @@ At the end of this step, users should have the following files ready: `consolida ### Step3: Run default examples using hybrid mode. #### LLAMA2 ```bash -python examples/qualcomm/oss_scripts/llama/llama.py -b build-android -s ${SERIAL_NUM} -m ${SOC_MODEL} --ptq 16a4w --checkpoint stories110M.pt --params params.json --tokenizer_model tokenizer.model --tokenizer_bin tokenizer.bin --llama_model stories110m --model_mode hybrid --prefill_seq_len 32 --kv_seq_len 128 --prompt "Once upon a time" +python examples/qualcomm/oss_scripts/llama/llama.py -b build-android -s ${SERIAL_NUM} -m ${SOC_MODEL} --ptq 16a4w --checkpoint stories110M.pt --params params.json --tokenizer_model tokenizer.model --tokenizer_bin tokenizer.bin --llama_model stories110m --model_mode hybrid --prefill_ar_len 32 --max_seq_len 128 --prompt "Once upon a time" ``` #### LLAMA3.2 Default example using hybrid mode. ```bash -python examples/qualcomm/oss_scripts/llama/llama.py -b build-android -s ${SERIAL_NUM} -m ${SOC_MODEL} --ptq 16a4w --checkpoint consolidated.00.pth --params params.json --tokenizer_model tokenizer.model --llama_model llama3_2 --model_mode hybrid --prefill_seq_len 32 --kv_seq_len 128 --prompt "what is 1+1" +python examples/qualcomm/oss_scripts/llama/llama.py -b build-android -s ${SERIAL_NUM} -m ${SOC_MODEL} --ptq 16a4w --checkpoint consolidated.00.pth --params params.json --tokenizer_model tokenizer.model --llama_model llama3_2 --model_mode hybrid --prefill_ar_len 32 --max_seq_len 128 --prompt "what is 1+1" ``` ### KV Cache update mechanism @@ -109,16 +114,16 @@ We have two distinct mechanisms for updating the key-value (KV) cache, which can ### Additional Configs when running the script If you would like to compile the model only, we have provided the flag `--compile_only`. Taking LLAMA3.2 as an example: ```bash -python examples/qualcomm/oss_scripts/llama/llama.py -b build-android -m ${SOC_MODEL} --ptq 16a4w --checkpoint consolidated.00.pth --params params.json --tokenizer_model tokenizer.model --llama_model llama3_2 --model_mode hybrid --prefill_seq_len 32 --kv_seq_len 128 --prompt "what is 1+1" --compile_only +python examples/qualcomm/oss_scripts/llama/llama.py -b build-android -m ${SOC_MODEL} --ptq 16a4w --checkpoint consolidated.00.pth --params params.json --tokenizer_model tokenizer.model --llama_model llama3_2 --model_mode hybrid --prefill_ar_len 32 --max_seq_len 128 --prompt "what is 1+1" --compile_only ``` On the other hand, if you already have a pre-compiled .pte model, you can perform inference by providing the flag `--pre_gen_pte` and specifying the folder that contains the .pte model. Taking LLAMA3.2 as an example: ```bash -python examples/qualcomm/oss_scripts/llama/llama.py -b build-android -s ${SERIAL_NUM} -m ${SOC_MODEL} --ptq 16a4w --checkpoint consolidated.00.pth --params params.json --tokenizer_model tokenizer.model --llama_model llama3_2 --model_mode hybrid --prefill_seq_len 32 --kv_seq_len 128 --prompt "what is 1+1" --pre_gen_pte ${FOLDER_TO_PRE_GEN_PTE} +python examples/qualcomm/oss_scripts/llama/llama.py -b build-android -s ${SERIAL_NUM} -m ${SOC_MODEL} --ptq 16a4w --checkpoint consolidated.00.pth --params params.json --tokenizer_model tokenizer.model --llama_model llama3_2 --model_mode hybrid --prefill_ar_len 32 --max_seq_len 128 --prompt "what is 1+1" --pre_gen_pte ${FOLDER_TO_PRE_GEN_PTE} ``` You can select the KV Cache update mechanism at runtime by setting the `KV_UPDATER` variable to either "shift_pointer" or "smart_mask". 
By default, it is set to "smart_mask".
`KV_UPDATER` = "shift_pointer"
```bash
python examples/qualcomm/oss_scripts/llama/llama.py -b build-android -s ${SERIAL_NUM} -m ${SOC_MODEL} --ptq 16a4w --checkpoint consolidated.00.pth --params params.json --tokenizer_model tokenizer.model --llama_model llama3_2 --model_mode hybrid --prefill_ar_len 32 --max_seq_len 128 --prompt "what is 1+1" --kv_updator ${KV_UPDATER}
```
diff --git a/examples/qualcomm/oss_scripts/llama/assets/PromptProcessingWithARN.png b/examples/qualcomm/oss_scripts/llama/assets/PromptProcessingWithARN.png
new file mode 100644
index 0000000000000000000000000000000000000000..228b846f7c386b1db5de0b6b42b21dd30eb561c6
GIT binary patch
literal 48393
[48393-byte base85-encoded PNG data omitted]
z8>;+Mw?dyPMZFU_2eT*z)lJhQ3SCFeM8Kr%{;!7iJAv;Wg-$N3JUsbhsEN&NMhC(P z*DLgq-e(w11@*`ErSd@%r#~u~rNNNp@PixMDI$HqPJ5XiVef2Ko%7_An?=?KW<;-!*niG z zobzLQ^7~cucYq|K`i_#89I6=7I1EDCm~cE01MpULQ|iugnb6T%A3*Y3qsqZ?m6`6CB8kkf zFN;D)pAzhI(JJS|olr~0@fs0e8dSmn*qQ1R|4;rxIs}O}zbK&i=x@3d=*uXf4BTpZ zDcoQN)_|5u9|+Op6>2X80|k_9LW@4$%=RSQp#XnoX+A#Q_FVloiH!+=H2(L67x$`- z4DU;07AEI3B@sjhwPq!Lr`g0sU8rK8eZDh%4DAS=w=-WN=>b=%X1IyYZP8AAFFC+z zV}U&N6`Jcz!O;9f{arhQn)*7*Bf?*H8O~+`RvX?-=>iG3 zoaR@d)T!V?CBJo!63X4iCJUl4xTb*lfj0;~h3Q8Zv90NAA~W>0ghJ19tM!UBDo1i- z-DZ0v_9MYMdJtG|=_e?8H8dFoUPW3HGyTJs2}$7cX_WyStlq+%tXzf~)Ukm>zU@S{ zOG(F`>6(`gliPWA7qC+J90W+U*rPVd!QiO^!@%<_N{jVnFrgpk#{Q*pq~3Z}YQF-) zjpv)A#h((`a*5v_^nYfdSO%I+5PAT*Z}gwyA(Ip3-JO{tM&XPWfL5gdDr;3Z&FWQe zMTM|Iuv3n-q@67GwsR}%pKsPIkLlB_w>}&HL&uV~X@lD5PfDyT4~36(18`TONl$k& zD1f(+kOvh=IB&3!4>@#lxDU@DMt8%^=UDa-tH7|YlBS~P+h+`S%BvDh8z>Nh`hx~fseM1`c%sg|<2Qo;X=%~cO z7-EE7Q4!l{s^b+__KaA`i z$Qa}c@fhmPNdohNuP4vx^lsq{F=@>1KVKDDD!1kz=8ogtcDWz-ZIv+F8y<}nG~apy zKG_H>2C2K4bTe?jAmruZ9_qN&Yw>pg{w>VXNC9J7_}6FzNCV@tk>sbIm_1*_vyH}z zr?#O+ZFY|iqgDN24{jqP1^6>^UK{#@g&VYtso0 z>&D})bzL&;Bo-|)ucIDUG*++?d=(1?#x%+N*Lj0RMI2VpUoZxB;5tMz(r7yP2sqd7 z=@_i`S-y6^`hp|PNG~1|sN`@o{>Z@8HeL(%V%94@yB+c2O9@!T&2BLE;D{=IG$5=2 zJS7eA)UZj7Q}3^MrX_7r9&`e^_s(0Lu(nW#>b6iOvS`upzCH!DuYv)s2S-Dr4dJ3c zZoXDOhdEuISb~U#;0+9AE~gERY8lw^X8%@+T2r0VAsNB>!e(QSCL8_iZa^HVr5fwLGpTHiNeSJe8N+gMH*r!j^_2mqhb@DGh zekW3I5exL7K!9fLHt|%OBSN9O^(C&u0eI!K%1@Q|8DLmL!W@$u5LY&&z>FOpHx`3- zdFAl?f+rsyWoumygha9Rg3Iq1(@9gn7q9!Qql*71E+zD(r79aw`NsK!wxdqpjlvS9 zi&o|m8zG@L!CsF1oS+$1kPPou@>EZ{aTUKph&X#R`C#p z8e3PVJp?C+{JW=wFX?QATLsBQcD*ji*e}Kr2{B0N*E03#0`P{@-bSbKn^B+zNF|V} z%O(G8iP*YUl>G#QoACQ@F=}DtSrz1xhhbB{>2UBHuSTi+?Oa z^U#S_C~z*qRpyZxsQV7dd@44;5izFoTA^xdV(9Ep*1`t${?vkbU+L=hIo^v~E- zM+L>GB^YahPR@)2H#;|tjXnx$xh$LXtsHh2q7ADkYQ-p+v@B#fhjr>yX8`}oiLll$t0N*vy;lx zS&{RbQElivzrHSRA&x*++ZZ421-*A3J@!w7IQGCLJkbNJMv;7%c5IU@h zc1(3e#ce72?S?TS9|LGK=oAtOSchGpQ<(}5z(XXIi|%y?%kpZp|0;|)Ww#h(EZj)d z*apcS6GTyZd<)UsYNEUw$Rij+kG-7+;Ny;CLMXmA*T#%aw8(5V6ICIx*a7OK)5(^> zh7i`FuNxs-d#dg%V<*DsS4Fi4zI9*jhc}y;`h#n(u+6wY#~g@nZc33W1}tJ<9BI0Qw0_{hZ!9+sClYRs@n|Qy{qz%1HgP_fZLH7FFBcmGUMenD zg-NBa`+XWBx%0_p|7oDfZPX|v_y^W>6ECAxGSfi@^??_(twkoH!lS<@w!}7v;U2T_ zJA_N53a8$2#ZNHQ;e%#w1hs(l9S2dS1=M}GGJI@Mt-2fB|ESxA2HE>5@5X%ozd3oB zO+$PN->s5d+^Rj1NOT;*vIkT$LmIq^r)_xJ!dyX3%!bZU3s~@qIMgyFV}C?Cj05Vz zZTh!x2h%-uXk?!)azhZIjbLl&gU=%oHWvb~0Zv_?&P^MPUyz?9*6i}f#4m7Z*<7vX z{=|y|2Bz(?-GL3LU zMaKG`i;{BZR$RW+E+lq9b*xfL+((~JwQy|ro2rkI&B>J~p2x8&2R9RAiM|dS(L{bI zU%L4xzI3O~%eCvR<}L99?UPv!KBjA*qDc5}7JI(jQhJnAn#EfB<|}&}Y6e-I1=~!X zF&J&c?z`@8QjN4C=V>f+m2;y9@qDWuRES(R6yP5Bqwz0*{d>N5y5i;KZcB=c42eN4UPHpn5@CoC-{n6pu~}wFbLgedq_aZi zK*G>0WbUah3(>Zy_9#{lDu1#RzfLkH B6s7= ar_len: + for i, k_cache in enumerate(k_caches): + k_cache[:, :, pos - ar_len] = new_k_caches[i][:, :, 0] - for i, v_cache in enumerate(v_caches): - v_cache[:, pos, :] = new_v_caches[i] + for i, v_cache in enumerate(v_caches): + v_cache[:, pos - ar_len, :] = new_v_caches[i][:, 0, :] + atten_mask[:, :, pos - ar_len] = 0 - atten_mask[0][pos] = 0 pos += 1 return (atten_mask, pos, k_caches, v_caches) -def shift_pointer_updator( - atten_mask, pos, k_caches, v_caches, new_k_caches, new_v_caches +def shift_pointer_updater( + ar_len, atten_mask, pos, k_caches, v_caches, new_k_caches, new_v_caches ): - k_caches = [ - torch.cat([k_cache[:, :, 1:], new_k_caches[i]], dim=-1) - for i, k_cache in enumerate(k_caches) - ] - v_caches = [ - torch.cat([v_cache[:, 1:, :], new_v_caches[i]], dim=1) - for i, v_cache in enumerate(v_caches) - ] + # Update the KV cache input for the next 
inference when the position exceeds the autoregressive length. + if pos >= ar_len: + k_caches = [ + torch.cat([k_cache[:, :, 1:], new_k_caches[i][:, :, :1]], dim=-1) + for i, k_cache in enumerate(k_caches) + ] + v_caches = [ + torch.cat([v_cache[:, 1:, :], new_v_caches[i][:, :1, :]], dim=1) + for i, v_cache in enumerate(v_caches) + ] + atten_mask[:, :, -pos - 1] = 0 pos += 1 - atten_mask[0][-pos - 1] = 0 return (atten_mask, pos, k_caches, v_caches) @@ -123,15 +129,15 @@ def _kv_calibrate( user_prompts, module: torch.fx.GraphModule, tokenizer, + ar_len=1, max_seq_len=512, - updator=smart_mask_updator, + updater=smart_mask_updater, use_i64_token=False, ): _, atten_mask, _, k_caches, v_caches = example_inputs # TODO: change criteria & support batch inputs if necessary - pos = torch.tensor(0, dtype=torch.int32) - max_cache_len = max_seq_len - 1 + all_pos = torch.arange(0, max_seq_len, 1, dtype=torch.int32).unsqueeze(0) token_list = [] # Llama2 tokenizer has no special tokens @@ -144,21 +150,50 @@ def _kv_calibrate( else: raise RuntimeError("Unkown tokenizer") + pos = len(token_list) if len(token_list) < ar_len else ar_len + dtype = torch.int64 if use_i64_token else torch.int32 + with torch.no_grad(): - while token_list[-1] != tokenizer.eos_id and pos < max_cache_len: - dtype = torch.int64 if use_i64_token else torch.int32 - token = torch.full((1, 1), token_list[pos], dtype=dtype) + while token_list[-1] != tokenizer.eos_id and pos < max_seq_len: + tmp_token_list = torch.tensor( + token_list[pos - ar_len : pos], dtype=dtype + ).reshape(1, -1) + tmp_pos = all_pos[:, pos - ar_len : pos] + tmp_atten_mask = atten_mask + if pos < ar_len: + tmp_token_list = torch.cat( + [ + torch.zeros((1, ar_len - pos), dtype=dtype), + torch.tensor(token_list, dtype=dtype).reshape(1, -1), + ], + dim=1, + ) + tmp_pos = torch.cat( + [ + torch.zeros((1, ar_len - pos), dtype=torch.int32), + all_pos[:, :pos], + ], + dim=1, + ) + tmp_atten_mask = torch.cat( + [ + torch.ones(1, ar_len, max_seq_len - pos) * -255.0, + atten_mask[:, :, -pos:], + ], + dim=-1, + ) + logits, new_k_caches, new_v_caches = module( - token, - atten_mask, - torch.full((1, 1), pos), + tmp_token_list, + tmp_atten_mask, + tmp_pos, *k_caches, *v_caches, ) - atten_mask, pos, k_caches, v_caches = updator( - atten_mask, pos, k_caches, v_caches, new_k_caches, new_v_caches + atten_mask, pos, k_caches, v_caches = updater( + ar_len, atten_mask, pos, k_caches, v_caches, new_k_caches, new_v_caches ) - if pos >= len(token_list): + if pos > len(token_list): token_list.append(torch.argmax(logits[:, -1], dim=-1).item()) print(f"kv calibration data:\n{tokenizer.decode(token_list)}") @@ -173,7 +208,6 @@ def _prefill_calibrate( use_i64_token=False, ): _, atten_mask = example_inputs - max_cache_len = max_seq_len - 1 # TODO: change criteria & support batch inputs if necessary @@ -192,20 +226,24 @@ def _prefill_calibrate( dtype = torch.int64 if use_i64_token else torch.int32 with torch.no_grad(): - while token_list[-1] != tokenizer.eos_id and pos < max_cache_len: + while token_list[-1] != tokenizer.eos_id and pos < max_seq_len: tmp_token_list = torch.tensor(token_list, dtype=dtype).reshape(1, -1) - if pos < max_cache_len: + if pos < max_seq_len: tmp_token_list = torch.cat( [ tmp_token_list, - torch.zeros((1, max_cache_len - pos), dtype=dtype), + torch.zeros((1, max_seq_len - pos), dtype=dtype), ], dim=1, ) - logits, new_k_caches, new_v_caches = module( + results = module( tmp_token_list, atten_mask, ) + if len(results) == 3: + logits, new_k_caches, new_v_caches = results + 
elif len(results) == 1: + logits = results token_list.append(torch.argmax(logits[:, pos - 1], dim=-1).item()) pos += 1 @@ -217,8 +255,9 @@ def calibrate( user_prompts, module: torch.fx.GraphModule, tokenizer, + ar_len=1, max_seq_len=512, - kv_updator=smart_mask_updator, + kv_updater=smart_mask_updater, use_i64_token=False, ): if len(example_inputs) == 2: @@ -236,8 +275,9 @@ def calibrate( user_prompts, module, tokenizer, + ar_len, max_seq_len, - updator=kv_updator, + updater=kv_updater, use_i64_token=use_i64_token, ) else: @@ -268,56 +308,36 @@ def _tag_ios(self, gm: torch.fx.GraphModule, fixed_point_type): # shape of k caches and v caches kv_cache_shape = { - # single head, kv mode input + # single head, kv input (self.llama_meta["get_head_dim"], self.llama_meta["get_max_seq_len"]), (self.llama_meta["get_max_seq_len"], self.llama_meta["get_head_dim"]), - # single head, kv mode output - (self.llama_meta["get_head_dim"], 1), - (1, self.llama_meta["get_head_dim"]), - # single head, bert mode - (self.llama_meta["get_head_dim"], self.llama_meta["get_max_seq_len"] - 1), - (self.llama_meta["get_max_seq_len"] - 1, self.llama_meta["get_head_dim"]), + # single head, kv output + (self.llama_meta["get_head_dim"], self.llama_meta["get_ar_len"]), + (self.llama_meta["get_ar_len"], self.llama_meta["get_head_dim"]), } io_shape = { - # kv mode + # logit output ( self.llama_meta["get_max_batch_size"], - 1, - self.llama_meta["get_vocab_size"], - ), - # bert mode - ( - self.llama_meta["get_max_batch_size"], - self.llama_meta["get_max_seq_len"] - 1, + self.llama_meta["get_ar_len"], self.llama_meta["get_vocab_size"], ), } atten_mask_shape = { - # kv mode - (self.llama_meta["get_max_batch_size"], self.llama_meta["get_max_seq_len"]), - # bert mode ( - self.llama_meta["get_max_seq_len"] - 1, - self.llama_meta["get_max_seq_len"] - 1, + self.llama_meta["get_max_batch_size"], + self.llama_meta["get_ar_len"], + self.llama_meta["get_max_seq_len"], ), } freq_shape = { - # kv mode - (1, self.llama_meta["get_head_dim"] // 2), - # bert mode - ( - self.llama_meta["get_max_seq_len"] - 1, - self.llama_meta["get_head_dim"] // 2, - ), + (self.llama_meta["get_ar_len"], self.llama_meta["get_head_dim"] // 2), } freq_op = { - # kv mode exir_ops.edge.aten.select.int, - # bert mode - exir_ops.edge.aten.slice_copy.Tensor, } for n in gm.graph.nodes: @@ -376,8 +396,9 @@ def quantize(self, quant_dtype, args, tokenizer, custom_annotations=()): args.prompt, fx_graph_module, tokenizer=tokenizer, + ar_len=self.llama_meta["get_ar_len"], max_seq_len=self.llama_meta["get_max_seq_len"], - kv_updator=args.kv_updator, + kv_updater=args.kv_updater, use_i64_token=args.embedding_quantize is not None, ) @@ -467,12 +488,14 @@ def compile(args, pte_filename, tokenizer): kv_config = ModelArgs(**json.load(f)) # TODO: support batch inputs if necessary kv_config.max_batch_size = 1 - kv_config.max_seq_len = args.kv_seq_len + kv_config.max_seq_len = args.max_seq_len kv_config.use_kv_cache = True prefill_config = copy.copy(kv_config) - prefill_config.max_seq_len = args.prefill_seq_len - prefill_config.use_kv_cache = False + prefill_config.max_seq_len = args.max_seq_len + prefill_config.use_kv_cache = ( + False if args.max_seq_len == args.prefill_ar_len else True + ) state_dict = torch.load( args.checkpoint, weights_only=True, map_location="cpu", mmap=True @@ -484,27 +507,29 @@ def compile(args, pte_filename, tokenizer): if args.model_mode == "kv": llama_instance_list.append( LlamaModel( - kv_config, output_new_cache_only=True, use_i64_token=use_i64_token - ) - 
) - elif args.model_mode == "prefill": - llama_instance_list.append( - LlamaModel( - prefill_config, - output_new_cache_only=False, + kv_config, + ar_len=1, + output_new_cache_only=True, + output_cache=True, use_i64_token=use_i64_token, ) ) elif args.model_mode == "hybrid": llama_instance_list.append( LlamaModel( - kv_config, output_new_cache_only=True, use_i64_token=use_i64_token + kv_config, + ar_len=1, + output_new_cache_only=True, + output_cache=True, + use_i64_token=use_i64_token, ) ) llama_instance_list.append( LlamaModel( prefill_config, - output_new_cache_only=False, + ar_len=args.prefill_ar_len, + output_new_cache_only=True, + output_cache=True, use_i64_token=use_i64_token, ) ) @@ -606,7 +631,7 @@ def compile(args, pte_filename, tokenizer): start_lowering_ts = time.time() quant_attrs = None - if args.model_mode in ["kv", "prefill"]: + if args.model_mode in ["kv"]: llama_instance_list[0].lowering_modules( args.artifact, fixed_point_type, @@ -783,12 +808,10 @@ def compile(args, pte_filename, tokenizer): def inference(args, quant_attrs, pte_filename, runtime_tokenizer_path, pre_gen_pte=""): workspace = f"/data/local/tmp/{getpass.getuser()}/executorch/single_llama" - if args.model_mode == "prefill": + if args.model_mode == "kv": eval_mode = 0 - elif args.model_mode == "kv": - eval_mode = 1 elif args.model_mode == "hybrid": - eval_mode = 2 + eval_mode = 1 else: raise RuntimeError(f"Unknown model_mode: {args.model_mode}.") @@ -807,7 +830,7 @@ def post_process(): with open(f"{args.artifact}/outputs/outputs.txt", "r") as f: outputs.append(f.read()) - seq_len = args.prefill_seq_len if args.model_mode == "prefill" else args.kv_seq_len + seq_len = args.max_seq_len runner_args = " ".join( [ f'--prompt "{args.prompt}"', @@ -824,9 +847,9 @@ def post_process(): # x86 emulator is intended for CI and not performance. Check only the first few tokens. seq_len = min(seq_len, 16) - if args.kv_updator == smart_mask_updator: + if args.kv_updater == smart_mask_updater: logging.warning( - "x86 only support ShiftPointer, overwrite kv_updator to ShiftPointer" + "x86 only support ShiftPointer, overwrite kv_updater to ShiftPointer" ) qnn_sdk = os.getenv("QNN_SDK_ROOT") @@ -839,7 +862,7 @@ def post_process(): f"--model_path {pte_path}", f"--seq_len {seq_len}", f"--output_path {args.artifact}/outputs/outputs.txt", - f"--kv_updator ShiftPointer", + f"--kv_updater ShiftPointer", runner_args, ] ) @@ -859,7 +882,7 @@ def post_process(): f"--model_path {pte_filename}.pte", f"--seq_len {seq_len}", "--output_path outputs/outputs.txt", - f"--kv_updator {'SmartMask' if args.kv_updator == smart_mask_updator else 'ShiftPointer'}", + f"--kv_updater {'SmartMask' if args.kv_updater == smart_mask_updater else 'ShiftPointer'}", runner_args, ] ) @@ -998,28 +1021,28 @@ def _build_parser(): parser.add_argument( "--model_mode", - help="Export and inference prefill mode, kv mode or hybrid mode", + help="Export and inference kv mode or hybrid mode", default="kv", - choices=["prefill", "kv", "hybrid"], + choices=["kv", "hybrid"], type=str, ) parser.add_argument( - "--prefill_seq_len", - help="Ouput sequence length for llama. Use this option for prefill or hybrid mode", - default=32, + "--max_seq_len", + help="This refers to maximum number of tokens that the model can process & consider at once to generate predictions/responses.", + default=512, type=int, ) parser.add_argument( - "--kv_seq_len", - help="Ouput sequence length for llama. 
Use this option for kv or hybrid mode", - default=512, + "--prefill_ar_len", + help="The auto-regression (AR) length determines the number of tokens to consume and the number of logits to produce. Use this option to process the prompt and generate the key-value (kv) cache, which serves as a prompt processor for hybrid mode.", + default=32, type=int, ) parser.add_argument( - "--kv_updator", + "--kv_updater", help="Choose how to update kv cache during runtime", choices=["smart_mask", "shift_pointer"], default="smart_mask", @@ -1045,12 +1068,10 @@ def export_llama(args) -> None: if args.model_mode == "kv": pte_filename = "kv_llama_qnn" - elif args.model_mode == "prefill": - pte_filename = "prefill_llama_qnn" elif args.model_mode == "hybrid": assert ( - args.kv_seq_len >= args.prefill_seq_len - ), "Please ensure kv_seq_len is >= prefill_seq_len" + args.max_seq_len >= args.prefill_ar_len + ), "Please ensure max_seq_len is >= prefill_ar_len" pte_filename = "hybrid_llama_qnn" else: raise RuntimeError(f"Unknown model_mode: {args.model_mode}.") @@ -1073,13 +1094,13 @@ def export_llama(args) -> None: else: raise RuntimeError(f"Unknown llama_model: {args.llama_model}.") - if args.kv_updator == "smart_mask": + if args.kv_updater == "smart_mask": args.shared_buffer = True - args.kv_updator = smart_mask_updator - elif args.kv_updator == "shift_pointer": - args.kv_updator = shift_pointer_updator + args.kv_updater = smart_mask_updater + elif args.kv_updater == "shift_pointer": + args.kv_updater = shift_pointer_updater else: - exit(f"Using an unkown kv update {args.kv_updator}") + exit(f"Using an unkown kv update {args.kv_updater}") if args.pre_gen_pte: quant_attrs = json.load( diff --git a/examples/qualcomm/oss_scripts/llama/model/static_llama.py b/examples/qualcomm/oss_scripts/llama/model/static_llama.py index 253abc9578c..09cc7504224 100755 --- a/examples/qualcomm/oss_scripts/llama/model/static_llama.py +++ b/examples/qualcomm/oss_scripts/llama/model/static_llama.py @@ -153,10 +153,7 @@ def forward_sha( y = y.reshape(bsz, seq_len, -1) if self.output_new_cache_only: - if k_caches and v_caches: - return y, k, v - # batch_prefill mode. 
Consider to remove, it's not really used - return y, k[-1], v[-1] + return y, k, v return y, kh, vh @@ -298,7 +295,12 @@ def forward( class LlamaModel(nn.Module): def __init__( - self, config: ModelArgs, output_new_cache_only=True, use_i64_token=False + self, + config: ModelArgs, + ar_len=1, + output_new_cache_only=True, + output_cache=True, + use_i64_token=False, ): super().__init__() self.dim = config.dim @@ -311,8 +313,10 @@ def __init__( self.vocab_size = config.vocab_size self.rope_freq_base = config.rope_freq_base self.use_kv_cache = config.use_kv_cache + self.ar_len = ar_len self.output_new_cache_only = output_new_cache_only self.use_i64_token = use_i64_token + self.output_cache = output_cache self.layers = nn.ModuleList( [ @@ -359,10 +363,10 @@ def forward( output_v_cache = [] # following tensors should be invariant across batches freqs_cos = ( - self.freqs_cos[input_pos][0] if self.use_kv_cache else self.freqs_cos[:-1] + self.freqs_cos[input_pos][0] if self.use_kv_cache else self.freqs_cos ) freqs_sin = ( - self.freqs_sin[input_pos][0] if self.use_kv_cache else self.freqs_sin[:-1] + self.freqs_sin[input_pos][0] if self.use_kv_cache else self.freqs_sin ) hidden_states = self.tok_embeddings(tokens) @@ -388,19 +392,36 @@ def forward( hidden_states = self.norm(hidden_states) logits = self.output(hidden_states) - return logits, output_k_cache, output_v_cache + if self.output_cache: + return logits, output_k_cache, output_v_cache + return logits def get_example_inputs(self, use_kv_cache=True): dtype = torch.int64 if self.use_i64_token else torch.int32 - if use_kv_cache: - tokens = torch.randint( - self.vocab_size, (self.max_batch_size, 1), dtype=dtype - ) + tokens = torch.randint( + self.vocab_size, (self.max_batch_size, self.ar_len), dtype=dtype + ) - pos_ids = torch.zeros((self.max_batch_size, 1), dtype=torch.int32) + atten_mask = torch.full((self.ar_len, self.ar_len), torch.tensor(-255.0)) + mask_cond = torch.arange(atten_mask.size(-1)) + atten_mask.masked_fill_( + mask_cond < (mask_cond + 1).view(atten_mask.size(-1), 1), 0 + ) + if self.max_seq_len != self.ar_len: + atten_mask = torch.cat( + [ + torch.ones(self.ar_len, self.max_seq_len - self.ar_len) * -255.0, + atten_mask, + ], + dim=-1, + ) + atten_mask = atten_mask[None, :, :].expand( + self.max_batch_size, self.ar_len, self.max_seq_len + ) + if use_kv_cache: + pos_ids = torch.zeros((self.max_batch_size, self.ar_len), dtype=torch.int32) k_cache, v_cache = [], [] - atten_mask = torch.full((self.max_batch_size, self.max_seq_len), -255.0) - atten_mask[:, -1] = 0 + for _ in range(self.n_layers): for _ in range(self.n_kv_heads): # transpose first to decrease the runtime efforts @@ -408,13 +429,13 @@ def get_example_inputs(self, use_kv_cache=True): torch.zeros( self.max_batch_size, self.head_dim, - self.max_seq_len - 1, + self.max_seq_len - self.ar_len, ) ) v_cache.append( torch.zeros( self.max_batch_size, - self.max_seq_len - 1, + self.max_seq_len - self.ar_len, self.head_dim, ) ) @@ -426,10 +447,6 @@ def get_example_inputs(self, use_kv_cache=True): v_cache, ) - max_promp = self.max_seq_len - 1 - tokens = torch.arange(0, max_promp, 1, dtype=dtype).unsqueeze(0) - atten_mask = torch.triu(torch.rand((max_promp, max_promp)), 1) - atten_mask[atten_mask != 0] = -255 return ( tokens, atten_mask, @@ -438,6 +455,7 @@ def get_example_inputs(self, use_kv_cache=True): def get_metadata(self): # TODO: modify this when enabling LLAMA 7B return { + "get_ar_len": self.ar_len, "get_bos_id": 1, "get_eos_id": 2, "get_dim": self.dim, diff --git 
a/examples/qualcomm/oss_scripts/llama/qnn_llama_runner.cpp b/examples/qualcomm/oss_scripts/llama/qnn_llama_runner.cpp index 1bc90a11f9d..0a1635223e6 100644 --- a/examples/qualcomm/oss_scripts/llama/qnn_llama_runner.cpp +++ b/examples/qualcomm/oss_scripts/llama/qnn_llama_runner.cpp @@ -48,11 +48,11 @@ DEFINE_int32( DEFINE_int32( eval_mode, 1, - "0: PromptProcessor(prefill) / 1: TokenGenerator(kv) / 2: HybridMode (prefill+kv)"); + "0: TokenGenerator(kv) / 1: HybridMode (prefill+kv)"); DEFINE_double(logits_scale, 0.0, "Logits scale"); DEFINE_int32(logits_offset, 0, "Logits offset"); DEFINE_string( - kv_updator, + kv_updater, "How to update kv cache. Choose between SmartMask and ShiftPointer", "SmartMask"); @@ -67,7 +67,7 @@ int main(int argc, char** argv) { FLAGS_logits_offset, FLAGS_temperature, FLAGS_eval_mode, - FLAGS_kv_updator); + FLAGS_kv_updater); std::vector buf; buf.reserve(5 * FLAGS_seq_len); // assume each token is around 5 char std::ofstream fout(FLAGS_output_path.c_str()); diff --git a/examples/qualcomm/oss_scripts/llama/runner/io_manager.cpp b/examples/qualcomm/oss_scripts/llama/runner/io_manager.cpp index badaea0ca73..cfa3b392894 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/io_manager.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/io_manager.cpp @@ -54,7 +54,10 @@ std::vector IoMgrBase::get_output_tensors( ShiftPointerIoMgr::ShiftPointerIoMgr( std::vector>& modules, + int32_t context_len, + int32_t prefill_ar_len, int32_t prefill_cache_len, + int32_t kv_ar_len, int32_t kv_cache_len, int32_t vocab_size, int32_t num_layers, @@ -66,7 +69,10 @@ ShiftPointerIoMgr::ShiftPointerIoMgr( const bool use_int64_token) : IoMgrBase(modules), shard_layers_({num_layers}), + context_len_(context_len), + kv_ar_len_(kv_ar_len), kv_cache_len_(kv_cache_len), + prefill_ar_len_(prefill_ar_len), prefill_cache_len_(prefill_cache_len), vocab_size_(vocab_size), num_layers_(num_layers), @@ -75,7 +81,8 @@ ShiftPointerIoMgr::ShiftPointerIoMgr( eval_mode_(eval_mode), prefill_forward_name_(prefill_forward_name), kv_forward_name_(kv_forward_name), - use_int64_token_(use_int64_token) { + use_int64_token_(use_int64_token), + is_bert_(prefill_cache_len_ == 0) { if (!prefill_forward_name_.empty()) { input_tensors_[prefill_forward_name_] = std::vector>(modules.size()); @@ -113,15 +120,14 @@ void ShiftPointerIoMgr::init_io() { IO* ptr = static_cast(data_ptr_.get()); std::memset(ptr, 0, sizeof(IO)); - int32_t max_cache_len = std::max(kv_cache_len_, prefill_cache_len_); - int32_t k_in_size = (head_dim_ + 1) * max_cache_len; - int32_t v_cache_size = (num_heads_ + 1) * max_cache_len * head_dim_; - int32_t k_cache_out_size = num_heads_ * head_dim_; - if (eval_mode_ == EvalMode::kHybrid || eval_mode_ == EvalMode::kPrefill) { - k_cache_out_size *= prefill_cache_len_; - } + int32_t max_ar_len = std::max(kv_ar_len_, prefill_ar_len_); + int32_t k_in_size = (head_dim_ + 1) * kv_cache_len_; + // Use context length to prevent exceeding the range when the AR-N model + // updates the last block in hybrid mode. + int32_t v_cache_size = (num_heads_ + 1) * context_len_ * head_dim_; + int32_t k_cache_out_size = num_heads_ * max_ar_len * head_dim_; - // Init kv vector shape, general enough to be shared across all 3 modes. + // Init kv vector shape, general enough to be shared across all modes. 
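+  // The extra row in k_in_size ((head_dim_ + 1) rows of kv_cache_len_) and
+  // the extra head in v_cache_size ((num_heads_ + 1) heads) leave headroom
+  // so the shift-pointer update can advance the cache pointers in place
+  // without stepping past the end of the buffer.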
ptr->k_cache_out.reserve(num_layers_); ptr->v_cache.reserve(num_layers_); for (int layer = 0; layer < num_layers_; layer++) { @@ -130,14 +136,15 @@ void ShiftPointerIoMgr::init_io() { } auto init_prefill = [&]() { - ptr->prefill_input_toks.resize(prefill_cache_len_); - ptr->prefill_atten_mask.resize(prefill_cache_len_ * prefill_cache_len_); - ptr->prefill_logits.resize(prefill_cache_len_ * vocab_size_); + ptr->prefill_input_toks.resize(prefill_ar_len_, 0); + ptr->prefill_input_pos.resize(prefill_ar_len_, 0); + ptr->prefill_attention_mask.resize((prefill_ar_len_ * context_len_), 0); + ptr->prefill_logits.resize(prefill_ar_len_ * vocab_size_); }; auto init_kv = [&]() { - ptr->kv_logits.resize(vocab_size_); - ptr->kv_attention_mask.resize((kv_cache_len_ + 1), 0); + ptr->kv_logits.resize(kv_ar_len_ * vocab_size_); + ptr->kv_attention_mask.resize((kv_ar_len_ * context_len_), 0); ptr->k_cache.reserve(num_layers_); for (int layer = 0; layer < num_layers_; layer++) { ptr->k_cache.emplace_back(); @@ -149,9 +156,6 @@ void ShiftPointerIoMgr::init_io() { }; switch (eval_mode_) { - case EvalMode::kPrefill: - init_prefill(); - break; case EvalMode::kKVCached: init_kv(); break; @@ -177,37 +181,38 @@ void ShiftPointerIoMgr::prepare_kv_io( IO* ptr = static_cast(data_ptr_.get()); // [I]: input_tokens - Result input_tok = methods_meta[0]->input_tensor_meta(0); - input_tok_ = std::make_unique( - input_tok->scalar_type(), - input_tok->sizes().size(), - const_cast(input_tok->sizes().data()), - &ptr->input_tok, - const_cast(input_tok->dim_order().data())); - input_tensors_[kv_forward_name_][0].push_back(input_tok_.get()); + Result kv_input_toks = methods_meta[0]->input_tensor_meta(0); + kv_input_toks_ = std::make_unique( + kv_input_toks->scalar_type(), + kv_input_toks->sizes().size(), + const_cast(kv_input_toks->sizes().data()), + &ptr->kv_input_toks, + const_cast(kv_input_toks->dim_order().data())); + input_tensors_[kv_forward_name_][0].push_back(kv_input_toks_.get()); // [I]: atten_mask - Result atten_mask = methods_meta[0]->input_tensor_meta(1); - attention_mask_ = std::make_unique( - atten_mask->scalar_type(), - atten_mask->sizes().size(), - const_cast(atten_mask->sizes().data()), + Result kv_attention_mask = methods_meta[0]->input_tensor_meta(1); + kv_attention_mask_ = std::make_unique( + kv_attention_mask->scalar_type(), + kv_attention_mask->sizes().size(), + const_cast(kv_attention_mask->sizes().data()), ptr->kv_attention_mask.data(), - const_cast(atten_mask->dim_order().data())); - input_tensors_[kv_forward_name_][0].push_back(attention_mask_.get()); + const_cast( + kv_attention_mask->dim_order().data())); + input_tensors_[kv_forward_name_][0].push_back(kv_attention_mask_.get()); // [I]: input_pos - Result input_pos = methods_meta[0]->input_tensor_meta(2); - input_pos_ = std::make_unique( - input_pos->scalar_type(), - input_pos->sizes().size(), - const_cast(input_pos->sizes().data()), - &ptr->input_pos, - const_cast(input_pos->dim_order().data())); - input_tensors_[kv_forward_name_][0].push_back(input_pos_.get()); + Result kv_input_pos = methods_meta[0]->input_tensor_meta(2); + kv_input_pos_ = std::make_unique( + kv_input_pos->scalar_type(), + kv_input_pos->sizes().size(), + const_cast(kv_input_pos->sizes().data()), + &ptr->kv_input_pos, + const_cast(kv_input_pos->dim_order().data())); + input_tensors_[kv_forward_name_][0].push_back(kv_input_pos_.get()); // [I] kv_cache - int index = 3; // bypass input_tokens, input_pos, atten_mask + int index = 3; // bypass input_tokens, atten_mask, input_pos for (int 
offset = 0, shard_index = 0, v_stride = kv_cache_len_ * head_dim_; shard_index < modules_.size(); offset += shard_layers_[shard_index], shard_index++) { @@ -304,7 +309,7 @@ void ShiftPointerIoMgr::prepare_prefill_io( IO* ptr = static_cast(data_ptr_.get()); - // [I]: pre_input_tokens + // [I]: prefill_input_tokens Result prefill_input_toks = methods_meta[0]->input_tensor_meta(0); prefill_input_toks_ = std::make_unique( prefill_input_toks->scalar_type(), @@ -314,25 +319,81 @@ void ShiftPointerIoMgr::prepare_prefill_io( const_cast( prefill_input_toks->dim_order().data())); input_tensors_[prefill_forward_name_][0].push_back(prefill_input_toks_.get()); - // [I]: prefill_attn_mask - for (int i = 0; i < prefill_cache_len_; ++i) { - for (int j = 0; j < prefill_cache_len_; ++j) { - if (i < j) { - ptr->prefill_atten_mask[i * prefill_cache_len_ + j] = 0; - } else { - ptr->prefill_atten_mask[i * prefill_cache_len_ + j] = 65535; + // [I]: prefill_attention_mask + for (int i = 0; i < prefill_ar_len_; ++i) { + for (int j = 0, + offset = i * context_len_ + (context_len_ - prefill_ar_len_); + j < prefill_ar_len_; + ++j) { + if (i >= j) { + ptr->prefill_attention_mask[j + offset] = 65535; } } } - Result prefill_atten_mask = methods_meta[0]->input_tensor_meta(1); - prefill_attn_mask_ = std::make_unique( - prefill_atten_mask->scalar_type(), - prefill_atten_mask->sizes().size(), - const_cast(prefill_atten_mask->sizes().data()), - ptr->prefill_atten_mask.data(), + Result prefill_attention_mask = + methods_meta[0]->input_tensor_meta(1); + prefill_attention_mask_ = std::make_unique( + prefill_attention_mask->scalar_type(), + prefill_attention_mask->sizes().size(), + const_cast( + prefill_attention_mask->sizes().data()), + ptr->prefill_attention_mask.data(), const_cast( - prefill_atten_mask->dim_order().data())); - input_tensors_[prefill_forward_name_][0].push_back(prefill_attn_mask_.get()); + prefill_attention_mask->dim_order().data())); + input_tensors_[prefill_forward_name_][0].push_back( + prefill_attention_mask_.get()); + + if (!is_bert_) { + // [I]: prefill_input_pos + Result prefill_input_pos = + methods_meta[0]->input_tensor_meta(2); + prefill_input_pos_ = std::make_unique( + prefill_input_pos->scalar_type(), + prefill_input_pos->sizes().size(), + const_cast(prefill_input_pos->sizes().data()), + ptr->prefill_input_pos.data(), + const_cast( + prefill_input_pos->dim_order().data())); + input_tensors_[prefill_forward_name_][0].push_back( + prefill_input_pos_.get()); + + // [I] kv_cache + int index = 3; // bypass input_tokens, atten_mask, input_pos + // Add prefill offset to align the v_out pointer with the decode model. + for (int offset = 0, + shard_index = 0, + v_stride = kv_cache_len_ * head_dim_, + prefill_offset = (kv_cache_len_ - prefill_cache_len_) * head_dim_; + shard_index < modules_.size(); + offset += shard_layers_[shard_index], shard_index++) { + for (int cache_group = 0; cache_group < 2; ++cache_group) { + for (int layer = 0; layer < shard_layers_[shard_index]; ++layer) { + for (int head = 0; head < num_heads_; ++head, ++index) { + Result kv_cache = + methods_meta[shard_index]->input_tensor_meta(index); + std::vector>& cache = + (cache_group == 0 ? k_cache_in_[prefill_forward_name_] + : v_cache_in_[prefill_forward_name_]); + void* cache_ptr = (cache_group == 0) + ? 
static_cast(ptr->k_cache[layer + offset][head].data()) + : static_cast( + ptr->v_cache[layer + offset].data() + head * v_stride + + prefill_offset); + + cache.emplace_back(std::make_unique( + kv_cache->scalar_type(), + kv_cache->sizes().size(), + const_cast(kv_cache->sizes().data()), + cache_ptr, + const_cast( + kv_cache->dim_order().data()))); + input_tensors_[prefill_forward_name_][shard_index].push_back( + cache.back().get()); + } + } + } + } + } // [O]: logits int logit_index = 0; Result logits = @@ -348,18 +409,11 @@ void ShiftPointerIoMgr::prepare_prefill_io( // [O] kv_cache int index = 1; - // prefill_k_stride should be equal to prefill_v_stride in prefill mode. // In hybrid mode, we use kv mode cache len for v stride since we want to // update prefill's result onto kv modes input. - int32_t prefill_k_stride = prefill_cache_len_ * head_dim_; - int32_t prefill_v_stride = - std::max(prefill_cache_len_, kv_cache_len_) * head_dim_; + int32_t prefill_k_stride = prefill_ar_len_ * head_dim_; + int32_t prefill_v_stride = kv_cache_len_ * head_dim_; - if (eval_mode_ == EvalMode::kPrefill) { - ET_CHECK_MSG( - prefill_k_stride == prefill_v_stride, - "prefill_k_stride should be equal to prefill_v_stride"); - } for (int offset = 0, shard_index = 0; shard_index < modules_.size(); offset += shard_layers_[shard_index], shard_index++) { for (int cache_group = 0; cache_group < 2; ++cache_group) { @@ -397,13 +451,11 @@ void ShiftPointerIoMgr::update_prefill_to_kv_io( int64_t pos, std::vector>& output_tensors) { ET_CHECK_MSG(kv_cache_len_ != 0, "k_cache_len_ should not equal to 0"); - ET_CHECK_MSG( - prefill_cache_len_ != 0, "prefill_cache_len_ should not equal to 0"); IO* ptr = static_cast(data_ptr_.get()); - ptr->input_tok = + ptr->kv_input_toks = use_int64_token_ ? cur_token : static_cast(cur_token); - ptr->input_pos = static_cast(pos); + ptr->kv_input_pos = static_cast(pos); // If prompt len is 30, prefill will handle to pos = 30. // At this point, pos should be 31. for (int i = 0; i < pos + 1; i++) { @@ -435,17 +487,29 @@ void ShiftPointerIoMgr::update_prefill_to_kv_io( } } + // Update k_cache std::vector>& k_cache_in = k_cache_in_[kv_forward_name_]; std::vector>& k_cache_out = k_cache_out_[prefill_forward_name_]; + // copy from last to prevent from overwriting values + size_t copied_size = pos * sizeof(uint8_t); for (int i = 0; i < k_cache_in.size(); ++i) { uint8_t* ptr_in = k_cache_in[i]->mutable_data(); - const uint8_t* ptr_out = k_cache_out[i]->data(); - for (size_t j = 0, offset = kv_cache_len_; j < head_dim_; - ++j, offset += kv_cache_len_) { - for (int k = 0, k_stride = j * prefill_cache_len_; k < pos; k++) { - ptr_in[offset + k] = ptr_out[k_stride + k]; + if (is_bert_) { + const uint8_t* ptr_out = k_cache_out[i]->data(); + for (size_t j = 0, offset = kv_cache_len_; j < head_dim_; + ++j, offset += kv_cache_len_) { + for (int k = 0, k_stride = j * prefill_ar_len_; k < pos; k++) { + ptr_in[offset + k] = ptr_out[k_stride + k]; + } + } + } else { + for (int j = head_dim_; j > -1; --j) { + memcpy( + ptr_in + j * kv_cache_len_, + ptr_in + j * prefill_cache_len_, + copied_size); } } k_cache_in[i]->set_data(ptr_in + pos); @@ -458,10 +522,10 @@ void ShiftPointerIoMgr::update_kv_io( std::vector>& output_tensors) { IO* ptr = static_cast(data_ptr_.get()); // update input_tok - ptr->input_tok = + ptr->kv_input_toks = use_int64_token_ ? 
cur_token : static_cast(cur_token); // update position_ids - ptr->input_pos = static_cast(pos); + ptr->kv_input_pos = static_cast(pos); // update causal mask for next token ptr->kv_attention_mask[kv_cache_len_ - pos] = 65535; @@ -505,47 +569,101 @@ void ShiftPointerIoMgr::update_prefill_io( int64_t cur_token, int64_t pos, std::vector>& output_tensors) { + (void)cur_token; (void)output_tensors; - IO* ptr = static_cast(data_ptr_.get()); - // Support CPU 4-bit embedding, which requires int64 input. - // However, for QNN embedding, only int32 input is needed. - // Therefore, we need to cast to the correct type to write the data. - if (use_int64_token_) { - ptr->prefill_input_toks[pos] = cur_token; - } else { - int32_t* prefill_input_toks_ptr = - reinterpret_cast(ptr->prefill_input_toks.data()); - prefill_input_toks_ptr[pos] = static_cast(cur_token); + + if (!is_bert_) { + // update v_cache + auto& v_cache_in = v_cache_in_[prefill_forward_name_]; + auto& v_cache_out = v_cache_out_[prefill_forward_name_]; + for (int i = 0; i < v_cache_in.size(); i++) { + v_cache_in[i]->set_data( + v_cache_in[i]->mutable_data() + prefill_ar_len_ * head_dim_); + v_cache_out[i]->set_data( + v_cache_out[i]->mutable_data() + + prefill_ar_len_ * head_dim_); + } + + for (int shard = 0; shard < output_tensors.size(); shard++) { + for (int index = 0; index < output_tensors[shard].size(); index++) { + ET_CHECK_MSG( + modules_[shard]->set_output( + prefill_forward_name_, output_tensors[shard][index], index) == + Error::Ok, + "failed to set output tensor for module %d's %d'th output " + "while updating kv_cache output tensors", + shard, + index); + } + } + + auto& k_cache_in = k_cache_in_[prefill_forward_name_]; + auto& k_cache_out = k_cache_out_[prefill_forward_name_]; + // update k_cache by single thread, this part is cpu cache sensitive + for (int i = 0; i < k_cache_in.size(); ++i) { + uint8_t* ptr_in = k_cache_in[i]->mutable_data(); + const uint8_t* ptr_out = k_cache_out[i]->data(); + for (size_t j = 0, offset = prefill_cache_len_; j < head_dim_; + ++j, offset += prefill_cache_len_) { + for (int k = 0, k_stride = j * prefill_ar_len_; k < prefill_ar_len_; + k++) { + ptr_in[offset + k] = ptr_out[k_stride + k]; + } + } + k_cache_in[i]->set_data(ptr_in + prefill_ar_len_); + } } } void ShiftPointerIoMgr::fill_prefill_toks( + int64_t start_pos, std::vector& prompt_tokens) { IO* ptr = static_cast(get_mutable_ptr()); - for (int i = 0; i < prompt_tokens.size(); i++) { - // Support CPU 4-bit embedding, which requires int64 input. - // However, for QNN embedding, only int32 input is needed. - // Therefore, we need to cast to the correct type to write the data. - if (use_int64_token_) { - ptr->prefill_input_toks[i] = prompt_tokens[i]; - } else { - int32_t* prefill_input_toks_ptr = - reinterpret_cast(ptr->prefill_input_toks.data()); - prefill_input_toks_ptr[i] = static_cast(prompt_tokens[i]); + for (int i = 0; i < prefill_ar_len_; i++) { + if (!is_bert_) { + ptr->prefill_input_pos[i] = start_pos + i; + } + + if (start_pos + i < prompt_tokens.size()) { + // Support CPU 4-bit embedding, which requires int64 input. + // However, for QNN embedding, only int32 input is needed. + // Therefore, we need to cast to the correct type to write the data. 
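+      // Tokens are filled one AR block (prefill_ar_len_) at a time; slots
+      // past the end of the prompt are simply left untouched.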
+ if (use_int64_token_) { + ptr->prefill_input_toks[i] = prompt_tokens[start_pos + i]; + } else { + int32_t* prefill_input_toks_ptr = + reinterpret_cast(ptr->prefill_input_toks.data()); + prefill_input_toks_ptr[i] = + static_cast(prompt_tokens[start_pos + i]); + } + } + if (start_pos >= prefill_ar_len_) { + for (int j = 0, + offset = i * context_len_ + + (context_len_ - prefill_ar_len_ - start_pos); + j < prefill_ar_len_; + ++j) { + ptr->prefill_attention_mask[offset + j] = 65535; + } } } } void ShiftPointerIoMgr::fill_kv_tok_mask(int64_t pos, int64_t cur_token) { IO* ptr = static_cast(get_mutable_ptr()); - ptr->input_tok = + ptr->kv_input_toks = use_int64_token_ ? cur_token : static_cast(cur_token); + ptr->kv_input_pos = static_cast(pos); + ; ptr->kv_attention_mask[kv_cache_len_] = 65535; } SmartMaskIoMgr::SmartMaskIoMgr( std::vector>& modules, + int32_t context_len, + int32_t prefill_ar_len, int32_t prefill_cache_len, + int32_t kv_ar_len, int32_t kv_cache_len, int32_t vocab_size, int32_t num_layers, @@ -557,7 +675,10 @@ SmartMaskIoMgr::SmartMaskIoMgr( const bool use_int64_token) : IoMgrBase(modules), shard_layers_({num_layers}), + context_len_(context_len), + kv_ar_len_(kv_ar_len), kv_cache_len_(kv_cache_len), + prefill_ar_len_(prefill_ar_len), prefill_cache_len_(prefill_cache_len), vocab_size_(vocab_size), num_layers_(num_layers), @@ -566,12 +687,17 @@ SmartMaskIoMgr::SmartMaskIoMgr( eval_mode_(eval_mode), prefill_forward_name_(prefill_forward_name), kv_forward_name_(kv_forward_name), - use_int64_token_(use_int64_token) { + use_int64_token_(use_int64_token), + is_bert_(prefill_cache_len == 0) { if (!prefill_forward_name_.empty()) { input_tensors_[prefill_forward_name_] = std::vector>(modules.size()); output_tensors_[prefill_forward_name_] = std::vector>(modules.size()); + k_cache_in_[prefill_forward_name_] = + std::vector>(); + v_cache_in_[prefill_forward_name_] = + std::vector>(); k_cache_out_[prefill_forward_name_] = std::vector>(); v_cache_out_[prefill_forward_name_] = @@ -597,20 +723,20 @@ SmartMaskIoMgr::SmartMaskIoMgr( } std::unordered_map SmartMaskIoMgr::get_io_elements() { - size_t cache_len = std::max(kv_cache_len_, prefill_cache_len_); - size_t cache_in_ele = num_layers_ * num_heads_ * head_dim_ * cache_len; - size_t cache_out_ele = num_layers_ * num_heads_ * head_dim_; + int32_t max_ar_len = std::max(kv_ar_len_, prefill_ar_len_); + size_t cache_in_ele = num_layers_ * num_heads_ * head_dim_ * kv_cache_len_; + size_t cache_out_ele = num_layers_ * num_heads_ * head_dim_ * max_ar_len; return std::unordered_map{ - {"input_tok_ele", 1}, - {"input_pos_ele", 1}, + {"kv_input_toks_ele", kv_ar_len_}, + {"kv_input_pos_ele", kv_ar_len_}, {"cache_in_ele", cache_in_ele}, {"cache_out_ele", cache_out_ele}, - // 1 for the input prompt - {"atten_mask_ele", cache_len + 1}, - {"kv_logits_ele", vocab_size_}, - {"prefill_input_toks_ele", prefill_cache_len_}, - {"prefill_atten_mask_ele", prefill_cache_len_ * prefill_cache_len_}, - {"prefill_logits_ele", prefill_cache_len_ * vocab_size_}}; + {"kv_attention_mask_ele", kv_ar_len_ * context_len_}, + {"kv_logits_ele", kv_ar_len_ * vocab_size_}, + {"prefill_input_toks_ele", prefill_ar_len_}, + {"prefill_input_pos_ele", prefill_ar_len_}, + {"prefill_attention_mask_ele", prefill_ar_len_ * context_len_}, + {"prefill_logits_ele", prefill_ar_len_ * vocab_size_}}; } std::unordered_map SmartMaskIoMgr::get_io_bytes() { @@ -623,21 +749,23 @@ std::unordered_map SmartMaskIoMgr::get_io_bytes() { byte % static_cast(alignment)); }; return std::unordered_map{ - 
{"input_tok_bytes", - align(element_map["input_tok_ele"] * sizeof(int32_t))}, - {"input_pos_bytes", - align(element_map["input_pos_ele"] * sizeof(int32_t))}, + {"kv_input_toks_bytes", + align(element_map["kv_input_toks_ele"] * sizeof(int32_t))}, + {"kv_input_pos_bytes", + align(element_map["kv_input_pos_ele"] * sizeof(int32_t))}, {"cache_in_bytes", align(element_map["cache_in_ele"] * sizeof(uint8_t))}, {"cache_out_bytes", align(element_map["cache_out_ele"] * sizeof(uint8_t))}, - {"atten_mask_bytes", - align(element_map["atten_mask_ele"] * sizeof(uint16_t))}, + {"kv_attention_mask_bytes", + align(element_map["kv_attention_mask_ele"] * sizeof(uint16_t))}, {"kv_logits_bytes", align(element_map["kv_logits_ele"] * sizeof(uint16_t))}, {"prefill_input_toks_bytes", align(element_map["prefill_input_toks_ele"] * sizeof(int32_t))}, - {"prefill_atten_mask_bytes", - align(element_map["prefill_atten_mask_ele"] * sizeof(uint16_t))}, + {"prefill_input_pos_bytes", + align(element_map["prefill_input_pos_ele"] * sizeof(int32_t))}, + {"prefill_attention_mask_bytes", + align(element_map["prefill_attention_mask_ele"] * sizeof(uint16_t))}, {"prefill_logits_bytes", align(element_map["prefill_logits_ele"] * sizeof(uint16_t))}}; } @@ -654,10 +782,10 @@ void SmartMaskIoMgr::IO::init_io_ptrs( for (const auto& iter : io_bytes_map) { std::string key = iter.first; size_t size = iter.second; - if (key == "input_tok_bytes") { - input_tok = reinterpret_cast(cur_ptr); - } else if (key == "input_pos_bytes") { - input_pos = reinterpret_cast(cur_ptr); + if (key == "kv_input_toks_bytes") { + kv_input_toks = reinterpret_cast(cur_ptr); + } else if (key == "kv_input_pos_bytes") { + kv_input_pos = reinterpret_cast(cur_ptr); } else if (key == "cache_in_bytes" || key == "cache_out_bytes") { auto& k_cache_ref = (key == "cache_in_bytes") ? k_cache : k_cache_out; auto& v_cache_ref = (key == "cache_in_bytes") ? 
v_cache : v_cache_out; @@ -679,14 +807,16 @@ void SmartMaskIoMgr::IO::init_io_ptrs( } } continue; - } else if (key == "atten_mask_bytes") { + } else if (key == "kv_attention_mask_bytes") { kv_attention_mask = reinterpret_cast(cur_ptr); } else if (key == "kv_logits_bytes") { kv_logits = reinterpret_cast(cur_ptr); } else if (key == "prefill_input_toks_bytes") { prefill_input_toks = reinterpret_cast(cur_ptr); - } else if (key == "prefill_atten_mask_bytes") { - prefill_atten_mask = reinterpret_cast(cur_ptr); + } else if (key == "prefill_input_pos_bytes") { + prefill_input_pos = reinterpret_cast(cur_ptr); + } else if (key == "prefill_attention_mask_bytes") { + prefill_attention_mask = reinterpret_cast(cur_ptr); } else if (key == "prefill_logits_bytes") { prefill_logits = reinterpret_cast(cur_ptr); } else { @@ -720,15 +850,10 @@ void SmartMaskIoMgr::init_io() { std::unordered_map io_bytes_map = get_io_bytes(); switch (eval_mode_) { - case EvalMode::kPrefill: - io_bytes_map.erase("input_tok_bytes"); - io_bytes_map.erase("input_pos_bytes"); - io_bytes_map.erase("atten_mask_bytes"); - io_bytes_map.erase("kv_logits_bytes"); - break; case EvalMode::kKVCached: io_bytes_map.erase("prefill_input_toks_bytes"); - io_bytes_map.erase("prefill_atten_mask_bytes"); + io_bytes_map.erase("prefill_input_pos_bytes"); + io_bytes_map.erase("prefill_attention_mask_bytes"); io_bytes_map.erase("prefill_logits_bytes"); break; case EvalMode::kHybrid: @@ -774,53 +899,55 @@ void SmartMaskIoMgr::prepare_kv_io( std::unordered_map io_bytes_map = get_io_bytes(); // [I]: input_tokens - Result input_tok = methods_meta[0]->input_tensor_meta(0); - input_tok_ = std::make_unique( - input_tok->scalar_type(), - input_tok->sizes().size(), - const_cast(input_tok->sizes().data()), - ptr->input_tok, - const_cast(input_tok->dim_order().data())); - input_tensors_[kv_forward_name_][0].push_back(input_tok_.get()); + Result kv_input_toks = methods_meta[0]->input_tensor_meta(0); + kv_input_toks_ = std::make_unique( + kv_input_toks->scalar_type(), + kv_input_toks->sizes().size(), + const_cast(kv_input_toks->sizes().data()), + ptr->kv_input_toks, + const_cast(kv_input_toks->dim_order().data())); + input_tensors_[kv_forward_name_][0].push_back(kv_input_toks_.get()); ptr->add_custom_mem_info( - ptr->input_tok, - io_bytes_map["input_tok_bytes"], - input_tok->scalar_type(), - input_tok.get()); + ptr->kv_input_toks, + io_bytes_map["kv_input_toks_bytes"], + kv_input_toks->scalar_type(), + kv_input_toks.get()); // [I]: atten_mask - Result atten_mask = methods_meta[0]->input_tensor_meta(1); - attention_mask_ = std::make_unique( - atten_mask->scalar_type(), - atten_mask->sizes().size(), - const_cast(atten_mask->sizes().data()), + std::fill_n(ptr->kv_attention_mask, kv_ar_len_ * context_len_, 0); + Result kv_attention_mask = methods_meta[0]->input_tensor_meta(1); + kv_attention_mask_ = std::make_unique( + kv_attention_mask->scalar_type(), + kv_attention_mask->sizes().size(), + const_cast(kv_attention_mask->sizes().data()), ptr->kv_attention_mask, - const_cast(atten_mask->dim_order().data())); - input_tensors_[kv_forward_name_][0].push_back(attention_mask_.get()); + const_cast( + kv_attention_mask->dim_order().data())); + input_tensors_[kv_forward_name_][0].push_back(kv_attention_mask_.get()); ptr->add_custom_mem_info( ptr->kv_attention_mask, - io_bytes_map["atten_mask_bytes"], - atten_mask->scalar_type(), - atten_mask.get()); + io_bytes_map["kv_attention_mask_bytes"], + kv_attention_mask->scalar_type(), + kv_attention_mask.get()); // [I]: input_pos - 
Result input_pos = methods_meta[0]->input_tensor_meta(2); - input_pos_ = std::make_unique( - input_pos->scalar_type(), - input_pos->sizes().size(), - const_cast(input_pos->sizes().data()), - ptr->input_pos, - const_cast(input_pos->dim_order().data())); - input_tensors_[kv_forward_name_][0].push_back(input_pos_.get()); + Result kv_input_pos = methods_meta[0]->input_tensor_meta(2); + kv_input_pos_ = std::make_unique( + kv_input_pos->scalar_type(), + kv_input_pos->sizes().size(), + const_cast(kv_input_pos->sizes().data()), + ptr->kv_input_pos, + const_cast(kv_input_pos->dim_order().data())); + input_tensors_[kv_forward_name_][0].push_back(kv_input_pos_.get()); ptr->add_custom_mem_info( - ptr->input_pos, - io_bytes_map["input_pos_bytes"], - input_pos->scalar_type(), - input_pos.get()); + ptr->kv_input_pos, + io_bytes_map["kv_input_pos_bytes"], + kv_input_pos->scalar_type(), + kv_input_pos.get()); // [I] kv_cache size_t layered_head_count = num_layers_ * num_heads_; - int index = 3; // bypass input_tokens, input_pos, atten_mask + int index = 3; // bypass input_tokens, atten_mask, input_pos for (int offset = 0, shard_index = 0; shard_index < modules_.size(); offset += shard_layers_[shard_index], shard_index++) { for (int cache_group = 0; cache_group < 2; ++cache_group) { @@ -913,12 +1040,11 @@ void SmartMaskIoMgr::update_kv_io( int64_t pos, std::vector>& output_tensors) { IO* ptr = static_cast(data_ptr_.get()); - size_t cache_len = std::max(kv_cache_len_, prefill_cache_len_); // update input_tok - *ptr->input_tok = + *ptr->kv_input_toks = use_int64_token_ ? cur_token : static_cast(cur_token); // update position_ids - *ptr->input_pos = static_cast(pos); + *ptr->kv_input_pos = static_cast(pos); // update smart mask for previous cache ptr->kv_attention_mask[pos] = 65535; @@ -937,7 +1063,8 @@ void SmartMaskIoMgr::update_kv_io( for (int i = 0; i < k_cache_in.size(); ++i) { uint8_t* ptr_in = k_cache_in[i]->mutable_data() + pos; const uint8_t* ptr_out = k_cache_out[i]->data(); - for (size_t j = 0, offset = 0; j < head_dim_; ++j, offset += cache_len) { + for (size_t j = 0, offset = 0; j < head_dim_; + ++j, offset += kv_cache_len_) { ptr_in[offset] = ptr_out[j]; } } @@ -958,7 +1085,6 @@ void SmartMaskIoMgr::prepare_prefill_io( IO* ptr = static_cast(data_ptr_.get()); std::unordered_map io_bytes_map = get_io_bytes(); - int32_t cache_len = methods_meta[0]->input_tensor_meta(0)->sizes()[1]; // [I]: pre_input_tokens Result prefill_input_toks = methods_meta[0]->input_tensor_meta(0); prefill_input_toks_ = std::make_unique( @@ -975,30 +1101,92 @@ void SmartMaskIoMgr::prepare_prefill_io( executorch::aten::ScalarType::Int, prefill_input_toks.get()); - // [I]: prefill_attn_mask - for (int i = 0; i < cache_len; ++i) { - for (int j = 0; j < cache_len; ++j) { + // [I]: prefill_attention_mask + for (int i = 0; i < prefill_ar_len_; ++i) { + for (int j = 0, + offset = i * context_len_ + (context_len_ - prefill_ar_len_); + j < prefill_ar_len_; + ++j) { if (i < j) { - ptr->prefill_atten_mask[i * cache_len + j] = 0; + ptr->prefill_attention_mask[j + offset] = 0; } else { - ptr->prefill_atten_mask[i * cache_len + j] = 65535; + ptr->prefill_attention_mask[j + offset] = 65535; } } } - Result prefill_atten_mask = methods_meta[0]->input_tensor_meta(1); - prefill_attn_mask_ = std::make_unique( - prefill_atten_mask->scalar_type(), - prefill_atten_mask->sizes().size(), - const_cast(prefill_atten_mask->sizes().data()), - ptr->prefill_atten_mask, + Result prefill_attention_mask = + methods_meta[0]->input_tensor_meta(1); + 
prefill_attention_mask_ = std::make_unique( + prefill_attention_mask->scalar_type(), + prefill_attention_mask->sizes().size(), + const_cast( + prefill_attention_mask->sizes().data()), + ptr->prefill_attention_mask, const_cast( - prefill_atten_mask->dim_order().data())); - input_tensors_[prefill_forward_name_][0].push_back(prefill_attn_mask_.get()); + prefill_attention_mask->dim_order().data())); + input_tensors_[prefill_forward_name_][0].push_back( + prefill_attention_mask_.get()); ptr->add_custom_mem_info( - ptr->prefill_atten_mask, - io_bytes_map["prefill_atten_mask_bytes"], + ptr->prefill_attention_mask, + io_bytes_map["prefill_attention_mask_bytes"], executorch::aten::ScalarType::Bits16, - prefill_atten_mask.get()); + prefill_attention_mask.get()); + + if (!is_bert_) { + // [I]: prefill_input_pos + Result prefill_input_pos = + methods_meta[0]->input_tensor_meta(2); + prefill_input_pos_ = std::make_unique( + prefill_input_pos->scalar_type(), + prefill_input_pos->sizes().size(), + const_cast(prefill_input_pos->sizes().data()), + ptr->prefill_input_pos, + const_cast( + prefill_input_pos->dim_order().data())); + input_tensors_[prefill_forward_name_][0].push_back( + prefill_input_pos_.get()); + ptr->add_custom_mem_info( + ptr->prefill_input_pos, + io_bytes_map["prefill_input_pos_bytes"], + prefill_input_pos->scalar_type(), + prefill_input_pos.get()); + + // [I] kv_cache + size_t layered_head_count = num_layers_ * num_heads_; + int index = 3; // bypass input_tokens, atten_mask, input_pos + for (int offset = 0, shard_index = 0; shard_index < modules_.size(); + offset += shard_layers_[shard_index], shard_index++) { + for (int cache_group = 0; cache_group < 2; ++cache_group) { + for (int layer = 0; layer < shard_layers_[shard_index]; ++layer) { + for (int head = 0; head < num_heads_; ++head, ++index) { + Result kv_cache = + methods_meta[shard_index]->input_tensor_meta(index); + std::vector>& cache = + (cache_group == 0 ? k_cache_in_[prefill_forward_name_] + : v_cache_in_[prefill_forward_name_]); + uint8_t* cache_ptr = (cache_group == 0) + ? ptr->k_cache[layer + offset][head] + : ptr->v_cache[layer + offset][head]; + + cache.emplace_back(std::make_unique( + kv_cache->scalar_type(), + kv_cache->sizes().size(), + const_cast(kv_cache->sizes().data()), + cache_ptr, + const_cast( + kv_cache->dim_order().data()))); + ptr->add_custom_mem_info( + cache_ptr, + io_bytes_map["cache_in_bytes"] / layered_head_count, + kv_cache->scalar_type(), + kv_cache.get()); + input_tensors_[prefill_forward_name_][shard_index].push_back( + cache.back().get()); + } + } + } + } + } // [O]: logits int logit_index = 0; @@ -1031,8 +1219,8 @@ void SmartMaskIoMgr::prepare_prefill_io( (cache_group == 0 ? k_cache_out_[prefill_forward_name_] : v_cache_out_[prefill_forward_name_]); void* cache_ptr = (cache_group == 0) - ? ptr->k_cache[layer + offset][head] - : ptr->v_cache[layer + offset][head]; + ? 
ptr->k_cache_out[layer + offset][head] + : ptr->v_cache_out[layer + offset][head]; cache.emplace_back(std::make_unique( kv_cache->scalar_type(), kv_cache->sizes().size(), @@ -1042,7 +1230,7 @@ void SmartMaskIoMgr::prepare_prefill_io( kv_cache->dim_order().data()))); ptr->add_custom_mem_info( cache_ptr, - io_bytes_map["cache_in_bytes"] / layered_head_count, + io_bytes_map["cache_out_bytes"] / layered_head_count, executorch::aten::ScalarType::Byte, kv_cache.get()); output_tensors_[prefill_forward_name_][shard_index].push_back( @@ -1059,24 +1247,50 @@ void SmartMaskIoMgr::update_prefill_to_kv_io( std::vector>& output_tensors) { IO* ptr = static_cast(data_ptr_.get()); - *ptr->input_tok = + *ptr->kv_input_toks = use_int64_token_ ? cur_token : static_cast(cur_token); - *ptr->input_pos = static_cast(pos); + *ptr->kv_input_pos = static_cast(pos); // pos means the cur_token pos for (int i = 0; i < pos; i++) { ptr->kv_attention_mask[i] = 65535; } - // Update K is enough, copy from last to prevent from overwriting values - size_t copied_size = prefill_cache_len_ * sizeof(uint8_t); - for (int l = 0; l < num_layers_; l++) { - for (int h = 0; h < num_heads_; h++) { - uint8_t* k_cache = ptr->k_cache[l][h]; - for (int hd = head_dim_ - 1; hd > -1; hd--) { - memcpy( - k_cache + (kv_cache_len_ * hd), - k_cache + (prefill_cache_len_ * hd), - copied_size); + if (is_bert_) { + // update v_cache + auto& v_cache_in = v_cache_in_[kv_forward_name_]; + auto& v_cache_out = v_cache_out_[prefill_forward_name_]; + // update v_cache by single thread, this part is cpu cache sensitive + size_t copied_size = kv_cache_len_ * head_dim_ * sizeof(uint8_t); + for (int i = 0; i < v_cache_in.size(); ++i) { + uint8_t* ptr_in = v_cache_in[i]->mutable_data(); + const uint8_t* ptr_out = v_cache_out[i]->data(); + memcpy(ptr_in, ptr_out, copied_size); + } + + auto& k_cache_in = k_cache_in_[kv_forward_name_]; + auto& k_cache_out = k_cache_out_[prefill_forward_name_]; + for (int i = 0; i < k_cache_in.size(); ++i) { + uint8_t* ptr_in = k_cache_in[i]->mutable_data(); + const uint8_t* ptr_out = k_cache_out[i]->data(); + for (size_t j = 0, offset = 0; j < head_dim_; + ++j, offset += kv_cache_len_) { + for (size_t k = 0, k_stride = j * prefill_ar_len_; k < pos; k++) { + ptr_in[offset + k] = ptr_out[k_stride + k]; + } + } + } + } else { + // Update K is enough, copy from last to prevent from overwriting values + size_t copied_size = pos * sizeof(uint8_t); + for (int l = 0; l < num_layers_; l++) { + for (int h = 0; h < num_heads_; h++) { + uint8_t* k_cache = ptr->k_cache[l][h]; + for (int hd = head_dim_ - 1; hd > -1; hd--) { + memcpy( + k_cache + (kv_cache_len_ * hd), + k_cache + (prefill_cache_len_ * hd), + copied_size); + } } } } @@ -1087,38 +1301,71 @@ void SmartMaskIoMgr::update_prefill_io( int64_t pos, std::vector>& output_tensors) { (void)output_tensors; - IO* ptr = static_cast(data_ptr_.get()); - // Support CPU 4-bit embedding, which requires int64 input. - // However, for QNN embedding, only int32 input is needed. - // Therefore, we need to cast to the correct type to write the data. 
- if (use_int64_token_) { - ptr->prefill_input_toks[pos] = cur_token; - } else { - int32_t* prefill_input_toks_ptr = - reinterpret_cast(ptr->prefill_input_toks); - prefill_input_toks_ptr[pos] = static_cast(cur_token); + + if (!is_bert_) { + // update v_cache + auto& v_cache_in = v_cache_in_[prefill_forward_name_]; + auto& v_cache_out = v_cache_out_[prefill_forward_name_]; + // update v_cache by single thread, this part is cpu cache sensitive + size_t copied_size = prefill_ar_len_ * head_dim_ * sizeof(uint8_t); + for (int i = 0; i < v_cache_in.size(); ++i) { + uint8_t* ptr_in = + v_cache_in[i]->mutable_data() + pos * head_dim_; + const uint8_t* ptr_out = v_cache_out[i]->data(); + memcpy(ptr_in, ptr_out, copied_size); + } + + auto& k_cache_in = k_cache_in_[prefill_forward_name_]; + auto& k_cache_out = k_cache_out_[prefill_forward_name_]; + for (int i = 0; i < k_cache_in.size(); ++i) { + uint8_t* ptr_in = k_cache_in[i]->mutable_data(); + const uint8_t* ptr_out = k_cache_out[i]->data(); + for (size_t j = 0, offset = pos; j < head_dim_; + ++j, offset += prefill_cache_len_) { + for (size_t k = 0, k_stride = j * prefill_ar_len_; k < prefill_ar_len_; + k++) { + ptr_in[offset + k] = ptr_out[k_stride + k]; + } + } + } } } -void SmartMaskIoMgr::fill_prefill_toks(std::vector& prompt_tokens) { +void SmartMaskIoMgr::fill_prefill_toks( + int64_t start_pos, + std::vector& prompt_tokens) { IO* ptr = static_cast(get_mutable_ptr()); - for (int i = 0; i < prompt_tokens.size(); i++) { - // Support CPU 4-bit embedding, which requires int64 input. - // However, for QNN embedding, only int32 input is needed. - // Therefore, we need to cast to the correct type to write the data. - if (use_int64_token_) { - ptr->prefill_input_toks[i] = prompt_tokens[i]; - } else { - int32_t* prefill_input_toks_ptr = - reinterpret_cast(ptr->prefill_input_toks); - prefill_input_toks_ptr[i] = static_cast(prompt_tokens[i]); + for (int i = 0; i < prefill_ar_len_; i++) { + if (!is_bert_) { + ptr->prefill_input_pos[i] = start_pos + i; + } + + if (start_pos + i < prompt_tokens.size()) { + // Support CPU 4-bit embedding, which requires int64 input. + // However, for QNN embedding, only int32 input is needed. + // Therefore, we need to cast to the correct type to write the data. + if (use_int64_token_) { + ptr->prefill_input_toks[i] = prompt_tokens[start_pos + i]; + } else { + int32_t* prefill_input_toks_ptr = + reinterpret_cast(ptr->prefill_input_toks); + prefill_input_toks_ptr[i] = + static_cast(prompt_tokens[start_pos + i]); + } + } + if (start_pos >= prefill_ar_len_) { + for (int j = 0, offset = i * context_len_ + (start_pos - prefill_ar_len_); + j < prefill_ar_len_; + ++j) { + ptr->prefill_attention_mask[offset + j] = 65535; + } } } } void SmartMaskIoMgr::fill_kv_tok_mask(int64_t pos, int64_t cur_token) { IO* ptr = static_cast(get_mutable_ptr()); - *ptr->input_tok = + *ptr->kv_input_toks = use_int64_token_ ? 
cur_token : static_cast(cur_token); ptr->kv_attention_mask[kv_cache_len_] = 65535; } diff --git a/examples/qualcomm/oss_scripts/llama/runner/io_manager.h b/examples/qualcomm/oss_scripts/llama/runner/io_manager.h index 3a59ab6924e..f1887b99280 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/io_manager.h +++ b/examples/qualcomm/oss_scripts/llama/runner/io_manager.h @@ -23,8 +23,7 @@ namespace example { enum EvalMode { - kPrefill = 0, - kKVCached, + kKVCached = 0, kHybrid, kUnsupported, }; @@ -42,7 +41,9 @@ class IoMgrBase { const std::vector< executorch::runtime::Result>& methods_meta) = 0; - virtual void fill_prefill_toks(std::vector& prompt_tokens) = 0; + virtual void fill_prefill_toks( + int64_t start_pos, + std::vector& prompt_tokens) = 0; virtual void fill_kv_tok_mask(int64_t pos, int64_t cur_token) = 0; virtual void update_prefill_to_kv_io( int64_t cur_token, @@ -81,7 +82,10 @@ class ShiftPointerIoMgr : public IoMgrBase { public: ShiftPointerIoMgr( std::vector>& modules, + int32_t context_len, + int32_t prefill_ar_len, int32_t prefill_cache_len, + int32_t kv_ar_len, int32_t kv_cache_len, int32_t vocab_size, int32_t num_layers, @@ -101,7 +105,9 @@ class ShiftPointerIoMgr : public IoMgrBase { const std::vector< executorch::runtime::Result>& methods_meta) override; - void fill_prefill_toks(std::vector& prompt_tokens) override; + void fill_prefill_toks( + int64_t start_pos, + std::vector& prompt_tokens) override; void fill_kv_tok_mask(int64_t pos, int64_t cur_token) override; void update_prefill_to_kv_io( int64_t cur_token, @@ -119,25 +125,26 @@ class ShiftPointerIoMgr : public IoMgrBase { std::vector>& output_tensors) override; struct IO { - int64_t input_tok; - int32_t input_pos; + int64_t kv_input_toks; + int32_t kv_input_pos; std::vector>> k_cache; std::vector> v_cache; std::vector> k_cache_out; std::vector kv_attention_mask; std::vector kv_logits; std::vector prefill_input_toks; - std::vector prefill_atten_mask; + std::vector prefill_input_pos; + std::vector prefill_attention_mask; std::vector prefill_logits; }; private: - std::unique_ptr input_tok_; - std::unique_ptr input_pos_; - std::unique_ptr hidden_state_; - std::unique_ptr attention_mask_; + std::unique_ptr kv_input_toks_; + std::unique_ptr kv_input_pos_; + std::unique_ptr kv_attention_mask_; std::unique_ptr prefill_input_toks_; - std::unique_ptr prefill_attn_mask_; + std::unique_ptr prefill_input_pos_; + std::unique_ptr prefill_attention_mask_; std::unique_ptr prefill_logits_; std::unordered_map< std::string, @@ -157,7 +164,10 @@ class ShiftPointerIoMgr : public IoMgrBase { v_cache_out_; std::unique_ptr kv_logits_; std::vector shard_layers_; + int32_t context_len_{0}; + int32_t kv_ar_len_{0}; int32_t kv_cache_len_{0}; + int32_t prefill_ar_len_{0}; int32_t prefill_cache_len_{0}; int32_t vocab_size_; int32_t num_layers_; @@ -167,13 +177,17 @@ class ShiftPointerIoMgr : public IoMgrBase { std::string prefill_forward_name_; std::string kv_forward_name_; const bool use_int64_token_{false}; + const bool is_bert_{false}; }; class SmartMaskIoMgr : public IoMgrBase { public: SmartMaskIoMgr( std::vector>& modules, + int32_t context_len, + int32_t prefill_ar_len, int32_t prefill_cache_len, + int32_t kv_ar_len, int32_t kv_cache_len, int32_t vocab_size, int32_t num_layers, @@ -193,7 +207,9 @@ class SmartMaskIoMgr : public IoMgrBase { const std::vector< executorch::runtime::Result>& methods_meta) override; - void fill_prefill_toks(std::vector& prompt_tokens) override; + void fill_prefill_toks( + int64_t start_pos, + std::vector& 
prompt_tokens) override; void fill_kv_tok_mask(int64_t pos, int64_t cur_token) override; void update_prefill_to_kv_io( int64_t cur_token, @@ -216,22 +232,24 @@ class SmartMaskIoMgr : public IoMgrBase { struct IO { void* shared_buffer_base; - int64_t* input_tok; - int32_t* input_pos; + int64_t* kv_input_toks; + int32_t* kv_input_pos; // layer -> head -> head_dim * seq_len std::vector> k_cache; std::vector> v_cache; // layer -> head -> head_dim std::vector> k_cache_out; std::vector> v_cache_out; - // max_seq_len + // kv_ar_len_ * context_len_ uint16_t* kv_attention_mask; - // vocab_size + // kv_ar_len_ * vocab_size uint16_t* kv_logits; + // prefill_ar_len_ int64_t* prefill_input_toks; - // prefill_cache_len_ ^ 2 - uint16_t* prefill_atten_mask; - // vocab_size * prefill_cache_len_ + int32_t* prefill_input_pos; + // prefill_ar_len_ * context_len_ + uint16_t* prefill_attention_mask; + // vocab_size * prefill_ar_len_ uint16_t* prefill_logits; size_t num_layers_; @@ -252,12 +270,12 @@ class SmartMaskIoMgr : public IoMgrBase { }; private: - std::unique_ptr input_tok_; - std::unique_ptr input_pos_; - std::unique_ptr hidden_state_; - std::unique_ptr attention_mask_; + std::unique_ptr kv_input_toks_; + std::unique_ptr kv_input_pos_; + std::unique_ptr kv_attention_mask_; std::unique_ptr prefill_input_toks_; - std::unique_ptr prefill_attn_mask_; + std::unique_ptr prefill_input_pos_; + std::unique_ptr prefill_attention_mask_; std::unique_ptr prefill_logits_; std::unordered_map< std::string, @@ -277,7 +295,10 @@ class SmartMaskIoMgr : public IoMgrBase { v_cache_out_; std::unique_ptr kv_logits_; std::vector shard_layers_; + int32_t context_len_{0}; + int32_t kv_ar_len_{0}; int32_t kv_cache_len_{0}; + int32_t prefill_ar_len_{0}; int32_t prefill_cache_len_{0}; int32_t vocab_size_; int32_t num_layers_; @@ -287,6 +308,9 @@ class SmartMaskIoMgr : public IoMgrBase { std::string prefill_forward_name_; std::string kv_forward_name_; const bool use_int64_token_{false}; + // If the cache length is zero, it indicates a BERT model, which does not use + // position ids or KV cache inputs. 
+ const bool is_bert_{false}; }; } // namespace example diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp index 70ba25a0972..da1997a5060 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp @@ -45,7 +45,7 @@ Runner::Runner( const int32_t logits_offset, const float temperature, const int eval_mode, - const std::string& kv_updator) + const std::string& kv_updater) : n_bos_(1), n_eos_(1), tokenizer_path_(tokenizer_path), @@ -53,7 +53,7 @@ Runner::Runner( logits_offset_(logits_offset), temperature_(temperature), eval_mode_(static_cast(eval_mode)), - kv_updator_(kv_updator) { + kv_updater_(kv_updater) { for (size_t i = 0; i < models_path.size(); ++i) { modules_.push_back(std::make_shared( models_path[i], Module::LoadMode::MmapUseMlockIgnoreErrors)); @@ -77,10 +77,6 @@ Error Runner::load() { } switch (eval_mode_) { - case EvalMode::kPrefill: - prefill_forward_name_ = "forward"; - method_names_.emplace_back(prefill_forward_name_); - break; case EvalMode::kKVCached: kv_forward_name_ = "forward"; method_names_.emplace_back(kv_forward_name_); @@ -106,17 +102,22 @@ Error Runner::load() { } if (!prefill_forward_name_.empty()) { - // Use input tokens length to retrieve prefill cache len - // Cache len equals to prefill model seq_len - 1 - prefill_cache_len_ = get_methods_meta(prefill_forward_name_)[0] - ->input_tensor_meta(0) - ->sizes()[1]; + // Use attention mask length to retrieve prefill_ar_len and context length + // Prefill cache length equals to context_len - prefill_ar_len + auto atten_mask_meta = + get_methods_meta(prefill_forward_name_)[0]->input_tensor_meta(1); + prefill_ar_len_ = atten_mask_meta->sizes()[1]; + context_len_ = atten_mask_meta->sizes()[2]; + prefill_cache_len_ = context_len_ - prefill_ar_len_; } if (!kv_forward_name_.empty()) { - // Use k cache length to retirieve kv cache len - // Cache len equals to kv model seq_len - 1 - kv_cache_len_ = - get_methods_meta(kv_forward_name_)[0]->input_tensor_meta(3)->sizes()[2]; + // Use attention mask length to retrieve kv ar len and context length + // Cache len equals to kv model context_len - kv_ar_len + auto atten_mask_meta = + get_methods_meta(kv_forward_name_)[0]->input_tensor_meta(1); + kv_ar_len_ = atten_mask_meta->sizes()[1]; + context_len_ = atten_mask_meta->sizes()[2]; + kv_cache_len_ = context_len_ - kv_ar_len_; } // retrieve any method meta, can be either prefill or kv @@ -130,10 +131,13 @@ Error Runner::load() { executorch::aten::ScalarType::Long; ET_CHECK_MSG(num_layers != -1, "Could not retrieve num layers"); - if (kv_updator_ == "SmartMask") { + if (kv_updater_ == "SmartMask") { io_mgr_ = std::make_unique( modules_, + context_len_, + prefill_ar_len_, prefill_cache_len_, + kv_ar_len_, kv_cache_len_, vocab_size_, num_layers, @@ -143,10 +147,13 @@ Error Runner::load() { prefill_forward_name_, kv_forward_name_, use_int64_token_); - } else if (kv_updator_ == "ShiftPointer") { + } else if (kv_updater_ == "ShiftPointer") { io_mgr_ = std::make_unique( modules_, + context_len_, + prefill_ar_len_, prefill_cache_len_, + kv_ar_len_, kv_cache_len_, vocab_size_, num_layers, @@ -157,16 +164,13 @@ Error Runner::load() { kv_forward_name_, use_int64_token_); } else { - ET_LOG(Error, "Using an unknown updator %s", kv_updator_.c_str()); + ET_LOG(Error, "Using an unknown updater %s", kv_updater_.c_str()); } ET_LOG(Info, "creating io_memory"); // prepare io io_mgr_->init_io(); switch (eval_mode_) { - 
case EvalMode::kPrefill: - io_mgr_->prepare_prefill_io(get_methods_meta(prefill_forward_name_)); - break; case EvalMode::kKVCached: io_mgr_->prepare_kv_io(get_methods_meta(kv_forward_name_)); break; @@ -324,8 +328,7 @@ Error Runner::generate( break; } - int max_seq_len = std::max(prefill_cache_len_, kv_cache_len_) + 1; - seq_len = (seq_len > 0 && seq_len <= max_seq_len) ? seq_len : max_seq_len; + seq_len = (seq_len > 0 && seq_len <= context_len_) ? seq_len : context_len_; Result> encode_res = tokenizer_->encode(prompt_, n_bos_, 0); ET_CHECK_OK_OR_RETURN_ERROR( @@ -333,61 +336,46 @@ Error Runner::generate( std::vector prompt_tokens = encode_res.get(); int num_prompt_tokens = prompt_tokens.size(); - ET_CHECK_MSG(num_prompt_tokens < max_seq_len, "max seq length exceeded"); ET_CHECK_MSG( num_prompt_tokens < seq_len, "sequence length exceeded - please increase the seq_len value"); - if (eval_mode_ == EvalMode::kHybrid) { - int prefill_seq_len = get_methods_meta(prefill_forward_name_)[0] - ->input_tensor_meta(0) - ->sizes()[1] + - 1; - ET_CHECK_MSG( - num_prompt_tokens < prefill_seq_len, - "For hybrid mode, please ensure prompt length(%d) is less than prefill's seq_len(%d)", - num_prompt_tokens, - prefill_seq_len); - } int64_t pos = 0, prev_token, cur_token = prompt_tokens[0]; if (token_callback) { token_callback(prompt_); } auto prefill_execute = [&](const std::string& method_name) { - io_mgr_->fill_prefill_toks(prompt_tokens); + int num_iters = 1 + ((num_prompt_tokens - 1) / prefill_ar_len_); + ET_LOG( + Info, + "Prompt Processor: total %d tokens (AR-%d * %d iters)", + num_prompt_tokens, + prefill_ar_len_, + num_iters); - pos = num_prompt_tokens - 1; - cur_token = prompt_tokens[pos]; - while (pos < seq_len - 1) { - // inference + for (int i = 0; i < num_iters; i++) { + io_mgr_->fill_prefill_toks(pos, prompt_tokens); run_model_step(method_name, inputs[method_name]); - Tensor& logits_tensor = output_tensors[method_name].back()[0]; - prev_token = cur_token; - long sample_start_time_ms = time_in_ms(); - cur_token = logitsToToken(logits_tensor, pos); - stats_.aggregate_sampling_time_ms += time_in_ms() - sample_start_time_ms; - - io_mgr_->update_prefill_io(cur_token, ++pos, output_tensors[method_name]); - auto piece_res = tokenizer_->decode(prev_token, cur_token); - ET_CHECK(piece_res.ok()); - if (token_callback) { - token_callback(piece_res.get().c_str()); - } - - if (pos == num_prompt_tokens) { - stats_.first_token_ms = time_in_ms(); - stats_.prompt_eval_end_ms = time_in_ms(); - } - - if (pos >= num_prompt_tokens && eos_id_.count(cur_token) > 0) { - ET_LOG(Info, "\nReached to the end of generation"); - break; - } - // prefill model inferences once for prompt in the hybrid mode - if (eval_mode_ == EvalMode::kHybrid) { - break; - } + io_mgr_->update_prefill_io(cur_token, pos, output_tensors[method_name]); + pos += prefill_ar_len_; } + Tensor& logits_tensor = output_tensors[method_name].back()[0]; + prev_token = prompt_tokens[num_prompt_tokens - 1]; + long sample_start_time_ms = time_in_ms(); + cur_token = logitsToToken( + logits_tensor, + (num_prompt_tokens + prefill_ar_len_ - 1) % prefill_ar_len_); + stats_.aggregate_sampling_time_ms += time_in_ms() - sample_start_time_ms; + + auto piece_res = tokenizer_->decode(prev_token, cur_token); + ET_CHECK(piece_res.ok()); + if (token_callback) { + token_callback(piece_res.get().c_str()); + } + + pos = num_prompt_tokens; + stats_.first_token_ms = time_in_ms(); + stats_.prompt_eval_end_ms = time_in_ms(); }; auto kv_execute = [&](const std::string& 
method_name) { @@ -429,9 +417,6 @@ Error Runner::generate( }; switch (eval_mode_) { - case EvalMode::kPrefill: - prefill_execute(prefill_forward_name_); - break; case EvalMode::kKVCached: kv_execute(kv_forward_name_); break; diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.h b/examples/qualcomm/oss_scripts/llama/runner/runner.h index b6ba1360bff..e659ac55164 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/runner.h +++ b/examples/qualcomm/oss_scripts/llama/runner/runner.h @@ -33,7 +33,7 @@ class Runner { const int32_t logits_offset, const float temperature, const int eval_mode, - const std::string& kv_updator); + const std::string& kv_updater); struct Stats { // Scaling factor for timestamps - in this case, we use ms. @@ -89,7 +89,10 @@ class Runner { std::string prompt_; // metadata + int32_t context_len_{0}; + int32_t prefill_ar_len_{0}; int32_t prefill_cache_len_{0}; + int32_t kv_ar_len_{0}; int32_t kv_cache_len_{0}; int32_t vocab_size_; int32_t bos_id_; @@ -111,7 +114,7 @@ class Runner { std::string kv_forward_name_; std::vector method_names_; LlamaVersion llama_version_; - std::string kv_updator_; + std::string kv_updater_; }; } // namespace example diff --git a/exir/lowered_backend_module.py b/exir/lowered_backend_module.py index 720877f0555..dde6a397d9a 100644 --- a/exir/lowered_backend_module.py +++ b/exir/lowered_backend_module.py @@ -890,7 +890,7 @@ def _unsafe_adjust_original_program( # noqa: C901 del original_program._state_dict[input_target] elif input_spec.kind == InputKind.BUFFER: if input_spec.persistent: - del original_program._state_dict[input_target] + original_program._state_dict.pop(input_target, None) else: del original_program._constants[input_spec.target] elif input_spec.kind == InputKind.CONSTANT_TENSOR: From b5344c17d6069f6fbe3b28db7bf67376cc2fb629 Mon Sep 17 00:00:00 2001 From: Jack <32371937+jackzhxng@users.noreply.github.com> Date: Tue, 25 Feb 2025 01:58:09 -0800 Subject: [PATCH 076/584] Use to_edge_lower_and_transform for XNNPack (#8624) --- examples/models/llama/export_llama_lib.py | 132 +++++++++++++++------- examples/models/llava/export_llava.py | 1 - extension/llm/export/builder.py | 28 ++++- 3 files changed, 113 insertions(+), 48 deletions(-) diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py index 6d9ba750431..3a1f423aa27 100644 --- a/examples/models/llama/export_llama_lib.py +++ b/examples/models/llama/export_llama_lib.py @@ -676,47 +676,62 @@ def _validate_args(args): ) -def _export_llama(args) -> LLMEdgeManager: # noqa: C901 - _validate_args(args) - - pt2e_quant_params, quantizers, quant_dtype = get_quantizer_and_quant_params(args) - - # export_to_edge - builder_exported = _prepare_for_llama_export(args).export() - - builder_exported.run_canonical_optimizations() - - if args.export_only: - exit() - - builder_exported_to_edge = builder_exported.pt2e_quantize( - quantizers - ).export_to_edge() - - modelname = builder_exported_to_edge.modelname - - # to_backend +def _to_edge_and_lower_llama_xnnpack( + builder_exported, + modelname, + additional_passes, + pt2e_quant_params, + quantizers, + quant_dtype, + args, +) -> LLMEdgeManager: # noqa: C901 partitioners = [] # Order matters here, dynamic quantization should be applied first when both xnnpack and xnnpack_extended_ops are enabled - if ( - pt2e_quant_params is not None and pt2e_quant_params.quantize_linear is not None - ) or (args.xnnpack): - partitioners.append( - get_xnnpack_partitioner(dynamic_quant_only_partitioner=True) - ) + 
partitioners.append(get_xnnpack_partitioner(dynamic_quant_only_partitioner=True)) - # force xnnpack to be true if pt2e_quant_params is not None and args.xnnpack is False - args.xnnpack = True - modelname = f"xnnpack_dq_{modelname}" + modelname = f"xnnpack_dq_{modelname}" if args.xnnpack_extended_ops: - assert args.xnnpack, "xnnpack_extended_ops requires xnnpack to be enabled" partitioners.append( get_xnnpack_partitioner(dynamic_quant_only_partitioner=False) ) modelname = f"xnnpack_{modelname}" + logging.info("Lowering model using following partitioner(s): ") + for partitioner in partitioners: + logging.info(f"--> {partitioner.__class__.__name__}") + + # TODO: Enable generating ETRecord with XNNPack and to_edge_transform_and_lower(). + if args.generate_etrecord: + raise NotImplementedError( + "export_llama does not support XNNPack and generating ETRecord at the moment." + ) + + builder = builder_exported.pt2e_quantize(quantizers).to_edge_transform_and_lower( + partitioners + ) + if args.verbose: + print_delegation_info(builder.edge_manager.exported_program().graph_module) + + return builder.to_executorch(passes=additional_passes) + + +def _to_edge_and_lower_llama( # noqa: C901 + builder_exported, + modelname, + additional_passes, + pt2e_quant_params, + quantizers, + quant_dtype, + args, +): + builder_exported_to_edge = builder_exported.pt2e_quantize( + quantizers + ).export_to_edge() + + # to_backend + partitioners = [] if args.vulkan: partitioners.append( get_vulkan_partitioner( @@ -731,7 +746,6 @@ def _export_llama(args) -> LLMEdgeManager: # noqa: C901 modelname = f"vulkan_{modelname}" # Need to remove asserts from the graph to prevent graph breaks - # pyre-ignore: Undefined attribute [16]: `Optional` has no attribute `exported_program`. remove_asserts(builder_exported_to_edge.edge_manager.exported_program()) if args.mps: @@ -760,13 +774,11 @@ def _export_llama(args) -> LLMEdgeManager: # noqa: C901 # pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.utils.utils` from executorch.backends.qualcomm.utils.utils import _transform, tag_quant_io - # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `qualcomm`, Optional type has no attribute `exported_program` _transform(builder_exported_to_edge.edge_manager.exported_program()) if args.num_sharding > 0: model_sharding.split_graph( builder_exported_to_edge.edge_manager.exported_program(), - # pyre-fixme[16]: `Optional` has no attribute `__getitem__`. 
builder_exported_to_edge.metadata["get_n_layers"], shares=args.num_sharding, ) @@ -792,19 +804,15 @@ def _export_llama(args) -> LLMEdgeManager: # noqa: C901 atten.head_dim, ) ) - # pyre-ignore tag_quant_io( builder_exported_to_edge.edge_manager.exported_program().graph_module, - partial(get_custom_quant_ios_dtype, cache_shape), # pyre-ignore + partial(get_custom_quant_ios_dtype, cache_shape), ) logging.info("Lowering model using following partitioner(s): ") for partitioner in partitioners: logging.info(f"--> {partitioner.__class__.__name__}") - additional_passes = [] - if args.model in TORCHTUNE_DEFINED_MODELS: - additional_passes = [InitializedMutableBufferPass(["kv_cache_pos"])] if args.generate_etrecord: if not builder_exported_to_edge.edge_manager: raise ValueError("Unable to generate etrecord due to missing edge manager.") @@ -818,7 +826,6 @@ def _export_llama(args) -> LLMEdgeManager: # noqa: C901 if args.num_sharding > 0 and args.qnn: from executorch.backends.qualcomm.utils.utils import canonicalize_program - # pyre-fixme[16]: Module `backends` has no attribute `qualcomm`. canonicalize_program(builder.edge_manager.exported_program()) builder = builder.to_executorch( @@ -840,11 +847,55 @@ def _export_llama(args) -> LLMEdgeManager: # noqa: C901 if args.num_sharding > 0 and args.qnn: from executorch.backends.qualcomm.utils.utils import canonicalize_program - # pyre-fixme[16]: Module `backends` has no attribute `qualcomm`. canonicalize_program(builder.edge_manager.exported_program()) builder = builder.to_executorch(passes=additional_passes) + return builder + + +def _export_llama(args) -> LLMEdgeManager: # noqa: C901 + _validate_args(args) + + pt2e_quant_params, quantizers, quant_dtype = get_quantizer_and_quant_params(args) + + additional_passes = [] + if args.model in TORCHTUNE_DEFINED_MODELS: + additional_passes = [InitializedMutableBufferPass(["kv_cache_pos"])] + + # export_to_edge + builder_exported = _prepare_for_llama_export(args).export() + builder_exported.run_canonical_optimizations() + modelname = builder_exported.modelname + + if args.export_only: + exit() + + if pt2e_quant_params is not None and pt2e_quant_params.quantize_linear is not None: + # Force xnnpack to be true if pt2e_quant_params is not None and args.xnnpack is False + args.xnnpack = True + + if args.xnnpack: + builder = _to_edge_and_lower_llama_xnnpack( + builder_exported, + modelname, + additional_passes, + pt2e_quant_params, + quantizers, + quant_dtype, + args, + ) + else: + builder = _to_edge_and_lower_llama( + builder_exported, + modelname, + additional_passes, + pt2e_quant_params, + quantizers, + quant_dtype, + args, + ) + if args.profile_memory: generate_memory_trace(builder.export_program, "memory_profile.json") @@ -866,7 +917,6 @@ def _export_llama(args) -> LLMEdgeManager: # noqa: C901 output_file = f"{builder.output_dir}/{modelname}.pte" builder.save_to_pte(output_file) - return builder diff --git a/examples/models/llava/export_llava.py b/examples/models/llava/export_llava.py index 82c7aca09e0..a5057e5e850 100644 --- a/examples/models/llava/export_llava.py +++ b/examples/models/llava/export_llava.py @@ -67,7 +67,6 @@ def export(self) -> "LlavaEdgeManager": dynamic_shapes=dynamic_shape, strict=False, ) - # pyre-ignore: Incompatible attribute type [8]: Attribute `pre_autograd_graph_module` declared in class `LLMEdgeManager` has type `Optional[GraphModule]` but is used as type `Module`. 
self.pre_autograd_graph_module = self.export_program.module() return self diff --git a/extension/llm/export/builder.py b/extension/llm/export/builder.py index 88d2bc0cab9..ec6cfa41ad8 100644 --- a/extension/llm/export/builder.py +++ b/extension/llm/export/builder.py @@ -21,7 +21,7 @@ DuplicateDynamicQuantChainPass, ) from executorch.backends.xnnpack._passes.convert_to_linear import ConvertToLinearPass -from executorch.exir import EdgeProgramManager +from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower from executorch.exir.backend.partitioner import Partitioner from executorch.exir.backend.utils import format_delegated_graph @@ -39,7 +39,7 @@ from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e from torch.ao.quantization.quantizer import Quantizer from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer -from torch.export import export_for_training +from torch.export import export_for_training, ExportedProgram from torch.nn.attention import SDPBackend FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s" @@ -89,8 +89,8 @@ def __init__( dynamic_shapes: Optional[Any] = None, ): self.model = model - # graph module returned from export() - self.pre_autograd_graph_module: Optional[torch.fx.GraphModule] = None + self.pre_autograd_exported_program: Optional[ExportedProgram] = None + self.pre_autograd_graph_module: Optional[torch.nn.Module] = None self.modelname = modelname self.max_seq_len = max_seq_len self.dtype = dtype @@ -218,8 +218,8 @@ def export(self) -> "LLMEdgeManager": kwargs=self.example_kwarg_inputs, dynamic_shapes=dynamic_shape, ) - # pyre-fixme[8]: Attribute has type `Optional[GraphModule]`; used as # `Module`. + self.pre_autograd_exported_program = exported_module self.pre_autograd_graph_module = exported_module.module() if hasattr(self.args, "export_only") and self.args.export_only: torch.export.save(exported_module, self.args.output_name) @@ -330,7 +330,10 @@ def pt2e_quantize(self, quantizers: Optional[List[Quantizer]]) -> "LLMEdgeManage assert ( self.pre_autograd_graph_module is not None ), "Please run export() first" - m = prepare_pt2e(self.pre_autograd_graph_module, composed_quantizer) + m = prepare_pt2e( + self.pre_autograd_graph_module, # pyre-ignore[6] + composed_quantizer, + ) logging.info( f"Calibrating with tasks: {self.calibration_tasks}, limit: {self.calibration_limit}, calibration_data: {self.calibration_data}, tokenizer_path: {self.tokenizer_path}, seq_length: {self.calibration_seq_length}" ) @@ -430,6 +433,19 @@ def to_backend(self, partitioners: Optional[List[Partitioner]]) -> "LLMEdgeManag return self + def to_edge_transform_and_lower( + self, partitioners: Optional[List[Partitioner]] + ) -> "LLMEdgeManager": + if partitioners is None: + logging.info("No partitioner provided, skipping backend lowering...") + edge_config = self._get_edge_config() + self.edge_manager = to_edge_transform_and_lower( + self.pre_autograd_exported_program, + partitioner=partitioners, + compile_config=edge_config, + ) + return self + def to_executorch( self, passes: Optional[List[ExportPass]] = None ) -> "LLMEdgeManager": From dc6a95698cf898346af842f97ecbe35f2233a1bd Mon Sep 17 00:00:00 2001 From: Zingo Andersen Date: Tue, 25 Feb 2025 14:36:19 +0100 Subject: [PATCH 077/584] Arm backend: Add support for DevTools BundleIO (#8680) Add support for input and output testing using BundleIO in devtools. 
Can be tested with: run.sh --model_name=add --target=ethos-u85-128 --bundleio aot_arm_compiler.py changes --bundleio - Will generate a bundle .bpte file with input and reference output data --model_name - now also support .pt/.pth files e.g. a torch.save():ed model --model_input - If supplied will overried the model input data supply a torch.save():ed data --intermediates - Will now save model.pth, input_xx.pt and output_ref_xx.pt files so testing can be reproduced arm_executor_runner.cpp - can be build with ET_BUNDLE_IO to enable a .bpte and a input/output_ref testing flow test_model.py script uses bundleio test_arm_baremetal.sh script now tests models with BundleIO so output is compared to ref output Change-Id: I25b9ecc0adbf697d1d8cadc5d67ffd32f8c93db6 Signed-off-by: Zingo Andersen --- backends/arm/scripts/build_executorch.sh | 49 ++-- .../arm/scripts/build_executorch_runner.sh | 25 +- backends/arm/scripts/run_fvp.sh | 8 +- backends/arm/test/test_arm_baremetal.sh | 36 +-- backends/arm/test/test_model.py | 22 +- examples/arm/aot_arm_compiler.py | 271 ++++++++++++++---- examples/arm/executor_runner/CMakeLists.txt | 29 +- .../executor_runner/arm_executor_runner.cpp | 241 ++++++++++++---- examples/arm/run.sh | 55 +++- 9 files changed, 562 insertions(+), 174 deletions(-) diff --git a/backends/arm/scripts/build_executorch.sh b/backends/arm/scripts/build_executorch.sh index f868d264f48..798aa627d65 100755 --- a/backends/arm/scripts/build_executorch.sh +++ b/backends/arm/scripts/build_executorch.sh @@ -16,18 +16,17 @@ et_root_dir=$(realpath ${et_root_dir}) toolchain_cmake=${script_dir}/../../../examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake toolchain_cmake=$(realpath ${toolchain_cmake}) - - et_build_root="${et_root_dir}/arm_test" build_type="Release" +build_devtools=false build_with_etdump=false - help() { echo "Usage: $(basename $0) [options]" echo "Options:" echo " --et_build_root= Build output root folder to use, defaults to ${et_build_root}" echo " --build_type= Build with Release, Debug or RelWithDebInfo, default is ${build_type}" + echo " --devtools Build Devtools libs" echo " --etdump Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log" exit 0 } @@ -37,6 +36,7 @@ for arg in "$@"; do -h|--help) help ;; --et_build_root=*) et_build_root="${arg#*=}";; --build_type=*) build_type="${arg#*=}";; + --devtools) build_devtools=true ;; --etdump) build_with_etdump=true ;; *) ;; @@ -44,25 +44,25 @@ for arg in "$@"; do done et_build_dir="${et_build_root}/cmake-out" + +# Used for flatcc host excutable if Devtools is used et_build_host_dir=${et_build_root}/cmake-out-host-tools set -x cd "${et_root_dir}" -build_with_etdump_flags="" if [ "$build_with_etdump" = true ] ; then ( set +x ; echo "--------------------------------------------------------------------------------" ; - echo "Build ExecuTorch Libraries host flatcc bin ${build_type} into ${et_build_host_dir} - ${et_build_host_dir}/bin/flatcc" ; + echo "Build ExecuTorch Libraries host flatcc bin ${build_type} into ${et_build_host_dir}/bin/flatcc" ; echo "--------------------------------------------------------------------------------" ) - # Build host flatcc bin # This is a way to work around that the flatcc executable get build for target (e.g. Arm) later # and get replaced. flatcc is a tool used on the host for etdump and BundleIO handling. 
# The way to solve this is to generate it once for the host, then copy it to ${et_build_host_dir}/bin # and later point that out with -DFLATCC_EXECUTABLE=${et_build_host_dir}/bin/flatcc later. - mkdir -p ${et_build_host_dir} + cmake \ -DCMAKE_INSTALL_PREFIX=${et_build_host_dir} \ -DCMAKE_BUILD_TYPE=${build_type} \ @@ -79,18 +79,13 @@ if [ "$build_with_etdump" = true ] ; then -B"${et_build_host_dir}" \ "${et_root_dir}" - # Copy host flatcc excutable to it's saved when we build for target (Arm) later + # third-party/flatcc/bin/flatcc gets build already in the in the cmake config step above + # so there is no cmake building step done + + # Copy host flatcc excutable so it's saved when we build for target (Arm) later + et_build_host_dir=$(realpath ${et_build_host_dir}) mkdir -p ${et_build_host_dir}/bin cp third-party/flatcc/bin/flatcc ${et_build_host_dir}/bin - - # Add DevTools flags use in the Target build below - build_with_etdump_flags="-DEXECUTORCH_BUILD_DEVTOOLS=ON \ - -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ - -DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF \ - -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=OFF \ - -DFLATCC_ALLOW_WERROR=OFF \ - -DFLATCC_EXECUTABLE=${et_build_host_dir}/bin/flatcc " - echo "build_with_etdump_flags=$build_with_etdump_flags" fi ( set +x ; @@ -98,6 +93,25 @@ fi echo "Build ExecuTorch target libs ${build_type} into '${et_build_dir}'" ; echo "--------------------------------------------------------------------------------" ) +build_devtools_flags=" -DEXECUTORCH_BUILD_DEVTOOLS=OFF " +if [ "$build_devtools" = true ] ; then + build_devtools_flags=" -DEXECUTORCH_BUILD_DEVTOOLS=ON " +fi + +build_with_etdump_flags=" -DEXECUTORCH_ENABLE_EVENT_TRACER=OFF " +if [ "$build_with_etdump" = true ] ; then + # Add DevTools flags use in the Target build below + build_with_etdump_flags="-DEXECUTORCH_BUILD_DEVTOOLS=ON \ + -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ + -DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF \ + -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=OFF \ + -DFLATCC_ALLOW_WERROR=OFF \ + -DFLATCC_EXECUTABLE=${et_build_host_dir}/bin/flatcc " +fi + +echo "Building with Devtools: ${build_devtools_flags} ${build_with_etdump_flags}" + + # Build cmake \ -DCMAKE_INSTALL_PREFIX=${et_build_dir} \ @@ -108,6 +122,7 @@ cmake \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ -DEXECUTORCH_ENABLE_LOGGING=ON \ + ${build_devtools_flags} \ ${build_with_etdump_flags} \ -DFLATC_EXECUTABLE="$(which flatc)" \ -B"${et_build_dir}" \ diff --git a/backends/arm/scripts/build_executorch_runner.sh b/backends/arm/scripts/build_executorch_runner.sh index afa8f27bdff..3e658928274 100755 --- a/backends/arm/scripts/build_executorch_runner.sh +++ b/backends/arm/scripts/build_executorch_runner.sh @@ -15,6 +15,7 @@ pte_file="" target="ethos-u55-128" build_type="Release" system_config="" +bundleio=false build_with_etdump=false extra_build_flags="" output_folder_set=false @@ -22,6 +23,9 @@ output_folder="." et_build_root="${et_root_dir}/arm_test" ethosu_tools_dir=${et_root_dir}/examples/arm/ethos-u-scratch +build_bundleio_flags=" -DET_BUNDLE_IO=OFF " +build_with_etdump_flags=" -DEXECUTORCH_ENABLE_EVENT_TRACER=OFF " + help() { echo "Usage: $(basename $0) [options]" echo "Options:" @@ -30,6 +34,7 @@ help() { echo " --build_type= Build with Release, Debug or RelWithDebInfo, default is ${build_type}" echo " --system_config= System configuration to select from the Vela configuration file (see vela.ini). 
Default: Ethos_U55_High_End_Embedded for EthosU55 targets, Ethos_U85_SYS_DRAM_Mid for EthosU85 targets." echo " NOTE: If given, this option must match the given target. This option also sets timing adapter values customized for specific hardware, see ./executor_runner/CMakeLists.txt." + echo " --bundleio Support both pte and Bundle IO bpte using Devtools BundelIO with Input/RefOutput included" echo " --etdump Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log" echo " --extra_build_flags= Extra flags to pass to cmake like -DET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE=60000 Default: none " echo " --output= Output folder Default: /_.pte" @@ -45,6 +50,7 @@ for arg in "$@"; do --target=*) target="${arg#*=}";; --build_type=*) build_type="${arg#*=}";; --system_config=*) system_config="${arg#*=}";; + --bundleio) bundleio=true ;; --etdump) build_with_etdump=true ;; --extra_build_flags=*) extra_build_flags="${arg#*=}";; --output=*) output_folder="${arg#*=}" ; output_folder_set=true ;; @@ -64,9 +70,8 @@ et_build_dir=${et_build_root}/cmake-out et_build_dir=$(realpath ${et_build_dir}) if [ "$output_folder_set" = false ] ; then - pte_folder=$(cd -- "$( dirname -- "${pte_file}" )" &> /dev/null && pwd) - pte_short_name=$(basename -- "${pte_file}" ".pte") - output_folder="$pte_folder/$pte_short_name" + # remove file ending + output_folder=${pte_file%.*} fi if [[ ${system_config} == "" ]] @@ -86,18 +91,21 @@ else target_cpu=cortex-m85 fi echo "--------------------------------------------------------------------------------" -echo "Build Arm Baremetal executor_runner for ${target} with ${pte_file} using ${system_config} to '${output_folder}/cmake-out'" +echo "Build Arm Baremetal executor_runner for ${target} with ${pte_file} using ${system_config} ${extra_build_flags} to '${output_folder}/cmake-out'" echo "--------------------------------------------------------------------------------" cd ${et_root_dir}/examples/arm/executor_runner -build_with_etdump_flags="" +if [ "$bundleio" = true ] ; then + build_bundleio_flags=" -DET_BUNDLE_IO=ON " +fi + if [ "$build_with_etdump" = true ] ; then - echo "Building with etdump e.g. 
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON" build_with_etdump_flags=" -DEXECUTORCH_ENABLE_EVENT_TRACER=ON " fi -mkdir -p "$output_folder" +echo "Building with BundleIO/etdump/extra flags: ${build_bundleio_flags} ${build_with_etdump_flags} ${extra_build_flags}" +mkdir -p "${output_folder}" cmake \ -DCMAKE_BUILD_TYPE=${build_type} \ @@ -105,9 +113,10 @@ cmake \ -DTARGET_CPU=${target_cpu} \ -DET_DIR_PATH:PATH=${et_root_dir} \ -DET_BUILD_DIR_PATH:PATH=${et_build_dir} \ - -DET_PTE_FILE_PATH:PATH="${pte_file}" \ + -DET_PTE_FILE_PATH:PATH="${pte_file}" \ -DETHOS_SDK_PATH:PATH=${ethos_u_root_dir} \ -DETHOSU_TARGET_NPU_CONFIG=${target} \ + ${build_bundleio_flags} \ ${build_with_etdump_flags} \ -DPYTHON_EXECUTABLE=$(which python3) \ -DSYSTEM_CONFIG=${system_config} \ diff --git a/backends/arm/scripts/run_fvp.sh b/backends/arm/scripts/run_fvp.sh index 568f07011f2..e0237a9c414 100755 --- a/backends/arm/scripts/run_fvp.sh +++ b/backends/arm/scripts/run_fvp.sh @@ -19,12 +19,14 @@ _setup_msg="please refer to ${et_root_dir}/examples/arm/setup.sh to properly ins elf_file="" target="ethos-u55-128" +timeout="240" help() { echo "Usage: $(basename $0) [options]" echo "Options:" echo " --elf= elf file to run" echo " --target= Target to build and run for Default: ${target}" + echo " --timeout= Maximum target runtime, used to detect hanging, might need to be higer on large models Default: ${timeout}" exit 0 } @@ -33,6 +35,7 @@ for arg in "$@"; do -h|--help) help ;; --elf=*) elf_file="${arg#*=}";; --target=*) target="${arg#*=}";; + --timeout=*) timeout="${arg#*=}";; *) ;; esac @@ -63,6 +66,7 @@ num_macs=$(echo ${target} | cut -d - -f 3) echo "--------------------------------------------------------------------------------" echo "Running ${elf_file} for ${target} run with FVP:${fvp_model} num_macs:${num_macs}" +echo "WARNING: Corstone FVP is not cycle accurate and should NOT be used to determine valid runtime" echo "--------------------------------------------------------------------------------" log_file=$(mktemp) @@ -75,7 +79,7 @@ if [[ ${target} == *"ethos-u55"* ]]; then -C mps3_board.uart0.out_file='-' \ -C mps3_board.uart0.shutdown_on_eot=1 \ -a "${elf_file}" \ - --timelimit 220 2>&1 | tee ${log_file} || true # seconds + --timelimit ${timeout} 2>&1 | tee ${log_file} || true # seconds echo "[${BASH_SOURCE[0]}] Simulation complete, $?" elif [[ ${target} == *"ethos-u85"* ]]; then ${fvp_model} \ @@ -86,7 +90,7 @@ elif [[ ${target} == *"ethos-u85"* ]]; then -C mps4_board.uart0.out_file='-' \ -C mps4_board.uart0.shutdown_on_eot=1 \ -a "${elf_file}" \ - --timelimit 220 2>&1 | tee ${log_file} || true # seconds + --timelimit ${timeout} 2>&1 | tee ${log_file} || true # seconds echo "[${BASH_SOURCE[0]}] Simulation complete, $?" 
else echo "Running ${elf_file} for ${target} is not supported" diff --git a/backends/arm/test/test_arm_baremetal.sh b/backends/arm/test/test_arm_baremetal.sh index 6c2784501b0..90b34241f3d 100755 --- a/backends/arm/test/test_arm_baremetal.sh +++ b/backends/arm/test/test_arm_baremetal.sh @@ -92,18 +92,18 @@ test_run_ethosu_fvp() { # End to End model tests using run.sh # TOSA quantized echo "${TEST_SUITE_NAME}: Test ethos-u target TOSA" - examples/arm/run.sh --target=TOSA --model_name=add - examples/arm/run.sh --target=TOSA --model_name=mul + examples/arm/run.sh --et_build_root=arm_test/test_run --target=TOSA --model_name=add + examples/arm/run.sh --et_build_root=arm_test/test_run --target=TOSA --model_name=mul # Ethos-U55 echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U55" - examples/arm/run.sh --target=ethos-u55-128 --model_name=add - examples/arm/run.sh --target=ethos-u55-128 --model_name=mul + examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u55-128 --model_name=add + examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u55-128 --model_name=mul # Ethos-U85 echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U85" - examples/arm/run.sh --target=ethos-u85-128 --model_name=add - examples/arm/run.sh --target=ethos-u85-128 --model_name=mul + examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u85-128 --model_name=add + examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u85-128 --model_name=mul echo "${TEST_SUITE_NAME}: PASS" } @@ -113,26 +113,26 @@ test_models_ethosu_fvp() { # End to End model tests using model_test.py source examples/arm/ethos-u-scratch/setup_path.sh # Build common libs once - python3 backends/arm/test/test_model.py --build_libs + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --build_libs # TOSA quantized echo "${TEST_SUITE_NAME}: Test ethos-u target TOSA" - python3 backends/arm/test/test_model.py --target=TOSA --model=mv2 - python3 backends/arm/test/test_model.py --target=TOSA --model=mv3 - python3 backends/arm/test/test_model.py --target=TOSA --model=lstm - python3 backends/arm/test/test_model.py --target=TOSA --model=edsr + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA --model=mv2 + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA --model=mv3 + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA --model=lstm + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=TOSA --model=edsr # Ethos-U55 echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U55" - python3 backends/arm/test/test_model.py --target=ethos-u55-128 --model=mv2 - python3 backends/arm/test/test_model.py --target=ethos-u55-64 --model=mv3 - python3 backends/arm/test/test_model.py --target=ethos-u55-256 --model=lstm + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-128 --model=mv2 --extra_flags="-DET_ATOL=1.20 -DET_RTOL=1.20" + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-64 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00" + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-256 --model=lstm --extra_flags="-DET_ATOL=0.02 -DET_RTOL=0.02" # Ethos-U85 echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U85" - python3 backends/arm/test/test_model.py --target=ethos-u85-256 --model=mv2 - python3 backends/arm/test/test_model.py 
--target=ethos-u85-1024 --model=mv3 - python3 backends/arm/test/test_model.py --target=ethos-u85-128 --model=lstm + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=mv2 --extra_flags="-DET_ATOL=1.20 -DET_RTOL=1.20" + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-1024 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00" + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=lstm --extra_flags="-DET_ATOL=0.02 -DET_RTOL=0.02" echo "${TEST_SUITE_NAME}: PASS" } @@ -146,4 +146,4 @@ test_full_ethosu_fvp() { # All End to End model tests -${TEST_SUITE} \ No newline at end of file +${TEST_SUITE} diff --git a/backends/arm/test/test_model.py b/backends/arm/test/test_model.py index 990b9e5f70b..b94a5f65256 100755 --- a/backends/arm/test/test_model.py +++ b/backends/arm/test/test_model.py @@ -56,7 +56,12 @@ def get_args(): default=False, help="Don't save temporary files during compilation", ) - + parser.add_argument( + "--extra_flags", + required=False, + default=None, + help="Extra cmake flags to pass the when building the executor_runner", + ) args = parser.parse_args() if args.model and "ethos-u" in args.target and args.system_config is None: @@ -95,6 +100,8 @@ def build_libs(et_build_root: str, script_path: str): os.path.join(script_path, "build_executorch.sh"), f"--et_build_root={et_build_root}", "--build_type=Release", + "--devtools", + "--etdump", ] ) run_external_cmd( @@ -148,6 +155,7 @@ def build_pte( "examples.arm.aot_arm_compiler", "--delegate", "--quantize", + "--bundleio", intermediate, f"--model_name={model_name}", f"--target={target}", @@ -158,7 +166,7 @@ def build_pte( ] ) - pte_file = os.path.join(output, f"{model_name}_arm_delegate_{args.target}.pte") + pte_file = os.path.join(output, f"{model_name}_arm_delegate_{args.target}.bpte") return pte_file @@ -168,17 +176,26 @@ def build_ethosu_runtime( pte_file: str, target: str, system_config: str, + extra_flags: str, elf_build_path: str, ): + + extra_build_flag = "" + if extra_flags: + extra_build_flag = f"--extra_build_flags={extra_flags}" + run_external_cmd( [ "bash", os.path.join(script_path, "build_executorch_runner.sh"), f"--et_build_root={et_build_root}", f"--pte={pte_file}", + "--bundleio", + "--etdump", f"--target={target}", "--build_type=Release", f"--system_config={system_config}", + extra_build_flag, f"--output={elf_build_path}", ] ) @@ -239,6 +256,7 @@ def run_elf_with_fvp(script_path: str, elf_file: str, target: str): pte_file, args.target, args.system_config, + args.extra_flags, elf_build_path, ) print(f"ELF file created: {elf_file} ") diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py index f7f2105b99c..1f224983d4e 100644 --- a/examples/arm/aot_arm_compiler.py +++ b/examples/arm/aot_arm_compiler.py @@ -13,9 +13,10 @@ import os from pathlib import Path -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple import torch +from examples.devtools.scripts.export_bundled_program import save_bundled_program from executorch.backends.arm.arm_backend import ( ArmCompileSpecBuilder, get_tosa_spec, @@ -36,6 +37,8 @@ MobileNetV2Evaluator, ) from executorch.devtools.backend_debug import get_delegation_info +from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite + from executorch.exir import ( EdgeCompileConfig, ExecutorchBackendConfig, @@ -56,27 +59,50 @@ 
logging.basicConfig(level=logging.WARNING, format=FORMAT) -def get_model_and_inputs_from_name(model_name: str) -> Tuple[torch.nn.Module, Any]: +def get_model_and_inputs_from_name( + model_name: str, model_input: str | None +) -> Tuple[torch.nn.Module, Any]: """Given the name of an example pytorch model, return it and example inputs. Raises RuntimeError if there is no example model corresponding to the given name. """ + example_inputs = None + if model_input is not None: + logging.info(f"Load model input from {model_input}") + if model_input.endswith(".pt"): + example_inputs = torch.load(model_input, weights_only=False) + else: + raise RuntimeError( + f"Model input data '{model_input}' is not a valid name. Use --model_input .pt e.g. saved with torch.save()" + ) + # Case 1: Model is defined in this file if model_name in models.keys(): + logging.info(f"Internal model {model_name}") model = models[model_name]() - example_inputs = models[model_name].example_input + if example_inputs is None: + example_inputs = models[model_name].example_input # Case 2: Model is defined in examples/models/ elif model_name in MODEL_NAME_TO_MODEL.keys(): logging.warning( "Using a model from examples/models not all of these are currently supported" ) - model, example_inputs, _, _ = EagerModelFactory.create_model( + logging.info( + f"Load {model_name} -> {MODEL_NAME_TO_MODEL[model_name]} from examples/models" + ) + + model, tmp_example_inputs, _, _ = EagerModelFactory.create_model( *MODEL_NAME_TO_MODEL[model_name] ) + if example_inputs is None: + example_inputs = tmp_example_inputs # Case 3: Model is in an external python file loaded as a module. # ModelUnderTest should be a torch.nn.module instance # ModelInputs should be a tuple of inputs to the forward function elif model_name.endswith(".py"): + logging.info( + f"Load model file {model_name} Variable ModelUnderTest= ModelInputs=" + ) import importlib.util # load model's module and add it @@ -84,13 +110,22 @@ def get_model_and_inputs_from_name(model_name: str) -> Tuple[torch.nn.Module, An module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) model = module.ModelUnderTest - example_inputs = module.ModelInputs - + if example_inputs is None: + example_inputs = module.ModelInputs + # Case 4: Model is in an saved model file torch.save(model) + elif model_name.endswith(".pth") or model_name.endswith(".pt"): + logging.info(f"Load model file {model_name}") + model = torch.load(model_name, weights_only=False) + if example_inputs is None: + raise RuntimeError( + f"Model '{model_name}' requires input data specify --model_input .pt" + ) else: raise RuntimeError( f"Model '{model_name}' is not a valid name. Use --help for a list of available models." ) - + logging.debug(f"Loaded model: {model}") + logging.debug(f"Loaded input: {example_inputs}") return model, example_inputs @@ -107,7 +142,7 @@ def quantize( logging.debug(f"Original model: {model}") quantizer = None if is_ethosu(compile_specs): - quantizer = EthosUQuantizer(compile_spec) + quantizer = EthosUQuantizer(compile_specs) elif is_tosa(compile_specs): quantizer = TOSAQuantizer(get_tosa_spec(compile_specs)) else: @@ -365,13 +400,19 @@ def dump_delegation_info(edge, intermediate_files_folder: Optional[str] = None): file.write(delegation_info_string) -def get_args(): # noqa C901 +def get_args(): parser = argparse.ArgumentParser() parser.add_argument( "-m", "--model_name", required=True, - help=f"Provide model name. 
Valid ones: {set(list(models.keys())+list(MODEL_NAME_TO_MODEL.keys()))}", + help=f"Model file .py/.pth/.pt, builtin model or a model from examples/models. Valid names: {set(list(models.keys())+list(MODEL_NAME_TO_MODEL.keys()))}", + ) + parser.add_argument( + "--model_input", + required=False, + default=None, + help="Provide model input .pt file, or python variable name", ) parser.add_argument( "-d", @@ -381,6 +422,13 @@ def get_args(): # noqa C901 default=False, help="Flag for producing ArmBackend delegated model", ) + parser.add_argument( + "--bundleio", + action="store_true", + required=False, + default=False, + help="Flag for producing BundleIO bpte file with input/output test/ref data.", + ) parser.add_argument( "-t", "--target", @@ -436,7 +484,7 @@ def get_args(): # noqa C901 "--output", action="store", required=False, - help="Location for outputs, if not the default of cwd.", + help="Filename (if .pte or .bpte is used) or a folder for outputs, if not specified the default is to place files in cwd.", ) parser.add_argument( "--system_config", @@ -468,10 +516,6 @@ def get_args(): # noqa C901 + "This is required for running quantized models with unquantized input." ) - if args.quantize and not args.delegate: - logging.error("--delegate must be set when using --quanitze flag.") - exit(1) - # if we have custom ops, register them before processing the model if args.so_library is not None: logging.info(f"Loading custom ops from {args.so_library}") @@ -503,12 +547,136 @@ def get_args(): # noqa C901 return args -if __name__ == "__main__": +def save_bpte_program(exec_prog, original_model: torch.nn.Module, output_name: str): + # Construct MethodTestSuite for Each Method + + # Generate Test Suites + method_names = [ + method.name for method in exec_prog.executorch_program.execution_plan + ] + + program_inputs = {m_name: [example_inputs] for m_name in method_names} + + method_test_suites: List[MethodTestSuite] = [] + for m_name in method_names: + method_inputs = program_inputs[m_name] + + # To create a bundled program, we first create every test cases from input. We leverage eager model + # to generate expected output for each test input, and use MethodTestCase to hold the information of + # each test case. We gather all MethodTestCase for same method into one MethodTestSuite, and generate + # bundled program by all MethodTestSuites. 
+ method_test_cases: List[MethodTestCase] = [] + + if args.intermediates: + # Save model.pth + intermediates_path = Path(args.intermediates) + model_path = os.path.join(intermediates_path, "model.pth") + try: + torch.save(original_model, model_path) + except: + logging.warning(f"Could not torch.save(model, {model_path})") + method_index = 0 + for method_input in method_inputs: + output_ref = original_model(*method_input) + + logging.debug(f"input_{method_index}: {method_input}") + logging.debug(f"output_ref_{method_index}: {output_ref}") + + if args.intermediates: + # Save model input and referece output + input_path = os.path.join( + intermediates_path, f"input_{method_index}.pt" + ) + try: + torch.save(method_input, input_path) + except: + logging.warning( + f"Could not torch.save(input_{method_index}, {input_path})" + ) + refoutput_path = os.path.join( + intermediates_path, f"output_ref_{method_index}.pt" + ) + try: + torch.save(output_ref, refoutput_path) + except: + logging.warning( + f"Could not torch.save(output_ref_{method_index}, {refoutput_path})" + ) + + method_test_cases.append( + MethodTestCase( + inputs=method_input, + expected_outputs=output_ref, + ) + ) + + method_index = method_index + 1 + + method_test_suites.append( + MethodTestSuite( + method_name=m_name, + test_cases=method_test_cases, + ) + ) + + # Generate BundledProgram + save_bundled_program(exec_prog, method_test_suites, output_name) + + +def to_edge_TOSA_delegate( + args, + model: torch.nn.Module, +): + model_int8 = None + # As we can target multiple output encodings, one must + # be specified. + compile_spec = get_compile_spec( + args.target, + args.intermediates, + args.system_config, + args.memory_mode, + ) + if args.quantize: + model = quantize( + model, + args.model_name, + compile_spec, + example_inputs, + args.evaluate, + args.evaluate_config, + ) + model_int8 = model + # Wrap quantized model back into an exported_program + exported_program = torch.export.export_for_training(model, example_inputs) + + if args.intermediates: + os.makedirs(args.intermediates, exist_ok=True) + + if is_ethosu(compile_spec): + partitioner = EthosUPartitioner(compile_spec) + elif is_tosa(compile_spec): + partitioner = TOSAPartitioner(compile_spec) + else: + raise RuntimeError(f"Unhandled compile spec: {compile_spec}") + + edge = to_edge_transform_and_lower( + exported_program, + partitioner=[partitioner], + compile_config=EdgeCompileConfig( + _check_ir_validity=False, + ), + ) + return model_int8, edge + + +if __name__ == "__main__": # noqa: C901 args = get_args() # Pick model from one of the supported lists - model, example_inputs = get_model_and_inputs_from_name(args.model_name) - model = model.eval() + original_model, example_inputs = get_model_and_inputs_from_name( + args.model_name, args.model_input + ) + model = original_model.eval() # export_for_training under the assumption we quantize, the exported form also works # in to_edge if we don't quantize @@ -519,44 +687,7 @@ def get_args(): # noqa C901 # Quantize if required model_int8 = None if args.delegate: - # As we can target multiple output encodings, one must - # be specified. 
- compile_spec = get_compile_spec( - args.target, - args.intermediates, - args.system_config, - args.memory_mode, - ) - if args.quantize: - model = quantize( - model, - args.model_name, - compile_spec, - example_inputs, - args.evaluate, - args.evaluate_config, - ) - model_int8 = model - # Wrap quantized model back into an exported_program - exported_program = torch.export.export_for_training(model, example_inputs) - - if args.intermediates: - os.makedirs(args.intermediates, exist_ok=True) - - if is_ethosu(compile_spec): - partitioner = EthosUPartitioner(compile_spec) - elif is_tosa(compile_spec): - partitioner = TOSAPartitioner(compile_spec) - else: - raise RuntimeError(f"Unhandled compile spec: {compile_spec}") - - edge = to_edge_transform_and_lower( - exported_program, - partitioner=[partitioner], - compile_config=EdgeCompileConfig( - _check_ir_validity=False, - ), - ) + model_int8, edge = to_edge_TOSA_delegate(args, model) else: edge = to_edge_transform_and_lower( exported_program, @@ -587,11 +718,33 @@ def get_args(): # noqa C901 else f"_arm_{args.target}" ) + if args.bundleio: + output_name = f"{output_name}.bpte" + else: + output_name = f"{output_name}.pte" + if args.output is not None: - output_name = os.path.join(args.output, output_name) + if args.output.endswith(".pte") or args.output.endswith(".bpte"): + # --output is a pte or bundle pte filename use it as output name + if args.bundleio and not args.output.endswith(".bpte"): + raise RuntimeError( + f"--bundleio expects a .bpte file ending to --output and not .pte {args.output}" + ) + if not args.bundleio and not args.output.endswith(".pte"): + raise RuntimeError( + f"When not using --bundleio a .bpte file should not be use as --output {args.output}" + ) + output_name = args.output + else: + # --output is a folder + output_name = os.path.join(args.output, output_name) - save_pte_program(exec_prog, output_name) - print(f"PTE file saved as {output_name}.pte") + if args.bundleio: + save_bpte_program(exec_prog, original_model, output_name) + print(f"Bundle PTE file saved as {output_name}") + else: + save_pte_program(exec_prog, output_name) + print(f"PTE file saved as {output_name}") if args.evaluate: evaluate_model( diff --git a/examples/arm/executor_runner/CMakeLists.txt b/examples/arm/executor_runner/CMakeLists.txt index d43a7047080..11891e2fb93 100644 --- a/examples/arm/executor_runner/CMakeLists.txt +++ b/examples/arm/executor_runner/CMakeLists.txt @@ -9,11 +9,14 @@ project(arm_executor_runner) option(SEMIHOSTING "Enable semihosting" OFF) option(ET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE "Set ET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE to specify memory alloction pool size" OFF) option(ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE "Set ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE to specify temp alloction pool size" OFF) +option(ET_BUNDLE_IO "Set to compile in BundleIO support" OFF) +option(ET_ATOL "Set atol to use for BundleIO testing" OFF) +option(ET_RTOL "Set rtol to use for BundleIO testing" OFF) if(NOT DEFINED ET_PTE_FILE_PATH AND NOT ${SEMIHOSTING}) message( FATAL_ERROR - "ET_PTE_FILE_PATH must specify a model .pte, for bare metal systems the " + "ET_PTE_FILE_PATH must specify a model .pte or .bpte, for bare metal systems the " "model is built into the binary." 
  )
endif()

@@ -373,6 +376,18 @@ if(EXECUTORCH_ENABLE_EVENT_TRACER)
   )
 endif()
 
+if(ET_BUNDLE_IO)
+  add_library(bundled_program STATIC IMPORTED)
+  set_property(
+    TARGET bundled_program
+    PROPERTY IMPORTED_LOCATION
+             "${ET_BUILD_DIR_PATH}/lib/libbundled_program.a"
+  )
+  list(APPEND arm_executor_runner_link
+    bundled_program
+  )
+endif()
+
 # Need whole-archive to ensure C++ ctor's are called - this may be wasteful for
 # bin size as we link in a number of other symbols
 target_link_libraries(
@@ -402,6 +417,18 @@ if(ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE)
   target_compile_definitions(arm_executor_runner PUBLIC ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE=${ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE})
 endif()
 
+if(ET_BUNDLE_IO)
+  target_compile_definitions(arm_executor_runner PUBLIC -DET_BUNDLE_IO)
+endif()
+
+if(ET_ATOL)
+  target_compile_definitions(arm_executor_runner PUBLIC ET_ATOL=${ET_ATOL})
+endif()
+
+if(ET_RTOL)
+  target_compile_definitions(arm_executor_runner PUBLIC ET_RTOL=${ET_RTOL})
+endif()
+
 # Fixup compilation of retarget.c
 if(SEMIHOSTING)
   # Remove this when MLBEDSW-8910 is closed.
diff --git a/examples/arm/executor_runner/arm_executor_runner.cpp b/examples/arm/executor_runner/arm_executor_runner.cpp
index 2d08f733eba..48237acdf22 100644
--- a/examples/arm/executor_runner/arm_executor_runner.cpp
+++ b/examples/arm/executor_runner/arm_executor_runner.cpp
@@ -1,17 +1,12 @@
 /* Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
- * Copyright 2023-2024 Arm Limited and/or its affiliates.
+ * Copyright 2023-2025 Arm Limited and/or its affiliates.
  *
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */
 
 #include
-#include
-#include
-#include
-#include
-
 #include
 #include
 #include
@@ -19,8 +14,17 @@
 #include
 #include
 #include
+#include
+#include
+#include
+#include
 
 #include "arm_perf_monitor.h"
+
+#if defined(ET_BUNDLE_IO)
+#include
+#endif
+
 #if defined(ET_EVENT_TRACER_ENABLED)
 #include
 #if !defined(SEMIHOSTING)
@@ -102,6 +106,24 @@ unsigned char __attribute__((
     section("input_data_sec"),
     aligned(16))) method_allocation_pool[method_allocation_pool_size];
 
+#if defined(ET_BUNDLE_IO)
+
+const size_t testset_idx = 0; // Index of the BundleIO test set to run when a bpte is used
+
+#if defined(ET_ATOL)
+const float et_atol = ET_ATOL;
+#else
+const float et_atol = 0.01;
+#endif
+
+#if defined(ET_RTOL)
+const float et_rtol = ET_RTOL;
+#else
+const float et_rtol = 0.01;
+#endif
+
+#endif
+
 /**
  * The temp_allocation_pool is used for allocating temporary data during kernel
  * or delegate execution. This will be reset after each kernel or delegate call.
@@ -409,15 +431,41 @@ int main(int argc, const char* argv[]) {
     }
   }
 #endif
-  ET_LOG(Info, "Model in %p %c", model_pte, model_pte[0]);
-  auto loader = BufferDataLoader(model_pte, pte_size);
-  ET_LOG(Info, "Model PTE file loaded. Size: %lu bytes.", pte_size);
+  ET_LOG(
+      Info, "PTE in %p %c Size: %lu bytes", model_pte, model_pte[0], pte_size);
+
+  // Find the offset to the embedded Program.
+  const void* program_data = model_pte;
+  size_t program_data_len = pte_size;
+
+#if defined(ET_BUNDLE_IO)
+  bool bundle_io = executorch::bundled_program::is_bundled_program(
+      reinterpret_cast(model_pte), pte_size);
+  if (bundle_io) {
+    // A BundleIO .bpte is provided, dig out the actual model from the data area
+    Error status = executorch::bundled_program::get_program_data(
+        reinterpret_cast(model_pte),
+        pte_size,
+        &program_data,
+        &program_data_len);
+
+    ET_CHECK_MSG(
+        status == Error::Ok,
+        "get_program_data() from bundle PTE failed: 0x%x",
+        (unsigned int)status);
+  }
+#endif
+  auto loader = BufferDataLoader(program_data, program_data_len);
+  ET_LOG(Info, "PTE Model data loaded. Size: %lu bytes.", program_data_len);
+
+  // Parse the program file. This is immutable, and can also be reused
+  // between multiple execution invocations across multiple threads.
   Result program = Program::load(&loader);
   if (!program.ok()) {
     ET_LOG(
         Info,
         "Program loading failed @ 0x%p: 0x%" PRIx32,
-        model_pte,
+        program_data,
         program.error());
   }
@@ -483,6 +531,7 @@
 
   executorch::runtime::EventTracer* event_tracer_ptr = nullptr;
 #if defined(ET_EVENT_TRACER_ENABLED)
+  ET_LOG(Info, "Setting up ETDump");
   torch::executor::ETDumpGen etdump_gen = torch::executor::ETDumpGen();
   event_tracer_ptr = &etdump_gen;
 #endif
@@ -499,21 +548,75 @@
   }
   size_t method_loaded_memsize =
       method_allocator.used_size() - method_loaded_membase;
-  ET_LOG(Info, "Method loaded.");
+  ET_LOG(Info, "Method '%s' loaded.", method_name);
 
   ET_LOG(Info, "Preparing inputs...");
   size_t input_membase = method_allocator.used_size();
 
-  auto inputs =
-      ::prepare_input_tensors(*method, method_allocator, input_buffers);
-
-  if (!inputs.ok()) {
-    ET_LOG(
-        Info,
-        "Preparing inputs tensors for method %s failed with status 0x%" PRIx32,
-        method_name,
-        inputs.error());
+#if defined(ET_BUNDLE_IO)
+  if (bundle_io) {
+    // Get inputs from bundled IO ".bpte" data
+    // Useful for testing
+    ET_LOG(Info, "Input testset[%d] from bundled bpte", testset_idx);
+    Error status = executorch::bundled_program::load_bundled_input(
+        *method, model_pte, testset_idx);
+    ET_CHECK_MSG(
+        status == Error::Ok,
+        "load_bundled_input failed with status 0x%" PRIx32,
+        status);
+  } else
+#endif
+  {
+    // Here you would add code to get input from your hardware
+    // Get inputs via SEMIHOSTING, or fake them with all-ones data
+    // Use "static" so the prepared inputs are not deallocated when this
+    // block goes out of scope
+    static auto prepared_inputs =
+        ::prepare_input_tensors(*method, method_allocator, input_buffers);
+
+    if (!prepared_inputs.ok()) {
+      ET_LOG(
+          Info,
+          "Preparing input tensors for method %s failed with status 0x%" PRIx32,
+          method_name,
+          prepared_inputs.error());
+    }
   }
 
+#ifdef DUMP_INPUT
+  {
+    std::vector inputs(method->inputs_size());
+    ET_LOG(Info, "%zu inputs: ", inputs.size());
+    Error status = method->get_inputs(inputs.data(), inputs.size());
+    ET_CHECK(status == Error::Ok);
+
+    for (int i = 0; i < inputs.size(); ++i) {
+      Tensor t = inputs[i].toTensor();
+      // The output might be collected and parsed so printf() is used instead
+      // of ET_LOG() here
+      for (int j = 0; j < inputs[i].toTensor().numel(); ++j) {
+        if (t.scalar_type() == ScalarType::Int) {
+          printf(
+              "Input[%d][%d]: (int) %d\n",
+              i,
+              j,
+              inputs[i].toTensor().const_data_ptr()[j]);
+        } else if (t.scalar_type() == ScalarType::Float) {
+          printf(
+              "Input[%d][%d]: (float) %f\n",
+              i,
+              j,
+              inputs[i].toTensor().const_data_ptr()[j]);
+        } else
if (t.scalar_type() == ScalarType::Char) { + printf( + "Input[%d][%d]: (char) %d\n", + i, + j, + inputs[i].toTensor().const_data_ptr()[j]); + } + } + } + } +#endif size_t input_memsize = method_allocator.used_size() - input_membase; ET_LOG(Info, "Input prepared."); @@ -524,7 +627,8 @@ int main(int argc, const char* argv[]) { StopMeasurements(); size_t executor_memsize = method_allocator.used_size() - executor_membase; - ET_LOG(Info, "model_pte_loaded_size: %lu bytes.", pte_size); + ET_LOG(Info, "model_pte_program_size: %lu bytes.", program_data_len); + ET_LOG(Info, "model_pte_loaded_size: %lu bytes.", pte_size); #if defined(SEMIHOSTING) if (input_file_allocator.size() > 0) { ET_LOG( @@ -575,50 +679,34 @@ int main(int argc, const char* argv[]) { ET_LOG(Info, "%zu outputs: ", outputs.size()); status = method->get_outputs(outputs.data(), outputs.size()); ET_CHECK(status == Error::Ok); + for (int i = 0; i < outputs.size(); ++i) { Tensor t = outputs[i].toTensor(); #if !defined(SEMIHOSTING) +#if !defined(ET_BUNDLE_IO) // The output might be collected and parsed so printf() is used instead // of ET_LOG() here for (int j = 0; j < outputs[i].toTensor().numel(); ++j) { if (t.scalar_type() == ScalarType::Int) { printf( - "Output[%d][%d]: %d\n", + "Output[%d][%d]: (int) %d\n", i, j, outputs[i].toTensor().const_data_ptr()[j]); } else if (t.scalar_type() == ScalarType::Float) { printf( - "Output[%d][%d]: %f\n", + "Output[%d][%d]: (float) %f\n", i, j, outputs[i].toTensor().const_data_ptr()[j]); } else if (t.scalar_type() == ScalarType::Char) { printf( - "Output[%d][%d]: %d\n", + "Output[%d][%d]: (char) %d\n", i, j, outputs[i].toTensor().const_data_ptr()[j]); } } -#if defined(ET_EVENT_TRACER_ENABLED) - ETDumpResult result = etdump_gen.get_etdump_data(); - if (result.buf != nullptr && result.size > 0) { - // On a device with no file system we can't just write it out - // to the file-system so we base64 encode it and dump it on the log. - int mode = 0; - size_t len = result.size; - size_t encoded_len = base64_encoded_size(result.size, mode); - uint8_t* encoded_buf = reinterpret_cast( - method_allocator.allocate(encoded_len + 1)); - int ret = base64_encode( - encoded_buf, (uint8_t*)result.buf, &encoded_len, &len, mode); - encoded_buf[encoded_len] = 0x00; // Ensure null termination - ET_LOG(Info, "Writing etdump.bin [base64]"); - printf( - "#---\nbase64 -i -d <<<\"\\\n%s\\\n\" >etdump.bin\npython3 -m devtools.inspector.inspector_cli --etdump_path etdump.bin --source_time_scale cycles --target_time_scale cycles\n#---\n", - encoded_buf); - } #endif #else char out_filename[255]; @@ -631,21 +719,66 @@ int main(int argc, const char* argv[]) { outputs[i].toTensor().nbytes(), out_file); fclose(out_file); -#if defined(ET_EVENT_TRACER_ENABLED) - etdump_result result = etdump_gen.get_etdump_data(); - if (result.buf != nullptr && result.size > 0) { - // On a device with a file system we can just write it out - // to the file-system. - char etdump_filename = "etdump.bin"; - ET_LOG(Info, "Writing etdump to file: %s", etdump_filename); - FILE* f = fopen(etdump_filename, "w+"); - fwrite((uint8_t*)result.buf, 1, result.size, f); - fclose(f); - free(result.buf); - } #endif + } + +#if defined(ET_BUNDLE_IO) + if (bundle_io) { + // Verify the result. 
+      status = executorch::bundled_program::verify_method_outputs(
+          *method, model_pte, testset_idx, et_rtol, et_atol);
+      if (status == Error::Ok) {
+        ET_LOG(Info, "Model output matches expected BundleIO bpte ref data.");
+        ET_LOG(Info, "TEST: BundleIO index[%d] Test_result: PASS", testset_idx);
+      } else {
+        ET_LOG(
+            Error,
+            "Model output does not match expected BundleIO bpte ref data. rtol=%f atol=%f",
+            et_rtol,
+            et_atol);
+        ET_LOG(Error, "TEST: BundleIO index[%d] Test_result: FAIL", testset_idx);
+      }
+      ET_CHECK_MSG(
+          status == Error::Ok,
+          "Bundle verification failed with status 0x%" PRIx32,
+          status);
+    }
 #endif
+
+#if defined(ET_EVENT_TRACER_ENABLED)
+#if !defined(SEMIHOSTING)
+  ETDumpResult result = etdump_gen.get_etdump_data();
+  if (result.buf != nullptr && result.size > 0) {
+    // On a device with no file system we can't just write it out
+    // to the file-system so we base64 encode it and dump it on the log.
+    int mode = 0;
+    size_t len = result.size;
+    size_t encoded_len = base64_encoded_size(result.size, mode);
+    uint8_t* encoded_buf =
+        reinterpret_cast(method_allocator.allocate(encoded_len + 1));
+    int ret = base64_encode(
+        encoded_buf, (uint8_t*)result.buf, &encoded_len, &len, mode);
+    encoded_buf[encoded_len] = 0x00; // Ensure null termination
+    ET_LOG(Info, "Writing etdump.bin [base64]");
+    printf(
+        "#---\nbase64 -i -d <<<\"\\\n%s\\\n\" >etdump.bin\npython3 -m devtools.inspector.inspector_cli --etdump_path etdump.bin --source_time_scale cycles --target_time_scale cycles\n#---\n",
+        encoded_buf);
+  }
+#else
+  etdump_result result = etdump_gen.get_etdump_data();
+  if (result.buf != nullptr && result.size > 0) {
+    // On a device with a file system we can just write it out
+    // to the file-system.
+    char etdump_filename = "etdump.bin";
+    ET_LOG(Info, "Writing etdump to file: %s", etdump_filename);
+    FILE* f = fopen(etdump_filename, "w+");
+    fwrite((uint8_t*)result.buf, 1, result.size, f);
+    fclose(f);
+    free(result.buf);
   }
+#endif
+#endif
+
 out:
   ET_LOG(Info, "Program complete, exiting.");
 #if defined(SEMIHOSTING)
diff --git a/examples/arm/run.sh b/examples/arm/run.sh
index ce92312b652..5f1e3764de2 100755
--- a/examples/arm/run.sh
+++ b/examples/arm/run.sh
@@ -18,11 +18,14 @@
 et_root_dir=$(realpath ${et_root_dir})
 
 model_name=""
+model_input_set=false
+model_input=""
 aot_arm_compiler_flags="--delegate --quantize"
 portable_kernels="aten::_softmax.out"
 target="ethos-u55-128"
 output_folder_set=false
 output_folder="."
+bundleio=false
 build_with_etdump=false
 build_type="Release"
 extra_build_flags=""
@@ -35,11 +38,13 @@ ethos_u_scratch_dir=${script_dir}/ethos-u-scratch
 
 function help() {
     echo "Usage: $(basename $0) [options]"
    echo "Options:"
-    echo "  --model_name= Model to run, can be a builtin, examples/models or a filename Default to all builtin models"
+    echo "  --model_name= Model file .py/.pth/.pt, builtin model or a model from examples/models. Passed to aot_arm_compiler"
+    echo "  --model_input= Provide model input .pt file to override the input in the model file.
 Passed to aot_arm_compiler"
     echo "  --aot_arm_compiler_flags= Only used if --model_name is used Default: ${aot_arm_compiler_flags}"
     echo "  --portable_kernels= Comma separated list of portable (non delagated) kernels to include Default: ${portable_kernels}"
     echo "  --target= Target to build and run for Default: ${target}"
     echo "  --output= Target build output folder Default: ${output_folder}"
+    echo "  --bundleio Create a bundled pte using Devtools BundleIO with input/ref-output included"
     echo "  --etdump Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log"
     echo "  --build_type= Build with Release, Debug or RelWithDebInfo, default is ${build_type}"
     echo "  --extra_build_flags= Extra flags to pass to cmake like -DET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE=60000 Default: none "
@@ -56,10 +61,12 @@ for arg in "$@"; do
     case $arg in
       -h|--help) help ;;
       --model_name=*) model_name="${arg#*=}";;
+      --model_input=*) model_input="${arg#*=}" ; model_input_set=true ;;
       --aot_arm_compiler_flags=*) aot_arm_compiler_flags="${arg#*=}";;
      --portable_kernels=*) portable_kernels="${arg#*=}";;
      --target=*) target="${arg#*=}";;
      --output=*) output_folder="${arg#*=}" ; output_folder_set=true ;;
+      --bundleio) bundleio=true ;;
      --etdump) build_with_etdump=true ;;
      --build_type=*) build_type="${arg#*=}";;
      --extra_build_flags=*) extra_build_flags="${arg#*=}";;
@@ -121,13 +128,21 @@ hash arm-none-eabi-gcc \
 # Build executorch libraries
 cd $et_root_dir
 
+devtools_flag=""
+bundleio_flag=""
+et_dump_flag=""
 if [ "$build_with_etdump" = true ] ; then
+    devtools_flag="--devtools --etdump"
     et_dump_flag="--etdump"
-else
-    et_dump_flag=""
 fi
 
-backends/arm/scripts/build_executorch.sh --et_build_root="${et_build_root}" --build_type=$build_type $et_dump_flag
+if [ "$bundleio" = true ] ; then
+    devtools_flag="--devtools --etdump"
+    bundleio_flag="--bundleio"
+    et_dump_flag="--etdump"
+fi
+
+backends/arm/scripts/build_executorch.sh --et_build_root="${et_build_root}" --build_type=$build_type $devtools_flag
 backends/arm/scripts/build_portable_kernels.sh --et_build_root="${et_build_root}" --build_type=$build_type --portable_kernels=$portable_kernels
 
 # Build a lib quantized_ops_aot_lib
@@ -157,12 +172,21 @@
     echo "--------------------------------------------------------------------------------"
     cd $et_root_dir
 
-    model_short_name=$(basename -- "${model}" ".py")
-    model_filename=${model_short_name}_arm_${target}.pte
+    # Remove path and file extension to get model_short_name
+    ext=${model##*.}
+    model_short_name=$(basename -- "${model}" .$ext)
+    model_filename=${model_short_name}_arm_${target}
 
     if [[ "${model_compiler_flags}" == *"--delegate"* ]]; then
         # Name aligned with default aot_arm_compiler output
-        model_filename=${model_short_name}_arm_delegate_${target}.pte
+        model_filename=${model_short_name}_arm_delegate_${target}
+    fi
+    elf_folder=${model_filename}
+
+    if [ "$bundleio" = true ] ; then
+        model_filename=${model_filename}.bpte
+    else
+        model_filename=${model_filename}.pte
     fi
 
     if [ "$output_folder_set" = false ] ; then
@@ -170,15 +194,19 @@
     fi
 
     output_folder=$(realpath ${output_folder})
-    mkdir -p ${output_folder}
-    pte_file=$(realpath -m ${output_folder}/${model_filename})
+    pte_file="${output_folder}/${model_filename}"
 
-    rm -f "${pte_file}"
+    mkdir -p ${output_folder}
 
-    ARM_AOT_CMD="python3 -m examples.arm.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --intermediate=${output_folder} --output=${output_folder} 
--so_library=$SO_LIB --system_config=${system_config} --memory_mode=${memory_mode}"
+    # Remove old pte files
+    rm -f "${output_folder}/${model_filename}"
+
+    ARM_AOT_CMD="python3 -m examples.arm.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --intermediate=${output_folder} --output=${pte_file} --so_library=$SO_LIB --system_config=${system_config} --memory_mode=${memory_mode} $bundleio_flag"
     echo "CALL ${ARM_AOT_CMD}" >&2
     ${ARM_AOT_CMD} 1>&2
 
+    pte_file=$(realpath ${pte_file})
+
     [[ -f ${pte_file} ]] || { >&2 echo "Failed to generate a pte file - ${pte_file}"; exit 1; }
     echo "pte_data_size: $(wc -c ${pte_file})"
     echo "pte_file: ${pte_file}"
@@ -188,10 +216,11 @@
     else
         set -x
         # Rebuild the application as the pte is imported as a header/c array
-        backends/arm/scripts/build_executorch_runner.sh "--pte=${pte_file}" --build_type=$build_type --target=$target --system_config=$system_config $et_dump_flag --extra_build_flags="$extra_build_flags" --ethosu_tools_dir="$ethos_u_scratch_dir" --output="${output_folder}"
+        backends/arm/scripts/build_executorch_runner.sh --et_build_root="${et_build_root}" --pte="${pte_file}" --build_type=${build_type} --target=${target} --system_config=${system_config} ${bundleio_flag} ${et_dump_flag} --extra_build_flags="${extra_build_flags}" --ethosu_tools_dir="${ethos_u_scratch_dir}"
 
         if [ "$build_only" = false ] ; then
             # Execute the executor_runner on FVP Simulator
-            backends/arm/scripts/run_fvp.sh --elf=${output_folder}/cmake-out/arm_executor_runner --target=$target
+            elf_file="${output_folder}/${elf_folder}/cmake-out/arm_executor_runner"
+            backends/arm/scripts/run_fvp.sh --elf=${elf_file} --target=$target
         fi
         set +x
     fi

From 5848cc3dfaf55e6152d9d3fb0dcba8cb6b17d9b9 Mon Sep 17 00:00:00 2001
From: Jacob Stevens
Date: Tue, 25 Feb 2025 09:03:02 -0500
Subject: [PATCH 078/584] Resolve many sign compare errors (#8651)

Resolve many sign compare errors (#8651)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/8651

In preparation for a full transition, resolve many sign compare errors. The work is chunked into several separate changes.
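
For context, the recurring fix in this diff is mechanical: hand-written counter loops, whose index type often differs in signedness from the bound, are replaced with c10::irange, so the index takes its type from the bound. A minimal sketch with placeholder names (`t`, `in_data`, `out_data`), not an exact hunk from this diff:

    #include <c10/util/irange.h>

    // Before: `int i` compared against t.numel() can trigger
    // -Wsign-compare when the two types differ in signedness.
    for (int i = 0; i < t.numel(); ++i) {
      out_data[i] = in_data[i];
    }

    // After: the index adopts the bound's own type, so the comparison
    // generated by the range loop is never mixed-sign.
    for (const auto i : c10::irange(t.numel())) {
      out_data[i] = in_data[i];
    }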
Reviewed By: swolchok Differential Revision: D70110991 --- kernels/portable/cpu/op_any.cpp | 9 +- kernels/portable/cpu/op_cdist_forward.cpp | 12 +-- kernels/portable/cpu/op_constant_pad_nd.cpp | 9 +- kernels/portable/cpu/op_convolution.cpp | 35 ++++---- kernels/portable/cpu/op_diagonal_copy.cpp | 10 ++- kernels/portable/cpu/op_flip.cpp | 9 +- kernels/portable/cpu/op_full.cpp | 3 +- kernels/portable/cpu/op_full_like.cpp | 3 +- kernels/portable/cpu/op_gather.cpp | 5 +- kernels/portable/cpu/op_glu.cpp | 15 ++-- kernels/portable/cpu/op_index_put.cpp | 5 +- kernels/portable/cpu/op_index_select.cpp | 5 +- .../cpu/op_linear_scratch_example.cpp | 4 +- kernels/portable/cpu/op_masked_select.cpp | 15 ++-- kernels/portable/cpu/op_max.cpp | 5 +- kernels/portable/cpu/op_mean.cpp | 3 +- kernels/portable/cpu/op_min.cpp | 5 +- kernels/portable/cpu/op_native_group_norm.cpp | 11 +-- kernels/portable/cpu/op_native_layer_norm.cpp | 7 +- kernels/portable/cpu/op_nonzero.cpp | 9 +- kernels/portable/cpu/op_ones.cpp | 3 +- kernels/portable/cpu/op_permute_copy.cpp | 5 +- kernels/portable/cpu/op_pixel_shuffle.cpp | 13 +-- kernels/portable/cpu/op_pixel_unshuffle.cpp | 13 +-- kernels/portable/cpu/op_prod.cpp | 5 +- kernels/portable/cpu/op_repeat.cpp | 5 +- kernels/portable/cpu/op_repeat_interleave.cpp | 9 +- kernels/portable/cpu/op_roll.cpp | 9 +- kernels/portable/cpu/op_scatter.cpp | 9 +- kernels/portable/cpu/op_scatter_add.cpp | 5 +- kernels/portable/cpu/op_select_scatter.cpp | 5 +- kernels/portable/cpu/op_slice_scatter.cpp | 7 +- .../portable/cpu/op_split_with_sizes_copy.cpp | 17 ++-- kernels/portable/cpu/op_sum.cpp | 3 +- kernels/portable/cpu/op_topk.cpp | 15 ++-- kernels/portable/cpu/op_tril.cpp | 13 +-- kernels/portable/cpu/op_unbind_copy.cpp | 8 +- kernels/portable/cpu/op_unsqueeze_copy.cpp | 3 +- .../portable/cpu/op_upsample_bilinear2d.cpp | 9 +- kernels/portable/cpu/op_var.cpp | 5 +- kernels/portable/cpu/op_zeros.cpp | 3 +- kernels/portable/cpu/targets.bzl | 3 + .../portable/cpu/util/activation_ops_util.cpp | 12 +-- .../portable/cpu/util/advanced_index_util.cpp | 76 ++++++++-------- kernels/portable/cpu/util/broadcast_util.h | 5 +- kernels/portable/cpu/util/copy_ops_util.cpp | 89 ++++++++++--------- kernels/portable/cpu/util/elementwise_util.h | 7 +- kernels/portable/cpu/util/index_util.cpp | 15 ++-- kernels/portable/cpu/util/kernel_ops_util.cpp | 33 +++---- kernels/portable/cpu/util/kernel_ops_util.h | 13 +-- .../cpu/util/normalization_ops_util.cpp | 11 +-- kernels/portable/cpu/util/padding_util.cpp | 9 +- kernels/portable/cpu/util/padding_util.h | 20 ++--- kernels/portable/cpu/util/reduce_util.cpp | 2 +- kernels/portable/cpu/util/repeat_util.cpp | 7 +- kernels/portable/cpu/util/slice_util.cpp | 9 +- kernels/portable/cpu/util/transpose_util.h | 9 +- kernels/portable/cpu/vec_ops.h | 41 ++++----- runtime/core/exec_aten/util/dim_order_util.h | 3 +- runtime/core/exec_aten/util/tensor_util.h | 2 + test/size_test.cpp | 5 +- 61 files changed, 395 insertions(+), 319 deletions(-) diff --git a/kernels/portable/cpu/op_any.cpp b/kernels/portable/cpu/op_any.cpp index ef09e4837ab..2cfdf36740b 100644 --- a/kernels/portable/cpu/op_any.cpp +++ b/kernels/portable/cpu/op_any.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include @@ -34,7 +35,7 @@ Tensor& any_all_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { const auto data_in = in.const_data_ptr(); auto data_out = out.mutable_data_ptr(); data_out[0] = static_cast(false); - for (auto i = 0; i < in.numel(); ++i) { + for (const auto i : c10::irange(in.numel())) { if (static_cast(data_in[i])) { data_out[0] = static_cast(true); break; @@ -83,12 +84,12 @@ Tensor& any_dims_out( CTYPE_OUT* out_data = out.mutable_data_ptr(); if (dim_list.has_value() && dim_list.value().empty()) { const CTYPE_IN* in_data = in.const_data_ptr(); - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { out_data[out_ix] = static_cast(static_cast(in_data[out_ix])); } } else { - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { bool any = false; if (in.numel() > 0) { any = map_reduce_over_dim_list( @@ -138,7 +139,7 @@ Tensor& any_out( ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, name, CTYPE_IN, [&] { ET_SWITCH_TWO_TYPES(Bool, Byte, out_type, ctx, name, CTYPE_OUT, [&] { CTYPE_OUT* out_data = out.mutable_data_ptr(); - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { CTYPE_OUT any = false; if (in.numel() > 0) { std::tuple acc = diff --git a/kernels/portable/cpu/op_cdist_forward.cpp b/kernels/portable/cpu/op_cdist_forward.cpp index 1489ec6f6ed..03d6d47ec75 100644 --- a/kernels/portable/cpu/op_cdist_forward.cpp +++ b/kernels/portable/cpu/op_cdist_forward.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include #include @@ -34,7 +35,7 @@ void cdist(const Tensor& x1, const Tensor& x2, Tensor& out, double p) { // If the last dimension of x1 (which is equal to the last dimension of x2) // has size 0, then the output is filled with 0s. if (x1.numel() == 0) { - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { out_data[out_ix] = 0; } return; @@ -64,7 +65,7 @@ void cdist(const Tensor& x1, const Tensor& x2, Tensor& out, double p) { size_t x2_inner_size = R * M; size_t out_inner_size = P * R; - for (size_t b = 0; b < out_batch_numel; ++b) { + for (const auto b : c10::irange(out_batch_numel)) { size_t x1_base_ix = b * x1_inner_size; size_t x2_base_ix = b * x2_inner_size; size_t out_base_ix = b * out_inner_size; @@ -81,14 +82,13 @@ void cdist(const Tensor& x1, const Tensor& x2, Tensor& out, double p) { x2_base_ix = linearize_access_indexes(out_base_coord, out.dim(), x2); } } - size_t out_ix = 0; - for (size_t i = 0; i < P; ++i) { + for (const auto i : c10::irange(P)) { const CTYPE* row_i = x1_data + x1_base_ix + i * M; - for (size_t j = 0; j < R; ++j) { + for (const auto j : c10::irange(R)) { const CTYPE* row_j = x2_data + x2_base_ix + j * M; CTYPE agg = 0; - for (size_t k = 0; k < M; ++k) { + for (const auto k : c10::irange(M)) { CTYPE diff = std::abs(row_i[k] - row_j[k]); agg = Norm::reduce(agg, Norm::map(diff, p)); } diff --git a/kernels/portable/cpu/op_constant_pad_nd.cpp b/kernels/portable/cpu/op_constant_pad_nd.cpp index 328207d70f3..71dc7ff658f 100644 --- a/kernels/portable/cpu/op_constant_pad_nd.cpp +++ b/kernels/portable/cpu/op_constant_pad_nd.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include @@ -56,7 +57,7 @@ void apply_padding_to_dim( size_t out_step_len = out_strides[dim]; size_t in_step_len = self_strides[dim]; - for (size_t i = 0; i < pad_before; ++i) { + for ([[maybe_unused]] const auto i : c10::irange(pad_before)) { set_all_to_value(out_data, out_step_len, value); out_data += out_step_len; } @@ -75,7 +76,7 @@ void apply_padding_to_dim( } // Otherwise, call this function recursively else { - for (size_t i = 0; i < self_sizes[dim]; ++i) { + for ([[maybe_unused]] const auto i : c10::irange(self_sizes[dim])) { apply_padding_to_dim( ndim, self_data, @@ -94,7 +95,7 @@ void apply_padding_to_dim( } } - for (int i = 0; i < pad_after; ++i) { + for ([[maybe_unused]] const auto i : c10::irange(pad_after)) { set_all_to_value(out_data, out_step_len, value); out_data += out_step_len; } @@ -124,7 +125,7 @@ void constant_pad_nd_out_impl( // Collect sizes and strides of input and output tensors and determine the // last padded dimension size_t last_padded_dim = 0; - for (size_t i = 0; i < ndim; ++i) { + for (const auto i : c10::irange(ndim)) { self_sizes[i] = self.size(i); self_strides[i] = getTrailingDims(self, static_cast(i)); out_sizes[i] = out.size(i); diff --git a/kernels/portable/cpu/op_convolution.cpp b/kernels/portable/cpu/op_convolution.cpp index cdd37e8f78a..44da2cc0f1f 100644 --- a/kernels/portable/cpu/op_convolution.cpp +++ b/kernels/portable/cpu/op_convolution.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -91,25 +92,25 @@ void conv2d_impl( if (!transposed) { w_coord[0] = out_c; // Compute 2D output region - for (size_t out_y = 0; out_y < out_H; ++out_y) { + for (const auto out_y : c10::irange(out_H)) { out_coord[2] = out_y; - for (size_t out_x = 0; out_x < out_W; ++out_x) { + for (const auto out_x : c10::irange(out_W)) { out_coord[3] = out_x; CTYPE accum = 0.0f; - for (size_t in_c = in_c_start; in_c < in_c_start + in_C_per_group; - ++in_c) { + for (const auto in_c : + c10::irange(in_c_start, in_c_start + in_C_per_group)) { in_coord[1] = in_c; w_coord[1] = in_c - in_c_start; - for (size_t w_y = 0; w_y < w_H; ++w_y) { + for (const auto w_y : c10::irange(w_H)) { w_coord[2] = w_y; size_t in_y = stride_y * out_y + dilation_y * w_y - padding_y; in_coord[2] = in_y; // Only proceed if input y coordinate is within bounds if (in_y >= 0 && in_y < in_H) { - for (size_t w_x = 0; w_x < w_W; ++w_x) { + for (const auto w_x : c10::irange(w_W)) { w_coord[3] = w_x; size_t in_x = stride_x * out_x + dilation_x * w_x - padding_x; @@ -143,14 +144,14 @@ void conv2d_impl( } else { // transposed convolution w_coord[1] = out_c - out_c_start; - for (size_t in_y = 0; in_y < in_H; ++in_y) { + for (const auto in_y : c10::irange(in_H)) { in_coord[2] = in_y; - for (size_t in_x = 0; in_x < in_W; ++in_x) { + for (const auto in_x : c10::irange(in_W)) { in_coord[3] = in_x; - for (size_t in_c = in_c_start; in_c < in_c_start + in_C_per_group; - ++in_c) { + for (const auto in_c : + c10::irange(in_c_start, in_c_start + in_C_per_group)) { in_coord[1] = in_c; size_t in_idx = @@ -158,14 +159,14 @@ void conv2d_impl( CTYPE in_val = in_ptr[in_idx]; w_coord[0] = in_c; - for (size_t w_y = 0; w_y < w_H; ++w_y) { + for (const auto w_y : c10::irange(w_H)) { w_coord[2] = w_y; size_t out_y = stride_y * in_y + dilation_y * w_y - padding_y; out_coord[2] = out_y; // Only proceed if output y coordinate is within bounds if (out_y >= 0 && out_y < out_H) { - for (size_t w_x = 0; w_x < w_W; ++w_x) { + for (const auto w_x : c10::irange(w_W)) 
{ w_coord[3] = w_x; size_t out_x = stride_x * in_x + dilation_x * w_x - padding_x; out_coord[3] = out_x; @@ -302,7 +303,7 @@ void convolution_wrapper( memset(out_ptr, 0, out.nbytes()); } else { // If bias is present, we initialize the output to the bias value - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { out_ptr[out_ix] = load_bias(&bias_ptr [((out_ix / out_strides[1]) % out_C) * bias.value().element_size()]); @@ -310,13 +311,13 @@ void convolution_wrapper( } } - for (size_t batch = 0; batch < out_N; ++batch) { - for (size_t group = 0; group < groups; ++group) { + for (const auto batch : c10::irange(out_N)) { + for (const auto group : c10::irange(groups)) { // Align channel offset based on the group size_t out_c_start = group * out_C_per_group; // Populate all the out channels in the group - for (size_t out_c = out_c_start; out_c < out_c_start + out_C_per_group; - ++out_c) { + for (const auto out_c : + c10::irange(out_c_start, out_c_start + out_C_per_group)) { conv2d_impl( in_ptr, in_sizes, diff --git a/kernels/portable/cpu/op_diagonal_copy.cpp b/kernels/portable/cpu/op_diagonal_copy.cpp index 6d923a6d904..445bfd2027f 100644 --- a/kernels/portable/cpu/op_diagonal_copy.cpp +++ b/kernels/portable/cpu/op_diagonal_copy.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include #include @@ -40,20 +41,21 @@ void diagonal_copy_impl( size_t new_ndim = out.dim(); int64_t new_sizes[kTensorDimensionLimit]; - for (size_t i = 0; i < new_ndim; ++i) { + for (const auto i : c10::irange(new_ndim)) { new_sizes[i] = out.size(i); } int64_t new_strides[kTensorDimensionLimit]; size_t shift = 0; - for (size_t d = 0; d < in.dim(); ++d) { - if (d == dim1 || d == dim2) { + size_t in_dim = in.dim(); + for (const auto d : c10::irange(in_dim)) { + if (static_cast(d) == dim1 || static_cast(d) == dim2) { shift++; } else { new_strides[d - shift] = in.strides().at(d); } } - new_strides[in.dim() - 2] = in.strides().at(dim1) + in.strides().at(dim2); + new_strides[in_dim - 2] = in.strides().at(dim1) + in.strides().at(dim2); as_strided_copy( in, {new_sizes, new_ndim}, {new_strides, new_ndim}, storage_offset, out); diff --git a/kernels/portable/cpu/op_flip.cpp b/kernels/portable/cpu/op_flip.cpp index 41e99953c93..8ad122b7e7e 100644 --- a/kernels/portable/cpu/op_flip.cpp +++ b/kernels/portable/cpu/op_flip.cpp @@ -5,6 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -25,7 +26,7 @@ size_t unflip_flat_ix(size_t ix, const Tensor& in, ArrayRef flip_dim) { indexToCoordinate(in, ix, ix_coord); size_t unflip_coord[kTensorDimensionLimit]; - for (size_t d = 0; d < in.dim(); d++) { + for (const auto d : c10::irange(in.dim())) { if (flip_dim[d]) { unflip_coord[d] = in.size(d) - ix_coord[d] - 1; } else { @@ -54,10 +55,10 @@ Tensor& flip_out( ET_KERNEL_CHECK(ctx, check_flip_args(in, dims, out), InvalidArgument, out); bool flip_dim_data[kTensorDimensionLimit]; - for (size_t i = 0; i < in.dim(); i++) { + for (const auto i : c10::irange(in.dim())) { flip_dim_data[i] = false; } - for (size_t i = 0; i < dims.size(); i++) { + for (const auto i : c10::irange(dims.size())) { const auto d = dims[i] < 0 ? 
dims[i] + nonzero_dim(in) : dims[i]; flip_dim_data[d] = true; } @@ -70,7 +71,7 @@ Tensor& flip_out( const CTYPE* in_data = in.const_data_ptr(); CTYPE* out_data = out.mutable_data_ptr(); - for (size_t ix = 0; ix < out.numel(); ++ix) { + for (const auto ix : c10::irange(in.numel())) { out_data[ix] = in_data[unflip_flat_ix(ix, in, flip_dim)]; } }); diff --git a/kernels/portable/cpu/op_full.cpp b/kernels/portable/cpu/op_full.cpp index 668033a44af..69b4c8fd150 100644 --- a/kernels/portable/cpu/op_full.cpp +++ b/kernels/portable/cpu/op_full.cpp @@ -5,6 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -44,7 +45,7 @@ Tensor& full_out( ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, name, CTYPE_OUT, [&] { CTYPE_OUT val_casted = static_cast(val); auto data_out = out.mutable_data_ptr(); - for (size_t i = 0; i < out.numel(); ++i) { + for (const auto i : c10::irange(out.numel())) { data_out[i] = val_casted; } }); diff --git a/kernels/portable/cpu/op_full_like.cpp b/kernels/portable/cpu/op_full_like.cpp index 2aeb45d22f4..7671cd61ea9 100644 --- a/kernels/portable/cpu/op_full_like.cpp +++ b/kernels/portable/cpu/op_full_like.cpp @@ -5,6 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -60,7 +61,7 @@ Tensor& full_like_out( ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, name, CTYPE_OUT, [&] { CTYPE_OUT val_casted = static_cast(val); auto data_out = out.mutable_data_ptr(); - for (size_t i = 0; i < out.numel(); ++i) { + for (const auto i : c10::irange(out.numel())) { data_out[i] = val_casted; } }); diff --git a/kernels/portable/cpu/op_gather.cpp b/kernels/portable/cpu/op_gather.cpp index 3f2e365503f..9899c21a94e 100644 --- a/kernels/portable/cpu/op_gather.cpp +++ b/kernels/portable/cpu/op_gather.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include #include @@ -37,12 +38,12 @@ void gather_helper( return; } - for (size_t ix = 0; ix < index.numel(); ++ix) { + for (const auto ix : c10::irange(index.numel())) { size_t ix_coord[kTensorDimensionLimit]; indexToCoordinate(index, ix, ix_coord); size_t in_coord[kTensorDimensionLimit]; - for (size_t i = 0; i < out.dim(); ++i) { + for (const auto i : c10::irange(out.dim())) { if (i == dim) { in_coord[i] = index_data[ix]; } else { diff --git a/kernels/portable/cpu/op_glu.cpp b/kernels/portable/cpu/op_glu.cpp index 20fb3cf0290..edc82c55eb8 100644 --- a/kernels/portable/cpu/op_glu.cpp +++ b/kernels/portable/cpu/op_glu.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include #include @@ -37,7 +38,7 @@ float exp_overload(float f) { template void sigmoid_tensor(Tensor& out) { CTYPE_OUT* out_data = out.mutable_data_ptr(); - for (size_t i = 0; i < out.numel(); i++) { + for (const auto i : c10::irange(out.numel())) { out_data[i] = 1.0 / (1.0 + exp_overload(-out_data[i])); } } @@ -57,13 +58,13 @@ void mul_tensors(const Tensor& in, int64_t dim, Tensor& out) { const CTYPE_IN* input_data_base = in.const_data_ptr(); CTYPE_OUT* output_data_base = out.mutable_data_ptr(); - for (size_t i = 0; i < leading_dims; i++) { + for (const auto i : c10::irange(leading_dims)) { const CTYPE_IN* input_data = input_data_base + i * dim_length_in * trailing_dims; CTYPE_OUT* output_data = output_data_base + i * dim_length_out * trailing_dims; - for (size_t j = 0; j < num_values; j++) { - for (size_t k = 0; k < trailing_dims; ++k) { + for ([[maybe_unused]] const auto j : c10::irange(num_values)) { + for (const auto k : c10::irange(trailing_dims)) { output_data[k] = static_cast(input_data[k]) * output_data[k]; } input_data += trailing_dims; @@ -94,13 +95,13 @@ void slice_tensor( const CTYPE_IN* input_data_base = in.const_data_ptr(); CTYPE_OUT* output_data_base = out.mutable_data_ptr(); - for (size_t i = 0; i < leading_dims; i++) { + for (const auto i : c10::irange(leading_dims)) { const CTYPE_IN* input_data = input_data_base + (i * dim_length_in + non_negative_start) * trailing_dims; CTYPE_OUT* output_data = output_data_base + i * dim_length_out * trailing_dims; - for (size_t j = 0; j < num_values; j++) { - for (size_t k = 0; k < trailing_dims; ++k) { + for ([[maybe_unused]] const auto j : c10::irange(num_values)) { + for (const auto k : c10::irange(trailing_dims)) { output_data[k] = static_cast(input_data[k]); } input_data += trailing_dims; diff --git a/kernels/portable/cpu/op_index_put.cpp b/kernels/portable/cpu/op_index_put.cpp index f22026d759c..942892c31ec 100644 --- a/kernels/portable/cpu/op_index_put.cpp +++ b/kernels/portable/cpu/op_index_put.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -116,7 +117,7 @@ Tensor& index_put_out( // Compute the number of elements in the indexed space size_t x_numel = 1; - for (size_t i = 0; i < x_dim; i++) { + for (const auto i : c10::irange(x_dim)) { x_numel *= x_sizes[i]; } @@ -124,7 +125,7 @@ Tensor& index_put_out( const CTYPE* const values_data = values.const_data_ptr(); CTYPE* const out_data = out.mutable_data_ptr(); - for (auto x_ix = 0; x_ix < x_numel; x_ix++) { + for (const auto x_ix : c10::irange(x_numel)) { size_t in_ix = 0; size_t x_coord[kTensorDimensionLimit]; diff --git a/kernels/portable/cpu/op_index_select.cpp b/kernels/portable/cpu/op_index_select.cpp index 98f8f9f7ab0..fb39a42e5a2 100644 --- a/kernels/portable/cpu/op_index_select.cpp +++ b/kernels/portable/cpu/op_index_select.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include #include @@ -73,10 +74,10 @@ Tensor& index_select_out( ET_SWITCH_TWO_TYPES( Long, Int, ix_type, ctx, "index_select.out", CTYPE, [&]() { const CTYPE* const index_arr = index.mutable_data_ptr(); - for (int i = 0; i < leading_dims; i++) { + for (const auto i : c10::irange(leading_dims)) { const char* src = input_data + i * in_dim_length * length_per_step; char* dest = out_data + i * out_dim_length * length_per_step; - for (auto j = 0; j < out_dim_length; j++) { + for (const auto j : c10::irange(out_dim_length)) { const char* copy_src = src + index_arr[j] * length_per_step; memcpy(dest, copy_src, length_per_step); dest += length_per_step; diff --git a/kernels/portable/cpu/op_linear_scratch_example.cpp b/kernels/portable/cpu/op_linear_scratch_example.cpp index eae2417fe32..096fea8bc4c 100644 --- a/kernels/portable/cpu/op_linear_scratch_example.cpp +++ b/kernels/portable/cpu/op_linear_scratch_example.cpp @@ -102,7 +102,9 @@ Tensor& linear_scratch_example( // add the bias if (bias.has_value()) { - ET_CHECK_MSG(K == bias.value().numel(), "Unexpected numel for bias"); + ET_CHECK_MSG( + static_cast(K) == bias.value().numel(), + "Unexpected numel for bias"); for (size_t i = 0; i < M; ++i) { for (size_t j = 0; j < K; ++j) { scalar_t* scratch_ptr = diff --git a/kernels/portable/cpu/op_masked_select.cpp b/kernels/portable/cpu/op_masked_select.cpp index b176000f6c8..88a568be5ac 100644 --- a/kernels/portable/cpu/op_masked_select.cpp +++ b/kernels/portable/cpu/op_masked_select.cpp @@ -5,6 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -53,14 +54,14 @@ Tensor& masked_select_out( ctx, false, InvalidArgument, out, "Failed to broadcast input and mask"); } size_t broadcast_numel = 1; - for (size_t i = 0; i < broadcast_ndim; i++) { + for (const auto i : c10::irange(broadcast_ndim)) { broadcast_numel *= broadcast_sizes[i]; } // Compute the number of out elements size_t mask_true_count = 0; const bool* const mask_data = mask.const_data_ptr(); - for (size_t i = 0; i < mask.numel(); ++i) { + for (const auto i : c10::irange(mask.numel())) { if (mask_data[i]) { mask_true_count++; } @@ -79,10 +80,10 @@ Tensor& masked_select_out( // Figure out if `in` is broadcasted bool in_is_broadcasted = false; - if (in.dim() != broadcast_ndim) { + if (in.dim() != static_cast(broadcast_ndim)) { in_is_broadcasted = true; } else { - for (size_t i = 0; i < in.dim(); ++i) { + for (const auto i : c10::irange(in.dim())) { if (in.size(i) != broadcast_sizes[i]) { in_is_broadcasted = true; } @@ -91,10 +92,10 @@ Tensor& masked_select_out( // Figure out if `mask` is broadcasted bool mask_is_broadcasted = false; - if (mask.dim() != broadcast_ndim) { + if (mask.dim() != static_cast(broadcast_ndim)) { mask_is_broadcasted = true; } else { - for (size_t i = 0; i < mask.dim(); ++i) { + for (const auto i : c10::irange(mask.dim())) { if (mask.size(i) != broadcast_sizes[i]) { mask_is_broadcasted = true; } @@ -105,7 +106,7 @@ Tensor& masked_select_out( bool any_is_broadcasted = (in_is_broadcasted || mask_is_broadcasted); size_t out_ix = 0; - for (size_t i = 0; i < broadcast_numel; ++i) { + for (const auto i : c10::irange(broadcast_numel)) { size_t in_linear_index = i; size_t mask_linear_index = i; diff --git a/kernels/portable/cpu/op_max.cpp b/kernels/portable/cpu/op_max.cpp index c5b5d2fb6bc..f206ee05b99 100644 --- a/kernels/portable/cpu/op_max.cpp +++ b/kernels/portable/cpu/op_max.cpp @@ -6,6 +6,7 
@@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -82,7 +83,7 @@ std::tuple max_out( CTYPE* max_data = max.mutable_data_ptr(); long* max_indices_data = max_indices.mutable_data_ptr(); - for (size_t out_ix = 0; out_ix < max.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(max.numel())) { std::tuple acc = reduce_over_dim( [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) { if (!std::isnan(acc_val) && (std::isnan(v) || v > acc_val)) { @@ -124,7 +125,7 @@ max_unary_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { const auto data_in = in.const_data_ptr(); auto data_out = out.mutable_data_ptr(); data_out[0] = lower_bound(); - for (auto i = 0; i < in.numel(); ++i) { + for (const auto i : c10::irange(in.numel())) { CTYPE_OUT val = static_cast(data_in[i]); if (std::isnan(val)) { data_out[0] = val; diff --git a/kernels/portable/cpu/op_mean.cpp b/kernels/portable/cpu/op_mean.cpp index c0316e685d6..77f74ae7cac 100644 --- a/kernels/portable/cpu/op_mean.cpp +++ b/kernels/portable/cpu/op_mean.cpp @@ -5,6 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -49,7 +50,7 @@ Tensor& mean_dim_out( out.scalar_type(), ctx, "mean.out", CTYPE_OUT, [&] { CTYPE_OUT* out_data = out.mutable_data_ptr(); const size_t num = get_reduced_dim_product(in, dim_list); - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { CTYPE_OUT sum = 0; if (in.numel() > 0) { sum = map_reduce_over_dim_list( diff --git a/kernels/portable/cpu/op_min.cpp b/kernels/portable/cpu/op_min.cpp index ca8a9135ccd..683ef751a9d 100644 --- a/kernels/portable/cpu/op_min.cpp +++ b/kernels/portable/cpu/op_min.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -82,7 +83,7 @@ std::tuple min_out( CTYPE* min_data = min.mutable_data_ptr(); long* min_indices_data = min_indices.mutable_data_ptr(); - for (size_t out_ix = 0; out_ix < min.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(min.numel())) { std::tuple acc = reduce_over_dim( [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) { if (!std::isnan(acc_val) && (std::isnan(v) || v < acc_val)) { @@ -124,7 +125,7 @@ min_unary_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { const auto data_in = in.const_data_ptr(); auto data_out = out.mutable_data_ptr(); data_out[0] = upper_bound(); - for (auto i = 0; i < in.numel(); ++i) { + for (const auto i : c10::irange(in.numel())) { CTYPE_OUT val = static_cast(data_in[i]); if (std::isnan(val)) { data_out[0] = val; diff --git a/kernels/portable/cpu/op_native_group_norm.cpp b/kernels/portable/cpu/op_native_group_norm.cpp index d4937532161..c373dfe26bd 100644 --- a/kernels/portable/cpu/op_native_group_norm.cpp +++ b/kernels/portable/cpu/op_native_group_norm.cpp @@ -5,6 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include @@ -51,7 +52,7 @@ void group_norm( CTYPE* rstd_data = rstd.mutable_data_ptr(); if (inner_size == 0) { - for (int i = 0; i < leading; ++i) { + for (const auto i : c10::irange(leading)) { mean_data[i] = static_cast(0); rstd_data[i] = static_cast(NAN); } @@ -72,7 +73,7 @@ void group_norm( bias_data = nullptr; } - for (int i = 0; i < leading; ++i) { + for (const auto i : c10::irange(leading)) { const CTYPE* x = input_data + i * inner_size; // compute E[X] and Var[x] = E[x^2] - E[x]^2 @@ -86,12 +87,12 @@ void group_norm( // Calculate the elements of output if (weight_data == nullptr && bias_data == nullptr) { CTYPE* y = out_data + i * inner_size; - for (size_t j = 0; j < inner_size; j++) { + for (const auto j : c10::irange(inner_size)) { y[j] = (x[j] - mean_value) * rstd_value; } } else { const size_t g = i % G; - for (size_t j = 0; j < D; j++) { + for (const auto j : c10::irange(D)) { const size_t ch = g * D + j; const CTYPE scale = rstd_value * (weight_data == nullptr ? 1.0 : weight_data[ch]); @@ -99,7 +100,7 @@ void group_norm( -scale * mean_value + (bias_data == nullptr ? 0.0 : bias_data[ch]); x = input_data + (i * D + j) * HxW; CTYPE* y = out_data + (i * D + j) * HxW; - for (size_t k = 0; k < HxW; k++) { + for (const auto k : c10::irange(HxW)) { y[k] = scale * x[k] + beta; } } diff --git a/kernels/portable/cpu/op_native_layer_norm.cpp b/kernels/portable/cpu/op_native_layer_norm.cpp index 2e70e5d2ba9..66c80b7cccc 100644 --- a/kernels/portable/cpu/op_native_layer_norm.cpp +++ b/kernels/portable/cpu/op_native_layer_norm.cpp @@ -5,6 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -45,7 +46,7 @@ void layer_norm( CTYPE* rstd_data = rstd.mutable_data_ptr(); if (normalized == 0) { - for (int i = 0; i < leading; ++i) { + for (const auto i : c10::irange(leading)) { mean_data[i] = static_cast(0); rstd_data[i] = static_cast(NAN); } @@ -67,7 +68,7 @@ void layer_norm( } const CTYPE ct_normalized = static_cast(normalized); - for (int i = 0; i < leading; ++i) { + for (const auto i : c10::irange(leading)) { const CTYPE* x = input_data + i * normalized; CTYPE* y = out_data + i * normalized; @@ -79,7 +80,7 @@ void layer_norm( CTYPE std = std::sqrt(variance + eps); // Calculate the elements of output - for (int j = 0; j < normalized; ++j) { + for (const auto j : c10::irange(normalized)) { CTYPE w = weight_data ? weight_data[j] : static_cast(1); CTYPE b = bias_data ? bias_data[j] : static_cast(0); y[j] = (x[j] - mean_value) / std * w + b; diff --git a/kernels/portable/cpu/op_nonzero.cpp b/kernels/portable/cpu/op_nonzero.cpp index 20e10be4b65..5a319b95749 100644 --- a/kernels/portable/cpu/op_nonzero.cpp +++ b/kernels/portable/cpu/op_nonzero.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include @@ -26,7 +27,7 @@ namespace { void increment_index(size_t* index, const ArrayRef sizes) { for (ssize_t i = sizes.size() - 1; i >= 0; --i) { index[i]++; - if (index[i] == sizes[i]) { + if (static_cast(index[i]) == sizes[i]) { index[i] = 0; } else { return; @@ -45,7 +46,7 @@ void nonzero(KernelRuntimeContext& ctx, const Tensor& input, Tensor& output) { int32_t num_nonzero = 0; // Count number of non zeros - for (size_t i = 0; i < lim; ++i) { + for (const auto i : c10::irange(lim)) { if (in_data[i] != 0) { num_nonzero++; } @@ -68,9 +69,9 @@ void nonzero(KernelRuntimeContext& ctx, const Tensor& input, Tensor& output) { size_t out_idx = 0; // Loop again and this time write the proper indices into out - for (size_t i = 0; i < lim; i++) { + for (const auto i : c10::irange(lim)) { if (in_data[i] != 0) { - for (size_t j = 0; j < input.dim(); j++) { + for (const auto j : c10::irange(input.dim())) { out_data[out_idx++] = index[j]; } } diff --git a/kernels/portable/cpu/op_ones.cpp b/kernels/portable/cpu/op_ones.cpp index 9135966e9d8..68826ab1a1f 100644 --- a/kernels/portable/cpu/op_ones.cpp +++ b/kernels/portable/cpu/op_ones.cpp @@ -5,6 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ +#include #include @@ -22,7 +23,7 @@ Tensor& ones_out(KernelRuntimeContext& ctx, IntArrayRef size, Tensor& out) { ScalarType out_type = out.scalar_type(); ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, __func__, CTYPE, [&] { auto out_data = out.mutable_data_ptr(); - for (size_t i = 0; i < out.numel(); i++) { + for (const auto i : c10::irange(out.numel())) { out_data[i] = static_cast(1); } }); diff --git a/kernels/portable/cpu/op_permute_copy.cpp b/kernels/portable/cpu/op_permute_copy.cpp index 237b31ee988..719f8fcb445 100644 --- a/kernels/portable/cpu/op_permute_copy.cpp +++ b/kernels/portable/cpu/op_permute_copy.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -26,7 +27,7 @@ void increment_coordinate_permuted( for (int i = dims.size() - 1; i >= 0; i--) { size_t d = dims[i] >= 0 ? dims[i] : dims[i] + tensor.dim(); coordinate[d]++; - if (coordinate[d] == tensor.size(d)) { + if (static_cast(coordinate[d]) == tensor.size(d)) { coordinate[d] = 0; } else { return; @@ -70,7 +71,7 @@ Tensor& permute_copy_out( const CTYPE* const in_data = in.const_data_ptr(); CTYPE* const out_data = out.mutable_data_ptr(); - for (size_t i = 0; i < out.numel(); ++i) { + for (const auto i : c10::irange(out.numel())) { out_data[i] = in_data[executorch::runtime::coordinateToIndexWithTrailingDimsMemo( in, in_coord, trailing_dims_memo)]; diff --git a/kernels/portable/cpu/op_pixel_shuffle.cpp b/kernels/portable/cpu/op_pixel_shuffle.cpp index a3bb417d9d5..fd9f1739e57 100644 --- a/kernels/portable/cpu/op_pixel_shuffle.cpp +++ b/kernels/portable/cpu/op_pixel_shuffle.cpp @@ -5,6 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include @@ -38,12 +39,12 @@ void pixel_shuffle_impl(const Tensor& in, int64_t upscale_factor, Tensor& out) { // input tensor shape of [n, c, s1, s2, h, w] // output tensor shape of [n, c, h, s1, w, s2] size_t i = 0; - for (size_t n = 0; n < leading_dims; n++) { - for (size_t c = 0; c < sub_channels; c++) { - for (size_t h = 0; h < height; h++) { - for (size_t s1 = 0; s1 < S; s1++) { - for (size_t w = 0; w < width; w++) { - for (size_t s2 = 0; s2 < S; s2++) { + for (const auto n : c10::irange(leading_dims)) { + for (const auto c : c10::irange(sub_channels)) { + for (const auto h : c10::irange(height)) { + for (const auto s1 : c10::irange(S)) { + for (const auto w : c10::irange(width)) { + for (const auto s2 : c10::irange(S)) { size_t input_offset = n * stride_n + c * stride_c + s1 * stride_s1 + s2 * stride_s2 + h * stride_h + w; std::memcpy( diff --git a/kernels/portable/cpu/op_pixel_unshuffle.cpp b/kernels/portable/cpu/op_pixel_unshuffle.cpp index f0bd5e4d10f..68d7bbbc27a 100644 --- a/kernels/portable/cpu/op_pixel_unshuffle.cpp +++ b/kernels/portable/cpu/op_pixel_unshuffle.cpp @@ -5,6 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -41,12 +42,12 @@ void pixel_unshuffle_impl( // input tensor shape of [n, c, h, s1, w, s2] // output tensor shape of [n, c, s1, s2, h, w] size_t i = 0; - for (size_t n = 0; n < leading_dims; n++) { - for (size_t c = 0; c < sub_channels; c++) { - for (size_t h = 0; h < height; h++) { - for (size_t s1 = 0; s1 < S; s1++) { - for (size_t w = 0; w < width; w++) { - for (size_t s2 = 0; s2 < S; s2++) { + for (const auto n : c10::irange(leading_dims)) { + for (const auto c : c10::irange(sub_channels)) { + for (const auto h : c10::irange(height)) { + for (const auto s1 : c10::irange(S)) { + for (const auto w : c10::irange(width)) { + for (const auto s2 : c10::irange(S)) { size_t output_offset = n * stride_n + c * stride_c + s1 * stride_s1 + s2 * stride_s2 + h * stride_h + w; std::memcpy( diff --git a/kernels/portable/cpu/op_prod.cpp b/kernels/portable/cpu/op_prod.cpp index 61bda38f68f..27d18ca2570 100644 --- a/kernels/portable/cpu/op_prod.cpp +++ b/kernels/portable/cpu/op_prod.cpp @@ -5,6 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -38,7 +39,7 @@ Tensor& prod_out( const auto data_in = in.const_data_ptr(); auto data_out = out.mutable_data_ptr(); data_out[0] = static_cast(1); - for (auto i = 0; i < in.numel(); ++i) { + for (const auto i : c10::irange(in.numel())) { data_out[0] *= static_cast(data_in[i]); } }); @@ -76,7 +77,7 @@ Tensor& prod_int_out( ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, name, CTYPE_IN, [&] { ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, name, CTYPE_OUT, [&] { CTYPE_OUT* out_data = out.mutable_data_ptr(); - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { CTYPE_OUT prod = 1; if (in.numel() > 0) { std::tuple acc = diff --git a/kernels/portable/cpu/op_repeat.cpp b/kernels/portable/cpu/op_repeat.cpp index dc9a7232152..1d42cc90189 100644 --- a/kernels/portable/cpu/op_repeat.cpp +++ b/kernels/portable/cpu/op_repeat.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include @@ -29,11 +30,11 @@ bool calculate_output_size( repeats.size(), self_sizes.size()); - int32_t i = 0; + size_t i = 0; for (; i < (repeats.size() - self_sizes.size()); ++i) { out_sizes_ptr[i] = static_cast(repeats[i]); } - int32_t j = 0; + size_t j = 0; for (; i < repeats.size(); ++i) { out_sizes_ptr[i] = static_cast(repeats[i]) * self_sizes[j]; diff --git a/kernels/portable/cpu/op_repeat_interleave.cpp b/kernels/portable/cpu/op_repeat_interleave.cpp index 61c9fbfdb82..4ee77695f86 100644 --- a/kernels/portable/cpu/op_repeat_interleave.cpp +++ b/kernels/portable/cpu/op_repeat_interleave.cpp @@ -5,6 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ +#include #include @@ -30,13 +31,13 @@ bool check_repeat_interleave_args( if (repeats.scalar_type() == ScalarType::Long) { const int64_t* const repeats_data = repeats.const_data_ptr(); - for (size_t i = 0; i < repeats.numel(); ++i) { + for (const auto i : c10::irange(repeats.numel())) { ET_CHECK_OR_RETURN_FALSE( repeats_data[i] >= 0, "repeats cannot be negative"); } } else { const int32_t* const repeats_data = repeats.const_data_ptr(); - for (size_t i = 0; i < repeats.numel(); ++i) { + for (const auto i : c10::irange(repeats.numel())) { ET_CHECK_OR_RETURN_FALSE( repeats_data[i] >= 0, "repeats cannot be negative"); } @@ -62,7 +63,7 @@ Tensor& repeat_interleave_Tensor_out( ET_SWITCH_TWO_TYPES(Int, Long, repeats.scalar_type(), ctx, name, CTYPE, [&] { const CTYPE* repeats_data = repeats.const_data_ptr(); - for (size_t ix = 0; ix < repeats.numel(); ++ix) { + for (const auto ix : c10::irange(repeats.numel())) { repeats_sum += static_cast(repeats_data[ix]); } }); @@ -96,7 +97,7 @@ Tensor& repeat_interleave_Tensor_out( const CTYPE* repeats_data = repeats.const_data_ptr(); CTYPE* out_data = out.mutable_data_ptr(); size_t out_ix = 0; - for (size_t ix = 0; ix < repeats.numel(); ix++) { + for (const auto ix : c10::irange(repeats.numel())) { for (CTYPE i = 0; i < repeats_data[ix]; i++, out_ix++) { out_data[out_ix] = static_cast(ix); } diff --git a/kernels/portable/cpu/op_roll.cpp b/kernels/portable/cpu/op_roll.cpp index ee735758c52..109be64fbed 100644 --- a/kernels/portable/cpu/op_roll.cpp +++ b/kernels/portable/cpu/op_roll.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -36,7 +37,7 @@ size_t unshift_flat_ix(size_t ix, const Tensor& in, IntArrayRef dim_shifts) { indexToCoordinate(in, ix, ix_coord); size_t shifted_coord[kTensorDimensionLimit]; - for (size_t d = 0; d < in.dim(); d++) { + for (const auto d : c10::irange(in.dim())) { shifted_coord[d] = (ix_coord[d] + in.size(d) - dim_shifts[d] % in.size(d)) % in.size(d); } @@ -68,10 +69,10 @@ Tensor& roll_out( } int64_t dim_shift_array[kTensorDimensionLimit]; - for (size_t i = 0; i < in.dim(); i++) { + for (const auto i : c10::irange(in.dim())) { dim_shift_array[i] = 0; } - for (size_t i = 0; i < dims.size(); i++) { + for (const auto i : c10::irange(dims.size())) { const auto d = dims[i] < 0 ? 
dims[i] + in.dim() : dims[i]; dim_shift_array[d] += shifts[i]; } @@ -85,7 +86,7 @@ Tensor& roll_out( const CTYPE* in_data = in.const_data_ptr(); CTYPE* out_data = out.mutable_data_ptr(); - for (size_t ix = 0; ix < out.numel(); ++ix) { + for (const auto ix : c10::irange(out.numel())) { out_data[ix] = in_data[unshift_flat_ix(ix, in, dim_shifts)]; } }); diff --git a/kernels/portable/cpu/op_scatter.cpp b/kernels/portable/cpu/op_scatter.cpp index af4ca8a8390..f8f4b21264e 100644 --- a/kernels/portable/cpu/op_scatter.cpp +++ b/kernels/portable/cpu/op_scatter.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include #include @@ -41,7 +42,7 @@ void scatter_src_helper( dim += nonzero_dim(in); } - for (size_t ix = 0; ix < index.numel(); ++ix) { + for (const auto ix : c10::irange(index.numel())) { // @lint-ignore CLANGTIDY facebook-hte-CArray size_t ix_coord[kTensorDimensionLimit]; indexToCoordinate(index, ix, ix_coord); @@ -50,7 +51,7 @@ void scatter_src_helper( // @lint-ignore CLANGTIDY facebook-hte-CArray size_t out_coord[kTensorDimensionLimit]; - for (size_t i = 0; i < out.dim(); ++i) { + for (const auto i : c10::irange(out.dim())) { if (i == dim) { out_coord[i] = index_data[ix]; } else { @@ -80,14 +81,14 @@ void scatter_value_helper( dim += nonzero_dim(in); } - for (size_t ix = 0; ix < index.numel(); ++ix) { + for (const auto ix : c10::irange(index.numel())) { // @lint-ignore CLANGTIDY facebook-hte-CArray size_t ix_coord[kTensorDimensionLimit]; indexToCoordinate(index, ix, ix_coord); // @lint-ignore CLANGTIDY facebook-hte-CArray size_t out_coord[kTensorDimensionLimit]; - for (size_t i = 0; i < out.dim(); ++i) { + for (const auto i : c10::irange(out.dim())) { if (i == dim) { out_coord[i] = index_data[ix]; } else { diff --git a/kernels/portable/cpu/op_scatter_add.cpp b/kernels/portable/cpu/op_scatter_add.cpp index 1b53777e731..b83a56c2e01 100644 --- a/kernels/portable/cpu/op_scatter_add.cpp +++ b/kernels/portable/cpu/op_scatter_add.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include #include @@ -28,14 +29,14 @@ void scatter_add_helper( const Tensor& index, Tensor& out, int64_t dim) { - for (size_t ix = 0; ix < index.numel(); ++ix) { + for (const auto ix : c10::irange(index.numel())) { size_t ix_coord[kTensorDimensionLimit]; indexToCoordinate(index, ix, ix_coord); size_t src_ix = coordinateToIndex(src, ix_coord); size_t out_coord[kTensorDimensionLimit]; - for (size_t i = 0; i < out.dim(); ++i) { + for (const auto i : c10::irange(out.dim())) { if (i == dim) { out_coord[i] = index_data[ix]; } else { diff --git a/kernels/portable/cpu/op_select_scatter.cpp b/kernels/portable/cpu/op_select_scatter.cpp index e4622d8fda2..18c39c005d5 100644 --- a/kernels/portable/cpu/op_select_scatter.cpp +++ b/kernels/portable/cpu/op_select_scatter.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include #include @@ -79,8 +80,8 @@ Tensor& select_scatter_out( CTYPE* const out_data = out.mutable_data_ptr(); const CTYPE_SRC* const src_data = src.const_data_ptr(); - for (size_t i = 0; i < leading_dims; ++i) { - for (size_t j = 0; j < trailing_stride; ++j) { + for (const auto i : c10::irange(leading_dims)) { + for (const auto j : c10::irange(trailing_stride)) { out_data[start_offset + i * out_step + j] = convert(src_data[i * trailing_stride + j]); } diff --git a/kernels/portable/cpu/op_slice_scatter.cpp b/kernels/portable/cpu/op_slice_scatter.cpp index c2fe2d70581..5a9138a0359 100644 --- a/kernels/portable/cpu/op_slice_scatter.cpp +++ b/kernels/portable/cpu/op_slice_scatter.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -85,10 +86,10 @@ Tensor& slice_scatter_out( size_t src_offset = 0; - for (int i = 0; i < leading_dims; i++) { + for (const auto i : c10::irange(leading_dims)) { size_t out_offset = (i * dim_length + start) * trailing_dims; - for (int j = 0; j < num_values; j++) { - for (size_t k = 0; k < trailing_dims; ++k) { + for ([[maybe_unused]] const auto j : c10::irange(num_values)) { + for (const auto k : c10::irange(trailing_dims)) { out_data[out_offset + k] = convert(src_data[src_offset + k]); } diff --git a/kernels/portable/cpu/op_split_with_sizes_copy.cpp b/kernels/portable/cpu/op_split_with_sizes_copy.cpp index daa0845c6f3..c99a7fb6815 100644 --- a/kernels/portable/cpu/op_split_with_sizes_copy.cpp +++ b/kernels/portable/cpu/op_split_with_sizes_copy.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -38,7 +39,8 @@ void split_with_sizes_copy_out( check_split_with_sizes_copy_args(in, split_sizes, dim, out), InvalidArgument, ); - for (size_t i = 0; i < out.size(); ++i) { + // All output tensors must have the same dim order as the input + for (const auto i : c10::irange(out.size())) { ET_KERNEL_CHECK( ctx, tensors_have_same_dim_order(in, out[i]), InvalidArgument, ); } @@ -52,11 +54,12 @@ void split_with_sizes_copy_out( // Check that all chunks broadcast to their respective out tensor Tensor::SizesType target_out_sizes[kTensorDimensionLimit]; size_t target_out_ndim = in.dim(); - for (size_t d = 0; d < in.dim(); ++d) { + + for (const auto d : c10::irange(in.dim())) { target_out_sizes[d] = static_cast(in.size(d)); } - for (size_t i = 0; i < split_sizes.size(); i++) { + for (const auto i : c10::irange(split_sizes.size())) { target_out_sizes[dim] = static_cast(split_sizes[i]); ET_KERNEL_CHECK( ctx, @@ -76,7 +79,7 @@ void split_with_sizes_copy_out( const CTYPE_IN* in_data = in.const_data_ptr(); // Iterate through list of out tensors - for (size_t i = 0; i < out.size(); ++i) { + for (const auto i : c10::irange(out.size())) { const Tensor& out_tensor = out[i]; // If out tensor is empty, no action is required @@ -99,8 +102,8 @@ void split_with_sizes_copy_out( // Simpler logic if there's no broadcasting if (!is_broadcasted) { const CTYPE_IN* src = in_data; - for (size_t j = 0; j < leading_dims; ++j) { - for (size_t k = 0; k < chunk_step; ++k) { + for ([[maybe_unused]] const auto j : c10::irange(leading_dims)) { + for (const auto k : c10::irange(chunk_step)) { out_data[k] = convert(src[k]); } src += step; @@ -119,7 +122,7 @@ void split_with_sizes_copy_out( // For each element in the out tensor, find its corresponding index // in the input tensor and copy it over - for (size_t ix = 0; ix < out_tensor.numel(); ++ix) { + for (const auto ix : 
c10::irange(out_tensor.numel())) { size_t out_coord[kTensorDimensionLimit]; delinearize_index(ix, out_tensor, out_coord, kTensorDimensionLimit); diff --git a/kernels/portable/cpu/op_sum.cpp b/kernels/portable/cpu/op_sum.cpp index 0fec3e37f2a..81cf4b5a175 100644 --- a/kernels/portable/cpu/op_sum.cpp +++ b/kernels/portable/cpu/op_sum.cpp @@ -5,6 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -48,7 +49,7 @@ Tensor& sum_dim_out( ET_SWITCH_REALHBBF16_TYPES( out.scalar_type(), ctx, "sum.IntList_out", CTYPE_OUT, [&] { CTYPE_OUT* out_data = out.mutable_data_ptr(); - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { CTYPE_OUT sum = 0; if (in.numel() > 0) { sum = map_reduce_over_dim_list( diff --git a/kernels/portable/cpu/op_topk.cpp b/kernels/portable/cpu/op_topk.cpp index e6ba9afef2c..c56545b9235 100644 --- a/kernels/portable/cpu/op_topk.cpp +++ b/kernels/portable/cpu/op_topk.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -40,8 +41,8 @@ bool get_topk_target_size( Tensor::SizesType* target_size, size_t* target_dim) { *target_dim = in.dim(); - for (size_t i = 0; i < *target_dim; ++i) { - if (i == dim) { + for (const auto i : c10::irange(*target_dim)) { + if (static_cast(i) == dim) { target_size[i] = k; } else { target_size[i] = in.size(i); @@ -90,19 +91,19 @@ void perform_topk( const size_t outer_stride_in = dim_size * dim_stride; const size_t outer_stride_out = k * dim_stride; - bool use_partial_sort = k * 64 <= dim_size; + bool use_partial_sort = k * 64 <= static_cast(dim_size); // Loop through all outer dimensions - for (size_t outer_idx = 0; outer_idx < outer_size; ++outer_idx) { + for (const auto outer_idx : c10::irange(outer_size)) { size_t outer_in = outer_idx * outer_stride_in; size_t outer_out = outer_idx * outer_stride_out; // Loop through all inner dimensions - for (size_t inner_idx = 0; inner_idx < dim_stride; ++inner_idx) { + for (const auto inner_idx : c10::irange(dim_stride)) { size_t base_in = outer_in + inner_idx; size_t base_out = outer_out + inner_idx; // Populate the queue with the values from the input tensor - for (size_t i = 0; i < dim_size; ++i) { + for (const auto i : c10::irange(dim_size)) { size_t in_ix = base_in + i * dim_stride; queue[i].first = in_data[in_ix]; queue[i].second = i; @@ -126,7 +127,7 @@ void perform_topk( } // Write the topk values and indices to the output tensors - for (size_t i = 0; i < k; ++i) { + for (const auto i : c10::irange(k)) { size_t out_ix = base_out + i * dim_stride; values_data[out_ix] = queue[i].first; diff --git a/kernels/portable/cpu/op_tril.cpp b/kernels/portable/cpu/op_tril.cpp index 9e28cff825f..b21c9918a99 100644 --- a/kernels/portable/cpu/op_tril.cpp +++ b/kernels/portable/cpu/op_tril.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include #include @@ -44,8 +45,8 @@ void apply_tril( int64_t num_cols, int64_t row_stride, int64_t col_stride) { - for (int64_t i = 0; i < num_rows; i++) { - for (int64_t j = 0; j < std::min(num_cols, i + diagonal + 1); j++) { + for (const auto i : c10::irange(num_rows)) { + for (const auto j : c10::irange(std::min(num_cols, i + diagonal + 1))) { out[i * row_stride + j * col_stride] = self[i * row_stride + j * col_stride]; } @@ -63,21 +64,21 @@ void tril_kernel( const Tensor& out) { // Dynamically compute `self` sizes and strides. - int64_t ndim = self.dim(); + size_t ndim = static_cast(self.dim()); ET_KERNEL_CHECK_MSG( ctx, ndim < kTensorDimensionLimit, InvalidArgument, , - "ndim %" PRId64 " >= %zu", + "ndim %zu >= %zu", ndim, kTensorDimensionLimit); int64_t sizes[kTensorDimensionLimit]; int64_t strides[kTensorDimensionLimit]; - for (size_t i = 0; i < ndim; ++i) { + for (const auto i : c10::irange(ndim)) { sizes[i] = self.size(i); strides[i] = getTrailingDims(self, static_cast(i)); } @@ -102,7 +103,7 @@ void tril_kernel( int64_t row_stride = strides_ref[ndim - 2]; int64_t col_stride = strides_ref[ndim - 1]; - for (int64_t i = 0; i < batch_size; i++) { + for (const auto i : c10::irange(batch_size)) { CTYPE* __restrict__ data_self_ptr = &data_self[i * self_stride]; CTYPE* __restrict__ data_out_ptr = &data_out[i * self_stride]; diff --git a/kernels/portable/cpu/op_unbind_copy.cpp b/kernels/portable/cpu/op_unbind_copy.cpp index b8ab1e489f2..bcf65c673b3 100644 --- a/kernels/portable/cpu/op_unbind_copy.cpp +++ b/kernels/portable/cpu/op_unbind_copy.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -36,7 +37,7 @@ void unbind_copy_int_out( ET_KERNEL_CHECK( ctx, check_unbind_copy_args(input, dim, out), InvalidArgument, ); - for (int i = 0; i < out.size(); ++i) { + for (const auto i : c10::irange(out.size())) { ET_KERNEL_CHECK( ctx, tensors_have_same_dim_order(input, out[i]), InvalidArgument, ); } @@ -64,8 +65,9 @@ void unbind_copy_int_out( size_t input_offset = i * trailing_dims; CTYPE_OUT* const dest = out[i].mutable_data_ptr(); size_t dest_offset = 0; - for (size_t j = 0; j < leading_dims; ++j) { - for (size_t k = 0; k < trailing_dims; ++k) { + for ([[maybe_unused]] const auto j : + c10::irange(leading_dims)) { + for (const auto k : c10::irange(trailing_dims)) { dest[dest_offset + k] = convert( input_data[input_offset + k]); } diff --git a/kernels/portable/cpu/op_unsqueeze_copy.cpp b/kernels/portable/cpu/op_unsqueeze_copy.cpp index e6eec2e8916..3f0e44ab2ae 100644 --- a/kernels/portable/cpu/op_unsqueeze_copy.cpp +++ b/kernels/portable/cpu/op_unsqueeze_copy.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -38,7 +39,7 @@ Tensor& unsqueeze_copy_out( ET_KERNEL_CHECK(ctx, self.dim() + 1 == out.dim(), InvalidArgument, out); ET_KERNEL_CHECK(ctx, dim <= self.dim(), InvalidArgument, out); - for (size_t i = 0; i < out.dim(); ++i) { + for (const auto i : c10::irange(out.dim())) { if (i < dim) { expected_output_size[i] = self.size(i); } else if (i > dim) { diff --git a/kernels/portable/cpu/op_upsample_bilinear2d.cpp b/kernels/portable/cpu/op_upsample_bilinear2d.cpp index c30abe5e33e..69f7917822b 100644 --- a/kernels/portable/cpu/op_upsample_bilinear2d.cpp +++ b/kernels/portable/cpu/op_upsample_bilinear2d.cpp @@ -5,6 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include @@ -29,9 +30,9 @@ void upsample_bilinear2d_kernel_impl( auto out_data = out.mutable_data_ptr(); auto in_plane = in_data; - for (auto n = 0; n < out.size(0); n++) { - for (auto c = 0; c < out.size(1); c++) { - for (auto h = 0; h < out.size(2); h++) { + for ([[maybe_unused]] const auto n : c10::irange(out.size(0))) { + for ([[maybe_unused]] const auto c : c10::irange(out.size(1))) { + for (const auto h : c10::irange(out.size(2))) { // Compute source index and weights. int64_t in_h1, in_h2; float weight_h, inv_weight_h; @@ -47,7 +48,7 @@ void upsample_bilinear2d_kernel_impl( out.sizes()[2], align_corners); - for (auto w = 0; w < out.size(3); w++) { + for (const auto w : c10::irange(out.size(3))) { int64_t in_w1, in_w2; float weight_w, inv_weight_w; diff --git a/kernels/portable/cpu/op_var.cpp b/kernels/portable/cpu/op_var.cpp index c3627281481..0cffca450c8 100644 --- a/kernels/portable/cpu/op_var.cpp +++ b/kernels/portable/cpu/op_var.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -27,11 +28,11 @@ void compute_variance( const double denominator) { CTYPE_OUT* out_data = out.mutable_data_ptr(); if (num == 0 || denominator <= 0) { - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { out_data[out_ix] = NAN; } } else { - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { CTYPE_OUT sum = map_reduce_over_dim_list( [](CTYPE_IN v) { return static_cast(v); }, [](CTYPE_OUT outv, CTYPE_OUT acc) { return acc + outv; }, diff --git a/kernels/portable/cpu/op_zeros.cpp b/kernels/portable/cpu/op_zeros.cpp index e24324e55fd..a3c70795705 100644 --- a/kernels/portable/cpu/op_zeros.cpp +++ b/kernels/portable/cpu/op_zeros.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -24,7 +25,7 @@ bool check_sizes( executorch::aten::ArrayRef size_int64_t, executorch::aten::ArrayRef size_int32_t) { ET_LOG_AND_RETURN_IF_FALSE(size_int64_t.size() == size_int32_t.size()); - for (int i = 0; i < size_int64_t.size(); i++) { + for (const auto i : c10::irange(size_int64_t.size())) { ET_LOG_AND_RETURN_IF_FALSE(((int64_t)size_int32_t[i] == size_int64_t[i])); } diff --git a/kernels/portable/cpu/targets.bzl b/kernels/portable/cpu/targets.bzl index 20434459489..f6ef7150679 100644 --- a/kernels/portable/cpu/targets.bzl +++ b/kernels/portable/cpu/targets.bzl @@ -40,6 +40,9 @@ def define_common_targets(): # reevaluated before becoming a public API. runtime.cxx_library( name = "vec_ops", + exported_deps = [ + "//executorch/runtime/core/portable_type/c10/c10:c10", + ], srcs = [], exported_headers = ["vec_ops.h"], visibility = ["//executorch/kernels/portable/cpu/...", "//executorch/kernels/quantized/..."], diff --git a/kernels/portable/cpu/util/activation_ops_util.cpp b/kernels/portable/cpu/util/activation_ops_util.cpp index 70be6367c76..fe26d4fda04 100644 --- a/kernels/portable/cpu/util/activation_ops_util.cpp +++ b/kernels/portable/cpu/util/activation_ops_util.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
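Alongside the c10::irange conversion, several hunks below (activation_ops_util.cpp, advanced_index_util.cpp) insert a static_cast so that the now-unsigned loop index can be compared against a signed dim argument without tripping -Wsign-compare. A hedged standalone sketch of that pattern; has_dim and its parameters are invented for illustration and the cast target type is an assumption:

#include <c10/util/irange.h>
#include <cstddef>
#include <cstdint>

// Returns true if `dim` (a signed, already-normalized dimension index)
// names one of the `ndim` axes. Casting the unsigned index keeps the
// comparison between operands of the same signedness.
bool has_dim(size_t ndim, int64_t dim) {
  for (const auto i : c10::irange(ndim)) {
    if (static_cast<int64_t>(i) == dim) {
      return true;
    }
  }
  return false;
}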
*/ +#include #include #include @@ -44,8 +45,8 @@ bool check_glu_args(const Tensor& in, int64_t dim, Tensor& out) { out.size(non_negative_dim) == dim_size / 2, "output tensor must have half the size of the input tensor along the specified dimension."); - for (size_t i = 0; i < in.dim(); ++i) { - if (i != non_negative_dim) { + for (const auto i : c10::irange(in.dim())) { + if (static_cast(i) != non_negative_dim) { if (out.size(i) != in.size(i)) { #if ET_LOG_ENABLED auto out_shape_str = executorch::runtime::tensor_shape_to_c_string( @@ -94,9 +95,10 @@ Error resize_glu_out(const Tensor& in, int64_t dim, Tensor& out) { executorch::aten::SizesType expected_output_size[kTensorDimensionLimit]; const size_t non_negative_dim = dim < 0 ? dim + in.dim() : dim; - for (size_t i = 0; i < in.dim(); i++) { - expected_output_size[i] = - (i == non_negative_dim) ? (in.size(i) / 2) : in.size(i); + for (const auto i : c10::irange(in.dim())) { + expected_output_size[i] = (static_cast(i) == non_negative_dim) + ? (in.size(i) / 2) + : in.size(i); } ArrayRef output_size{ diff --git a/kernels/portable/cpu/util/advanced_index_util.cpp b/kernels/portable/cpu/util/advanced_index_util.cpp index 68faa192b44..304ba3a3f96 100644 --- a/kernels/portable/cpu/util/advanced_index_util.cpp +++ b/kernels/portable/cpu/util/advanced_index_util.cpp @@ -5,6 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -20,7 +21,7 @@ using TensorOptList = namespace { bool check_indices_dtypes(TensorOptList indices) { - for (auto i = 0; i < indices.size(); i++) { + for (const auto i : c10::irange(indices.size())) { if (indices[i].has_value()) { const Tensor& index = indices[i].value(); ScalarType ix_type = index.scalar_type(); @@ -43,13 +44,13 @@ bool is_mask_index(const Tensor& index) { bool check_mask_indices(const Tensor& in, TensorOptList indices) { size_t in_i = 0; - for (auto i = 0; i < indices.size(); i++) { + for (const auto i : c10::irange(indices.size())) { if (indices[i].has_value()) { const Tensor& index = indices[i].value(); if (is_mask_index(index)) { ET_CHECK_OR_RETURN_FALSE( index.dim() > 0, "Zero-dimensional mask index not allowed"); - for (auto j = 0; j < index.dim(); j++) { + for (const auto j : c10::irange(index.dim())) { if (index.size(j) != in.size(in_i + j)) { #if ET_LOG_ENABLED auto mask_shape = executorch::runtime::tensor_shape_to_c_string( @@ -82,7 +83,7 @@ template size_t _count_trues_in_mask_index(const Tensor& index) { const CTYPE_IX* const index_ptr = index.const_data_ptr(); size_t sum = 0; - for (size_t i = 0; i < index.numel(); ++i) { + for (const auto i : c10::irange(index.numel())) { if (index_ptr[i]) { sum += 1; } @@ -110,7 +111,7 @@ void _query_mask_index(const Tensor& index, size_t query_idx, size_t* res) { // true. 
size_t count = 0; size_t flat_ix = 0; - for (size_t i = 0; i < index.numel(); ++i) { + for (const auto i : c10::irange(index.numel())) { if (index_ptr[i]) { if (count == query_idx) { flat_ix = i; @@ -157,7 +158,8 @@ bool check_index_args(const Tensor& in, TensorOptList indices, Tensor& out) { ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out)); ET_LOG_AND_RETURN_IF_FALSE(check_indices_dtypes(indices)); ET_CHECK_OR_RETURN_FALSE( - indices.size() <= in.dim(), "Indexing too many dimensions"); + static_cast(indices.size()) <= in.dim(), + "Indexing too many dimensions"); ET_LOG_AND_RETURN_IF_FALSE(check_mask_indices(in, indices)); return true; } @@ -165,7 +167,7 @@ bool check_index_args(const Tensor& in, TensorOptList indices, Tensor& out) { size_t count_index_blocks(TensorOptList indices) { size_t block_count = 0; bool in_block = false; - for (size_t i = 0; i < indices.size(); i++) { + for (const auto i : c10::irange(indices.size())) { if (indices[i].has_value()) { if (!in_block) { in_block = true; @@ -184,13 +186,14 @@ bool get_indices_broadcast_shape( size_t* ix_ndim) { // Holds the (reversed) broadcasted shape of the indices. Tensor::SizesType rev_ix_sizes[kTensorDimensionLimit]; - size_t curr_ndim = 0; + ssize_t curr_ndim = 0; - for (size_t i = 0; i < indices.size(); i++) { + for (const auto i : c10::irange(indices.size())) { if (indices[i].has_value()) { const Tensor& index = indices[i].value(); if (is_mask_index(index)) { - size_t len = count_trues_in_mask_index(index); + Tensor::SizesType len = + static_cast(count_trues_in_mask_index(index)); if (curr_ndim == 0) { curr_ndim = 1; rev_ix_sizes[0] = len; @@ -200,8 +203,9 @@ bool get_indices_broadcast_shape( ET_CHECK_OR_RETURN_FALSE(false, "Broadcast of mask index failed."); } } else { - for (size_t j = 0; j < index.dim(); j++) { - size_t rev_j_size = index.size(index.dim() - j - 1); + for (const auto j : c10::irange(index.dim())) { + Tensor::SizesType rev_j_size = + static_cast(index.size(index.dim() - j - 1)); if (j >= curr_ndim) { curr_ndim = j + 1; rev_ix_sizes[j] = rev_j_size; @@ -215,7 +219,7 @@ bool get_indices_broadcast_shape( } } - for (size_t i = 0; i < curr_ndim; i++) { + for (const auto i : c10::irange(curr_ndim)) { ix_sizes[i] = rev_ix_sizes[curr_ndim - i - 1]; } (*ix_ndim) = curr_ndim; @@ -223,8 +227,8 @@ bool get_indices_broadcast_shape( } size_t get_indices_broadcast_ndim(TensorOptList indices) { - size_t ndim = 0; - for (size_t i = 0; i < indices.size(); i++) { + ssize_t ndim = 0; + for (const auto i : c10::irange(indices.size())) { if (indices[i].has_value()) { const Tensor& index = indices[i].value(); if (is_mask_index(index)) { @@ -243,7 +247,7 @@ size_t get_indices_broadcast_ndim(TensorOptList indices) { size_t get_num_indexed_dims(TensorOptList indices) { size_t num_indexed_dims = 0; - for (size_t i = 0; i < indices.size(); i++) { + for (const auto i : c10::irange(indices.size())) { if (indices[i].has_value()) { const Tensor& index = indices[i].value(); if (is_mask_index(index)) { @@ -258,7 +262,7 @@ size_t get_num_indexed_dims(TensorOptList indices) { size_t get_num_null_indices(TensorOptList indices) { size_t num_null_indices = 0; - for (size_t i = 0; i < indices.size(); i++) { + for (const auto i : c10::irange(indices.size())) { if (!indices[i].has_value()) { num_null_indices += 1; } @@ -290,7 +294,7 @@ bool get_index_out_target_size( size_t num_indexed_dims = get_num_indexed_dims(indices); ET_CHECK_OR_RETURN_FALSE( - num_null_indices + num_indexed_dims <= in.dim(), + static_cast(num_null_indices + 
num_indexed_dims) <= in.dim(), "Indexing too many dimensions"); ET_CHECK_OR_RETURN_FALSE( @@ -301,22 +305,22 @@ bool get_index_out_target_size( if (adjacent) { size_t start = get_num_leading_null_indices(indices); - for (size_t i = 0; i < start; i++) { + for (const auto i : c10::irange(start)) { out_sizes[i] = in.size(i); } - for (size_t i = 0; i < broadcast_ndim; i++) { + for (const auto i : c10::irange(broadcast_ndim)) { out_sizes[i + start] = broadcast_sizes[i]; } - for (size_t i = num_indexed_dims + start; i < in.dim(); i++) { + for (const auto i : c10::irange(num_indexed_dims + start, in.dim())) { out_sizes[i + broadcast_ndim - num_indexed_dims] = in.size(i); } } else { - for (size_t i = 0; i < broadcast_ndim; i++) { + for (const auto i : c10::irange(broadcast_ndim)) { out_sizes[i] = broadcast_sizes[i]; } size_t in_i = 0; size_t out_i = broadcast_ndim; - for (size_t i = 0; i < indices.size(); i++) { + for (const auto i : c10::irange(indices.size())) { if (!indices[i].has_value()) { out_sizes[out_i++] = in.size(in_i++); } else { @@ -328,7 +332,8 @@ bool get_index_out_target_size( } } } - for (size_t i = num_indexed_dims + num_null_indices; i < in.dim(); i++) { + for (const auto i : + c10::irange(num_indexed_dims + num_null_indices, in.dim())) { out_sizes[i + broadcast_ndim - num_indexed_dims] = in.size(i); } } @@ -348,25 +353,25 @@ void compute_dim_map( size_t num_null_indices = get_num_null_indices(indices); if (adjacent) { - for (auto i = 0; i < start; i++) { + for (const auto i : c10::irange(start)) { dim_map[i] = i; } - for (auto i = start; i < start + num_indexed_dims; i++) { + for (const auto i : c10::irange(start, start + num_indexed_dims)) { dim_map[i] = -1; } - for (auto i = start + num_indexed_dims; i < in.dim(); i++) { + for (const auto i : c10::irange(start + num_indexed_dims, in.dim())) { dim_map[i] = i - num_indexed_dims + broadcast_ndim; } } else { size_t in_i = 0; size_t out_i = broadcast_ndim; - for (size_t i = 0; i < indices.size(); i++) { + for (const auto i : c10::irange(indices.size())) { if (!indices[i].has_value()) { dim_map[in_i++] = out_i++; } else { const Tensor& index = indices[i].value(); if (is_mask_index(index)) { - for (auto j = 0; j < index.dim(); j++) { + for ([[maybe_unused]] const auto j : c10::irange(index.dim())) { dim_map[in_i++] = -1; } } else { @@ -374,7 +379,8 @@ void compute_dim_map( } } } - for (size_t i = num_indexed_dims + num_null_indices; i < in.dim(); i++) { + for (const auto i : + c10::irange(num_indexed_dims + num_null_indices, in.dim())) { dim_map[i] = i - num_indexed_dims + broadcast_ndim; } } @@ -386,15 +392,15 @@ void compute_index_map( const Tensor& in, TensorOptList indices, int32_t* ix_map) { - for (size_t i = 0; i < in.dim(); i++) { + for (const auto i : c10::irange(in.dim())) { ix_map[i] = -1; } size_t in_i = 0; - for (size_t i = 0; i < indices.size(); i++) { + for (const auto i : c10::irange(indices.size())) { if (indices[i].has_value()) { const Tensor& index = indices[i].value(); if (is_mask_index(index)) { - for (auto j = 0; j < index.dim(); j++) { + for ([[maybe_unused]] const auto j : c10::irange(index.dim())) { ix_map[in_i++] = i; } } else { @@ -422,7 +428,7 @@ bool get_in_coord( const Tensor& index = indices[ix_map[i]].value(); size_t ix_coord[kTensorDimensionLimit]; - for (auto j = 0; j < broadcast_ndim; j++) { + for (const auto j : c10::irange(broadcast_ndim)) { ix_coord[j] = out_coord[j + start]; } @@ -430,7 +436,7 @@ bool get_in_coord( size_t query_ix = ix_coord[broadcast_ndim - 1]; size_t 
query_result[kTensorDimensionLimit]; query_mask_index(index, query_ix, query_result); - for (auto j = 0; j < index.dim(); j++) { + for (const auto j : c10::irange(index.dim())) { in_coord[i + j] = query_result[j]; } i += index.dim() - 1; diff --git a/kernels/portable/cpu/util/broadcast_util.h b/kernels/portable/cpu/util/broadcast_util.h index 35344345242..10bd07baee2 100644 --- a/kernels/portable/cpu/util/broadcast_util.h +++ b/kernels/portable/cpu/util/broadcast_util.h @@ -8,6 +8,7 @@ #pragma once +#include #include #include @@ -289,7 +290,7 @@ inline void apply_binary_elementwise_fn( const CTYPE_B* const data_b = b.const_data_ptr(); CTYPE_OUT* const data_out = out.mutable_data_ptr(); - for (size_t i = 0; i < out.numel(); ++i) { + for (const auto i : c10::irange(out.numel())) { size_t a_linear_index = i; size_t b_linear_index = i; @@ -337,7 +338,7 @@ inline void apply_ternary_elementwise_fn( const CTYPE_C* const data_c = c.const_data_ptr(); CTYPE_OUT* const data_out = out.mutable_data_ptr(); - for (size_t i = 0; i < out.numel(); ++i) { + for (const auto i : c10::irange(out.numel())) { size_t a_linear_index = i; size_t b_linear_index = i; size_t c_linear_index = i; diff --git a/kernels/portable/cpu/util/copy_ops_util.cpp b/kernels/portable/cpu/util/copy_ops_util.cpp index f0e1d8b30d2..bd01a1be329 100644 --- a/kernels/portable/cpu/util/copy_ops_util.cpp +++ b/kernels/portable/cpu/util/copy_ops_util.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -26,7 +27,7 @@ size_t as_strided_copy_compute_storage_nbytes( // size of the underlying storage is 1 bigger than the offset // of the last element according to stride size_t size = 1; - for (size_t i = 0; i < sizes.size(); ++i) { + for (const auto i : c10::irange(sizes.size())) { if (sizes[i] == 0) { return 0; } @@ -80,7 +81,7 @@ bool check_cat_args( // Find the first non-empty tensor in the list to use as a reference size_t ref_i = 0; - for (size_t i = 0; i < tensors.size(); ++i) { + for (const auto i : c10::irange(tensors.size())) { if (tensors[i].numel() > 0) { ref_i = i; break; @@ -90,7 +91,7 @@ bool check_cat_args( // "All tensors must either have the same shape (except in the concatenating // dimension) or be empty." // https://pytorch.org/docs/stable/generated/torch.cat.html - for (size_t i = 0; i < tensors.size(); ++i) { + for (const auto i : c10::irange(tensors.size())) { // All input dtypes must be castable to the output dtype. 
ET_LOG_AND_RETURN_IF_FALSE( canCast(tensors[i].scalar_type(), out.scalar_type())); @@ -106,7 +107,7 @@ bool check_cat_args( ET_LOG_AND_RETURN_IF_FALSE( tensor_is_rank(tensors[ref_i], tensors[i].dim())); - for (size_t d = 0; d < tensors[i].dim(); ++d) { + for (const auto d : c10::irange(tensors[i].dim())) { if (d != dim) { ET_LOG_AND_RETURN_IF_FALSE( tensors_have_same_size_at_dims(tensors[i], d, tensors[ref_i], d)); @@ -132,7 +133,7 @@ void get_cat_out_target_size( // calculate out dim size_t ref_i = 0; size_t cat_dim_size = 0; - for (size_t i = 0; i < tensors.size(); ++i) { + for (const auto i : c10::irange(tensors.size())) { if (tensors[i].numel() > 0) { cat_dim_size += tensors[i].size(dim); } @@ -143,15 +144,14 @@ void get_cat_out_target_size( *out_ndim = tensors[ref_i].dim(); - for (size_t d = 0; d < *out_ndim; ++d) { - if (d != dim) { + for (const auto d : c10::irange(*out_ndim)) { + if (static_cast(d) != dim) { out_sizes[d] = tensors[ref_i].size(d); } else { out_sizes[d] = cat_dim_size; } } } - bool check_expand_copy_args( const Tensor& input, ArrayRef expand_sizes, @@ -231,7 +231,7 @@ bool check_permute_copy_args(const Tensor& in, IntArrayRef dims, Tensor& out) { bool dim_exist[kTensorDimensionLimit]; memset(dim_exist, false, sizeof(dim_exist)); - for (int i = 0; i < dims.size(); i++) { + for (const auto i : c10::irange(dims.size())) { ET_LOG_AND_RETURN_IF_FALSE(tensor_has_dim(in, dims[i])); // Convert dimension to a non-negative number in the range // [0 .. in.dim() - 1]. @@ -258,7 +258,7 @@ bool check_unbind_copy_args(const Tensor& in, int64_t dim, TensorList out) { const ssize_t dim_size = in.size(dim); ET_CHECK_OR_RETURN_FALSE( - dim_size == out.size(), + dim_size == static_cast(out.size()), "out tensorlist's length %zd must equal unbind dim %" PRId64 " size = %zd.", out.size(), @@ -266,7 +266,7 @@ bool check_unbind_copy_args(const Tensor& in, int64_t dim, TensorList out) { dim_size); // Validate each output. - for (size_t i = 0; i < out.size(); ++i) { + for (const auto i : c10::irange(out.size())) { // All output dtypes must be the same. ET_CHECK_OR_RETURN_FALSE( out[i].scalar_type() == out[0].scalar_type(), @@ -284,7 +284,8 @@ bool check_unbind_copy_args(const Tensor& in, int64_t dim, TensorList out) { in.dim() - 1); // Check the shape of the output. - for (ssize_t d = 0, out_d = 0; d < in.dim(); ++d) { + ssize_t out_d = 0; + for (const auto d : c10::irange(in.dim())) { if (d != dim) { ET_CHECK_OR_RETURN_FALSE( out[i].size(out_d) == in.size(d), @@ -309,7 +310,7 @@ void get_permute_copy_out_target_size( size_t* out_ndim) { *out_ndim = in.dim(); - for (size_t i = 0; i < in.dim(); ++i) { + for (const auto i : c10::irange(in.dim())) { out_sizes[i] = in.size(dims[i] >= 0 ? dims[i] : dims[i] + in.dim()); } } @@ -348,7 +349,7 @@ void get_pixel_shuffle_out_target_size( *out_ndim = in.dim(); const executorch::aten::SizesType casted_upscale_factor = upscale_factor; - size_t i = 0; + ssize_t i = 0; for (; i < in.dim() - 3; ++i) { // Copy all leading dimensions in. out_sizes[i] = in.size(i); @@ -370,7 +371,7 @@ void get_pixel_unshuffle_out_target_size( *out_ndim = in.dim(); const executorch::aten::SizesType casted_factor = downscale_factor; - size_t i = 0; + ssize_t i = 0; for (; i < in.dim() - 3; ++i) { // Copy all leading dimensions in. 
out_sizes[i] = in.size(i); @@ -404,7 +405,7 @@ void get_select_copy_out_target_size( size_t* out_ndim) { *out_ndim = in.dim() - 1; - for (size_t d = 0; d < in.dim() - 1; ++d) { + for (const auto d : c10::irange(in.dim() - 1)) { if (d < dim) { out_sizes[d] = in.size(d); } else { @@ -426,7 +427,7 @@ bool check_split_with_sizes_copy_args( "Number of split sizes must match the number of output tensors"); int64_t sum = 0; - for (int i = 0; i < split_sizes.size(); i++) { + for (const auto i : c10::irange(split_sizes.size())) { ET_CHECK_OR_RETURN_FALSE( split_sizes[i] >= 0, "All split sizes must be non negative."); sum += split_sizes[i]; @@ -448,7 +449,7 @@ void get_split_with_sizes_copy_out_target_size( size_t* out_ndim) { *out_ndim = in.dim(); - for (size_t d = 0; d < in.dim(); ++d) { + for (const auto d : c10::irange(in.dim())) { out_sizes[d] = in.size(d); } out_sizes[dim] = split_size; @@ -483,7 +484,7 @@ void get_squeeze_copy_dim_out_target_size( } size_t out_d = 0; - for (size_t in_d = 0; in_d < in.dim(); ++in_d) { + for (const auto in_d : c10::irange(in.dim())) { if (in_d != dim || in.size(in_d) != 1) { out_sizes[out_d] = in.size(in_d); ++out_d; @@ -497,12 +498,12 @@ bool check_squeeze_copy_dims_args( const Tensor out) { ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out)); - for (size_t i = 0; i < dims.size(); ++i) { + for (const auto i : c10::irange(dims.size())) { const int64_t dim = dims[i] < 0 ? dims[i] + nonzero_dim(in) : dims[i]; ET_LOG_AND_RETURN_IF_FALSE(tensor_has_dim(in, dim)); // Check that a dim does not appear twice in dims - for (size_t j = 0; j < dims.size(); ++j) { + for (const auto j : c10::irange(dims.size())) { if (i != j) { const int64_t dim_temp = dims[j] < 0 ? dims[j] + nonzero_dim(in) : dims[j]; @@ -530,7 +531,7 @@ void get_squeeze_copy_dims_out_target_size( // A dim is only removed if the size at the given dim is 1. executorch::aten::SizesType dims_to_remove = 0; - for (size_t i = 0; i < dims.size(); ++i) { + for (const auto i : c10::irange(dims.size())) { int64_t dim = dims[i] < 0 ? dims[i] + nonzero_dim(in) : dims[i]; if (in.size(dim) == 1) { ++dims_to_remove; @@ -539,9 +540,9 @@ void get_squeeze_copy_dims_out_target_size( *out_ndim = in.dim() - dims_to_remove; size_t out_d = 0; - for (size_t in_d = 0; in_d < in.dim(); ++in_d) { + for (const auto in_d : c10::irange(in.dim())) { bool in_d_in_dims = false; - for (size_t i = 0; i < dims.size(); ++i) { + for (const auto i : c10::irange(dims.size())) { int64_t dim = dims[i] < 0 ? dims[i] + nonzero_dim(in) : dims[i]; if (in_d == dim) { in_d_in_dims = true; @@ -564,13 +565,13 @@ bool check_stack_args( // All input tensors need to be of the same size // https://pytorch.org/docs/stable/generated/torch.stack.html - for (size_t i = 0; i < tensors.size(); i++) { + for (const auto i : c10::irange(tensors.size())) { // All input dtypes must be castable to the output dtype. 
ET_LOG_AND_RETURN_IF_FALSE( canCast(tensors[i].scalar_type(), out.scalar_type())); ET_LOG_AND_RETURN_IF_FALSE(tensor_is_rank(tensors[i], tensors[0].dim())); - for (size_t d = 0; d < tensors[i].dim(); d++) { + for (const auto d : c10::irange(tensors[i].dim())) { ET_LOG_AND_RETURN_IF_FALSE( tensors_have_same_size_at_dims(tensors[i], d, tensors[0], d)); } @@ -590,13 +591,14 @@ void get_stack_out_target_size( size_t* out_ndim) { *out_ndim = tensors[0].dim() + 1; - for (size_t d = 0; d < *out_ndim; ++d) { - if (d < dim) { - out_sizes[d] = tensors[0].size(d); - } else if (d == dim) { - out_sizes[d] = tensors.size(); + for (const auto d : c10::irange(*out_ndim)) { + int64_t d_ = static_cast(d); + if (d_ < dim) { + out_sizes[d_] = tensors[0].size(d_); + } else if (d_ == dim) { + out_sizes[d_] = tensors.size(); } else { - out_sizes[d] = tensors[0].size(d - 1); + out_sizes[d_] = tensors[0].size(d_ - 1); } } } @@ -658,7 +660,7 @@ bool check_split_copy_args( } else { int64_t expected_out_len = (dim_size + split_size - 1) / split_size; ET_CHECK_OR_RETURN_FALSE( - out.size() == expected_out_len, + static_cast(out.size()) == expected_out_len, "Unexpected out.size() %zu: ceil(input.size(%" PRId64 ")=%zd" " / split_size=%" PRId64 ") is %" PRId64, @@ -674,7 +676,7 @@ bool check_split_copy_args( } // Validate each output. - for (size_t i = 0; i < out.size(); ++i) { + for (const auto i : c10::irange(out.size())) { // All output dtypes must be the same. ET_CHECK_OR_RETURN_FALSE( out[i].scalar_type() == out[0].scalar_type(), @@ -692,7 +694,7 @@ bool check_split_copy_args( input.dim()); // Check the shape of the output. - for (ssize_t d = 0; d < out[i].dim(); ++d) { + for (const auto d : c10::irange(out[i].dim())) { if (d == dim) { // This is the split dimension, which may be different. if (i < out.size() - 1) { @@ -759,7 +761,8 @@ bool check__to_dim_order_copy_args( executorch::aten::ArrayRef dim_order_ref = dim_order.value(); // dim order size shall equal to input dim - ET_LOG_AND_RETURN_IF_FALSE(dim_order_ref.size() == input.dim()); + ET_LOG_AND_RETURN_IF_FALSE( + static_cast(dim_order_ref.size()) == input.dim()); ET_LOG_AND_RETURN_IF_FALSE( is_channels_last_dim_order( @@ -770,7 +773,7 @@ bool check__to_dim_order_copy_args( // Out tensor shall have same dim order as dim_order auto out_dim_order = out.dim_order(); ET_LOG_AND_RETURN_IF_FALSE(out_dim_order.size() == dim_order_ref.size()); - for (size_t i = 0; i < dim_order_ref.size(); i++) { + for (const auto i : c10::irange(dim_order_ref.size())) { ET_LOG_AND_RETURN_IF_FALSE(out_dim_order[i] == dim_order_ref[i]); } } else { // dim_order is not set, preserve the dim order of input @@ -779,7 +782,7 @@ bool check__to_dim_order_copy_args( auto out_dim_order = out.dim_order(); auto input_dim_order = input.dim_order(); ET_LOG_AND_RETURN_IF_FALSE(out_dim_order.size() == input_dim_order.size()); - for (size_t i = 0; i < input_dim_order.size(); i++) { + for (const auto i : c10::irange(input_dim_order.size())) { ET_LOG_AND_RETURN_IF_FALSE(out_dim_order[i] == input_dim_order[i]); } } @@ -804,7 +807,7 @@ bool check_unsqueeze_copy_args( // 4. out.size(dim) == 1 ET_LOG_AND_RETURN_IF_FALSE(input.dim() == out.dim() - 1); - for (size_t d = 0; d < out.dim(); d++) { + for (auto const d : c10::irange(out.dim())) { auto dim_normalized = dim; if (dim_normalized < 0) { dim_normalized += out.dim(); @@ -857,7 +860,7 @@ bool check_view_copy_args( // The size of out should equal target size. 
bool size_inferred = false; - for (int i = 0; i < size_int64_t.size(); i++) { + for (auto const i : c10::irange(size_int64_t.size())) { // If this value is -1 it implies that this dimension is inferred. if (size_int64_t[i] == -1) { ET_CHECK_OR_RETURN_FALSE( @@ -880,9 +883,9 @@ bool get_view_copy_target_size( size_t out_numels_without_minus_1 = 1; int32_t minus_1_dim = -1; - ET_LOG_AND_RETURN_IF_FALSE(size_int64_t.size() == dim); + ET_LOG_AND_RETURN_IF_FALSE(static_cast(size_int64_t.size()) == dim); - for (size_t i = 0; i < dim; ++i) { + for (const auto i : c10::irange(dim)) { if (size_int64_t[i] != -1) { out_sizes[i] = static_cast(size_int64_t[i]); out_numels_without_minus_1 = out_numels_without_minus_1 * size_int64_t[i]; @@ -951,7 +954,7 @@ void get_diagonal_copy_out_target_size( } size_t shift = 0; - for (size_t d = 0; d < in.dim(); ++d) { + for (const auto d : c10::irange(in.dim())) { if (d == dim1 || d == dim2) { shift++; } else { diff --git a/kernels/portable/cpu/util/elementwise_util.h b/kernels/portable/cpu/util/elementwise_util.h index 3d06c7a3283..778006f1b99 100644 --- a/kernels/portable/cpu/util/elementwise_util.h +++ b/kernels/portable/cpu/util/elementwise_util.h @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -71,7 +72,7 @@ inline void apply_unitensor_elementwise_fn( char* const data_out = reinterpret_cast(out.mutable_data_ptr()); auto out_numel = out.numel(); - for (size_t i = 0; i < out_numel; ++i) { + for (const auto i : c10::irange(out_numel)) { auto result = compute_fun(load_a_to_common(&data_a[i * a_element_size])); store_common_to_out(result, &data_out[i * out_element_size]); } @@ -120,7 +121,7 @@ inline void apply_bitensor_elementwise_fn( char* const data_out = reinterpret_cast(out.mutable_data_ptr()); auto out_numel = out.numel(); - for (size_t i = 0; i < out_numel; ++i) { + for (const auto i : c10::irange(out_numel)) { size_t a_linear_index = i; size_t b_linear_index = i; @@ -210,7 +211,7 @@ inline void apply_tritensor_elementwise_fn( char* const data_out = reinterpret_cast(out.mutable_data_ptr()); auto out_numel = out.numel(); - for (size_t i = 0; i < out_numel; ++i) { + for (const auto i : c10::irange(out_numel)) { size_t a_linear_index = i; size_t b_linear_index = i; size_t c_linear_index = i; diff --git a/kernels/portable/cpu/util/index_util.cpp b/kernels/portable/cpu/util/index_util.cpp index 909b00db3aa..bcf15c4bb4c 100644 --- a/kernels/portable/cpu/util/index_util.cpp +++ b/kernels/portable/cpu/util/index_util.cpp @@ -5,6 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
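Several loops in the utility files below start from a non-zero index; those are rewritten with the two-argument form c10::irange(begin, end), which iterates over the half-open interval [begin, end). A standalone sketch, not part of the patch; the names mirror the kernel_ops_util.cpp hunk but the values are made up:

#include <c10/util/irange.h>
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t out_dim = 4;
  const int64_t kernel_ndim = 2;
  // Before: for (size_t i = out_dim - kernel_ndim; i < out_dim; i++) { ... }
  // After: the same trailing dimensions, expressed as a half-open range.
  for (const auto i : c10::irange(out_dim - kernel_ndim, out_dim)) {
    std::printf("trailing dim %lld\n", static_cast<long long>(i));
  }
  return 0;
}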
*/ +#include #include #include @@ -35,7 +36,7 @@ bool check_gather_args( dim += nonzero_dim(in); } - for (size_t d = 0; d < nonzero_dim(in); ++d) { + for (const auto d : c10::irange(nonzero_dim(in))) { if (d != dim) { ET_CHECK_OR_RETURN_FALSE( nonempty_size(index, d) <= nonempty_size(in, d), @@ -46,7 +47,7 @@ bool check_gather_args( } } const long* index_data = index.const_data_ptr(); - for (size_t i = 0; i < index.numel(); ++i) { + for (const auto i : c10::irange(index.numel())) { ET_CHECK_OR_RETURN_FALSE( index_data[i] >= 0 && index_data[i] < nonempty_size(in, dim), "Index is out of bounds for dimension %zd with size %zd", @@ -84,7 +85,7 @@ bool check_index_select_args( if (index.scalar_type() == ScalarType::Long) { const int64_t* const index_ptr = index.const_data_ptr(); - for (size_t i = 0; i < index.numel(); ++i) { + for (const auto i : c10::irange(index.numel())) { ET_CHECK_OR_RETURN_FALSE( index_ptr[i] >= 0 && index_ptr[i] < nonempty_size(in, dim), "index[%zu] = %" PRId64 " is out of range [0, %zd)", @@ -94,7 +95,7 @@ bool check_index_select_args( } } else { const int32_t* const index_ptr = index.const_data_ptr(); - for (size_t i = 0; i < index.numel(); ++i) { + for (const auto i : c10::irange(index.numel())) { ET_CHECK_OR_RETURN_FALSE( index_ptr[i] >= 0 && index_ptr[i] < nonempty_size(in, dim), "index[%zu] = %" PRId32 " is out of range [0, %zd)", @@ -114,7 +115,7 @@ void get_index_select_out_target_size( executorch::aten::SizesType* out_sizes, size_t* out_ndim) { *out_ndim = in.dim(); - for (size_t i = 0; i < in.dim(); ++i) { + for (const auto i : c10::irange(in.dim())) { if (i == dim) { out_sizes[i] = index.numel(); } else { @@ -166,7 +167,7 @@ bool check_scatter_add_args( dim += nonzero_dim(self); } - for (size_t d = 0; d < nonzero_dim(self); ++d) { + for (const auto d : c10::irange(nonzero_dim(self))) { ET_CHECK_OR_RETURN_FALSE( nonempty_size(index, d) <= nonempty_size(src, d), "size of dimension %zd of index should be smaller than the size of that dimension of src", @@ -181,7 +182,7 @@ bool check_scatter_add_args( } } const long* index_data = index.const_data_ptr(); - for (size_t i = 0; i < index.numel(); ++i) { + for (const auto i : c10::irange(index.numel())) { ET_CHECK_OR_RETURN_FALSE( index_data[i] >= 0 && index_data[i] < nonempty_size(self, dim), "Index is out of bounds for dimension %zd with size %zd", diff --git a/kernels/portable/cpu/util/kernel_ops_util.cpp b/kernels/portable/cpu/util/kernel_ops_util.cpp index 1e851ccb1ef..c6a38fbb2f0 100644 --- a/kernels/portable/cpu/util/kernel_ops_util.cpp +++ b/kernels/portable/cpu/util/kernel_ops_util.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -47,7 +48,7 @@ bool param_array_is_valid( } // namespace bool int_array_all_ge(IntArrayRef array, int64_t val) { - for (size_t i = 0; i < array.size(); ++i) { + for (const auto i : c10::irange(array.size())) { if (array[i] < val) { ET_LOG( Error, @@ -88,7 +89,7 @@ bool padding_is_valid( if (enforce_half_kernel) { // Padding must be at most half of kernel size. 
- for (size_t i = 0; i < padding.size(); i++) { + for (const auto i : c10::irange(padding.size())) { if (padding[i] > val_at(kernel_size, i) / 2) { ET_LOG( Error, @@ -122,7 +123,7 @@ bool output_padding_is_valid( kernel_ndim, /*allow_empty=*/false)); - for (size_t i = 0; i < kernel_ndim; i++) { + for (const auto i : c10::irange(kernel_ndim)) { const int64_t op_i = val_at(output_padding, i); const int64_t s_i = val_at(stride, i); const int64_t d_i = val_at(dilation, i); @@ -138,12 +139,12 @@ bool output_size_is_valid( size_t kernel_ndim) { bool valid = true; size_t out_dim = output_size.size(); - for (size_t i = 0; i < out_dim - kernel_ndim; i++) { + for (const auto i : c10::irange(out_dim - kernel_ndim)) { if (output_size[i] < 0) { valid = false; } } - for (size_t i = out_dim - kernel_ndim; i < out_dim; i++) { + for (const auto i : c10::irange(out_dim - kernel_ndim, out_dim)) { if (output_size[i] <= 0) { valid = false; } @@ -153,7 +154,7 @@ bool output_size_is_valid( Error, "The provided combination of input and kernel parameters " "produces an invalid output size:"); - for (size_t d = 0; d < output_size.size(); ++d) { + for ([[maybe_unused]] const auto d : c10::irange(output_size.size())) { ET_LOG( Error, " size(%zu): %zu", d, static_cast(output_size[d])); } @@ -167,11 +168,11 @@ void get_unsqueezed_sizes( executorch::aten::SizesType* sizes_arr, size_t& ndim) { ndim = t.dim() + 1; - for (int d = 0; d < unsqueeze_dim; ++d) { + for (const auto d : c10::irange(unsqueeze_dim)) { sizes_arr[d] = t.size(d); } sizes_arr[unsqueeze_dim] = 1; - for (int d = (unsqueeze_dim + 1); d < ndim; d++) { + for (const auto d : c10::irange(unsqueeze_dim + 1, ndim)) { sizes_arr[d] = t.size(d - 1); } } @@ -181,7 +182,7 @@ void get_unsqueezed_dim_order( executorch::aten::DimOrderType unsqueeze_dim, executorch::aten::DimOrderType* dim_order_arr) { int offset = 0; - for (int i = 0; i < t.dim(); ++i) { + for (const auto i : c10::irange(t.dim())) { executorch::aten::DimOrderType dim = t.dim_order()[i]; if (dim == unsqueeze_dim) { dim_order_arr[i] = dim; @@ -213,7 +214,7 @@ int64_t _kernel_output_size_helper( if (ceil_mode) { // ensure that the last pooling starts inside the image // needed to avoid problems in ceil mode - if ((outputSize - 1) * stride >= inputSize + pad) { + if ((outputSize - 1) * stride >= static_cast(inputSize) + pad) { --outputSize; } } @@ -231,7 +232,7 @@ void calculate_kernel_output_sizes( bool ceil_mode, bool transposed, IntArrayRef output_padding) { - for (size_t i = 0; i < kernel_ndim; ++i) { + for (const auto i : c10::irange(kernel_ndim)) { auto dim = in.dim() - (kernel_ndim - i); int64_t k = val_at(kernel_size, i); int64_t s = val_at(stride, i, /*default_value=*/k); @@ -547,7 +548,8 @@ bool check_constant_pad_args( pad.size() % 2 == 0, "Padding array must be a multiple of 2"); ET_CHECK_OR_RETURN_FALSE( - pad.size() / 2 <= in.dim(), "Padding array contains too many elements"); + static_cast(pad.size() / 2) <= in.dim(), + "Padding array contains too many elements"); return true; } @@ -559,11 +561,12 @@ Error resize_constant_pad_output( Tensor::SizesType expected_output_size[kTensorDimensionLimit]; int pad_i = in.dim() - 1; - for (size_t i = 0; i < in.dim(); ++i, --pad_i) { + for (const auto i : c10::irange(in.dim())) { expected_output_size[i] = in.size(i); - if (pad_i >= 0 && pad_i < pad.size() / 2) { + if (pad_i >= 0 && static_cast(pad_i) < pad.size() / 2) { expected_output_size[i] += pad[2 * pad_i] + pad[2 * pad_i + 1]; } + --pad_i; } ArrayRef output_size{ @@ -601,7 +604,7 @@ Error 
resize_embedding_output( const Tensor& indices, const Tensor& out) { Tensor::SizesType expected_output_size[kTensorDimensionLimit]; - for (size_t i = 0; i < indices.dim(); i++) { + for (const auto i : c10::irange(indices.dim())) { expected_output_size[i] = indices.size(i); } const size_t embedding_dim = weight.size(1); diff --git a/kernels/portable/cpu/util/kernel_ops_util.h b/kernels/portable/cpu/util/kernel_ops_util.h index 812e887111b..5951b7d0492 100644 --- a/kernels/portable/cpu/util/kernel_ops_util.h +++ b/kernels/portable/cpu/util/kernel_ops_util.h @@ -10,6 +10,7 @@ #include +#include #include namespace torch { @@ -182,9 +183,9 @@ void kernel_reduction_then_map_2d( int64_t d_W = val_at(dilation, 1, /*default_value=*/1); // Compute 2D output region - for (size_t out_y = 0; out_y < out_H; ++out_y) { + for (const auto out_y : c10::irange(out_H)) { out_coord[in_dim - 2] = out_y; - for (size_t out_x = 0; out_x < out_W; ++out_x) { + for (const auto out_x : c10::irange(out_W)) { out_coord[in_dim - 1] = out_x; bool accum_initialized = false; @@ -212,7 +213,7 @@ void kernel_reduction_then_map_2d( count = (ih1 - ih0) * (iw1 - iw0); } - for (size_t w_y = 0; w_y < k_H; ++w_y) { + for (const auto w_y : c10::irange(k_H)) { int64_t stride_y = s_H; int64_t padding_y = p_H; int64_t dilation_y = d_H; @@ -220,7 +221,7 @@ void kernel_reduction_then_map_2d( size_t in_y = stride_y * out_y + dilation_y * w_y - padding_y; in_coord[in_dim - 2] = in_y; - for (size_t w_x = 0; w_x < k_W; ++w_x) { + for (const auto w_x : c10::irange(k_W)) { int64_t stride_x = s_W; int64_t padding_x = p_W; int64_t dilation_x = d_W; @@ -356,8 +357,8 @@ void apply_kernel_2d_reduce_then_map_fn( if (in.dim() == 4) { batch_size = in_sizes[0]; } - for (size_t batch = 0; batch < batch_size; ++batch) { - for (size_t channel = 0; channel < in_sizes[in.dim() - 3]; ++channel) { + for (const auto batch : c10::irange(batch_size)) { + for (const auto channel : c10::irange(in_sizes[in.dim() - 3])) { kernel_reduction_then_map_2d( reduce_fn, map_fn, diff --git a/kernels/portable/cpu/util/normalization_ops_util.cpp b/kernels/portable/cpu/util/normalization_ops_util.cpp index 9f3ce5cc112..db18cf0c053 100644 --- a/kernels/portable/cpu/util/normalization_ops_util.cpp +++ b/kernels/portable/cpu/util/normalization_ops_util.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include @@ -85,16 +86,16 @@ bool check_layer_norm_args( ndim >= 1, "Expected normalized_shape to be at least 1-dimensional, i.e., containing at least one element."); ET_CHECK_OR_RETURN_FALSE( - in.dim() >= ndim, + in.dim() >= static_cast(ndim), "Expected input tensor to have rank >= the length of normalized_shape."); size_t shift = in.dim() - ndim; - for (size_t d = 0; d < ndim; ++d) { + for (const auto d : c10::irange(ndim)) { ET_CHECK_OR_RETURN_FALSE( in.size(d + shift) == normalized_shape[d], "Expected normalized_shape to match the sizes of input's rightmost dimensions."); } executorch::aten::SizesType shape[ndim]; - for (size_t i = 0; i < ndim; ++i) { + for (const auto i : c10::irange(ndim)) { shape[i] = static_cast(normalized_shape[i]); } @@ -121,8 +122,8 @@ void get_layer_norm_out_target_size( size_t* mean_rstd_ndim) { *mean_rstd_ndim = in.dim(); - for (size_t d = 0; d < in.dim(); ++d) { - if (d < in.dim() - normalized_shape.size()) { + for (const auto d : c10::irange(in.dim())) { + if (d < static_cast(in.dim() - normalized_shape.size())) { mean_rstd_sizes[d] = in.size(d); } else { mean_rstd_sizes[d] = 1; diff --git a/kernels/portable/cpu/util/padding_util.cpp b/kernels/portable/cpu/util/padding_util.cpp index 251c7f1c44b..d5b6e26784b 100644 --- a/kernels/portable/cpu/util/padding_util.cpp +++ b/kernels/portable/cpu/util/padding_util.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -21,10 +22,10 @@ bool check_padding_args( executorch::aten::ArrayRef padding, Tensor& out, bool reflection) { - ET_LOG_AND_RETURN_IF_FALSE(padding.size() == 2 * n); + ET_LOG_AND_RETURN_IF_FALSE(static_cast(padding.size()) == 2 * n); ET_LOG_AND_RETURN_IF_FALSE(in.dim() == n + 1 || in.dim() == n + 2); ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out)); - for (size_t i = 1; i <= n; ++i) { + for (const auto i : c10::irange(1, n + 1)) { ET_LOG_AND_RETURN_IF_FALSE( in.size(in.dim() - i) + padding[2 * i - 2] + padding[2 * i - 1] >= 0); if (reflection) { @@ -43,10 +44,10 @@ void get_padding_out_target_size( Tensor::SizesType* out_sizes, size_t* out_ndim) { *out_ndim = in.dim(); - for (size_t i = 0; i < in.dim(); ++i) { + for (const auto i : c10::irange(in.dim())) { out_sizes[i] = in.size(i); } - for (size_t i = 1; i <= n; ++i) { + for (const auto i : c10::irange(1, n + 1)) { out_sizes[in.dim() - i] = in.size(in.dim() - i) + padding[2 * i - 2] + padding[2 * i - 1]; } diff --git a/kernels/portable/cpu/util/padding_util.h b/kernels/portable/cpu/util/padding_util.h index f8aa367a94b..50cfcc65643 100644 --- a/kernels/portable/cpu/util/padding_util.h +++ b/kernels/portable/cpu/util/padding_util.h @@ -7,6 +7,7 @@ */ #pragma once +#include #include @@ -51,11 +52,10 @@ void pad1d( const auto in_width = in.size(dim); const auto out_width = out.size(dim); const auto pad_left = padding[0]; - - for (size_t i = 0; i < outer; i++) { + for (const auto i : c10::irange(outer)) { size_t out_i_base = i * out_width; size_t in_i_base = i * in_width; - for (size_t w = 0; w < out_width; w++) { + for (const auto w : c10::irange(out_width)) { out_data[out_i_base + w] = in_data[in_i_base + padding_ix(w, in_width, pad_left)]; } @@ -80,14 +80,14 @@ void pad2d( const auto pad_left = padding[0]; const auto pad_top = padding[2]; - for (size_t i = 0; i < outer; i++) { + for (const auto i : c10::irange(outer)) { size_t out_i_base = i * out_height * out_width; size_t in_i_base = i * in_height * in_width; - for (size_t h = 0; h < out_height; h++) { + for 
(const auto h : c10::irange(out_height)) { size_t out_h_base = out_i_base + h * out_width; size_t in_h_base = in_i_base + padding_ix(h, in_height, pad_top) * in_width; - for (size_t w = 0; w < out_width; w++) { + for (const auto w : c10::irange(out_width)) { out_data[out_h_base + w] = in_data[in_h_base + padding_ix(w, in_width, pad_left)]; } @@ -116,18 +116,18 @@ void pad3d( const auto pad_top = padding[2]; const auto pad_front = padding[4]; - for (size_t i = 0; i < outer; i++) { + for (const auto i : c10::irange(outer)) { size_t out_i_base = i * out_depth * out_height * out_width; size_t in_i_base = i * in_depth * in_height * in_width; - for (size_t d = 0; d < out_depth; d++) { + for (const auto d : c10::irange(out_depth)) { size_t out_d_base = out_i_base + d * out_height * out_width; size_t in_d_base = in_i_base + padding_ix(d, in_depth, pad_front) * in_height * in_width; - for (size_t h = 0; h < out_height; h++) { + for (const auto h : c10::irange(out_height)) { size_t out_h_base = out_d_base + h * out_width; size_t in_h_base = in_d_base + padding_ix(h, in_height, pad_top) * in_width; - for (size_t w = 0; w < out_width; w++) { + for (const auto w : c10::irange(out_width)) { out_data[out_h_base + w] = in_data[in_h_base + padding_ix(w, in_width, pad_left)]; } diff --git a/kernels/portable/cpu/util/reduce_util.cpp b/kernels/portable/cpu/util/reduce_util.cpp index fb6ac202f44..2902cbfc138 100644 --- a/kernels/portable/cpu/util/reduce_util.cpp +++ b/kernels/portable/cpu/util/reduce_util.cpp @@ -186,7 +186,7 @@ size_t get_init_index( size_t mutable_out_ix = out_ix; auto strides = in.strides(); for (int64_t d = in.dim() - 1; d >= 0; d--) { - if (d != non_neg_dim) { + if (d != static_cast(non_neg_dim)) { init_ix += (mutable_out_ix % in.size(d)) * strides[d]; mutable_out_ix /= in.size(d); } diff --git a/kernels/portable/cpu/util/repeat_util.cpp b/kernels/portable/cpu/util/repeat_util.cpp index bcb7a7ae0f9..925fda9f793 100644 --- a/kernels/portable/cpu/util/repeat_util.cpp +++ b/kernels/portable/cpu/util/repeat_util.cpp @@ -67,7 +67,8 @@ bool check_repeat_args( } for (size_t i = 0; i < repeats.size(); i++) { ET_CHECK_OR_RETURN_FALSE( - reformat_self_size[i] * repeats[i] == out.size(i), + reformat_self_size[i] * repeats[i] == + static_cast(out.size(i)), "Expect out size at dimension %zu is %" PRId64 ", but now is %zd", i, reformat_self_size[i] * repeats[i], @@ -242,7 +243,7 @@ Error repeat_tensor( // one array a time. To do so, we iterate over all the valid values of slots // array. The repeat_internal() takes care of replicating the array along the // coordinates specified by repeats array. - while (slots[0] != limits[0]) { + while (static_cast(slots[0]) != limits[0]) { // Compute the offset (from origin) in the out tensor where the self // array (with indices in self tensor indicated by slots) will be copied. size_t out_offset = compute_access_offset(slots, strides, self_dim); @@ -256,7 +257,7 @@ Error repeat_tensor( slots[index]++; // If we have reached the limit in the innermost dimension, successively // increment the slot index of outer dimensions. - while (slots[index] == limits[index]) { + while (static_cast(slots[index]) == limits[index]) { if (index == 0) { break; } diff --git a/kernels/portable/cpu/util/slice_util.cpp b/kernels/portable/cpu/util/slice_util.cpp index e6444bd074a..5761dee0ba7 100644 --- a/kernels/portable/cpu/util/slice_util.cpp +++ b/kernels/portable/cpu/util/slice_util.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
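Where the original loop only used its induction variable as a repetition counter, the rewritten range-for marks the variable [[maybe_unused]] so the conversion does not introduce unused-variable warnings (see the compute_slice hunk below and the unbind_copy hunk earlier). A standalone sketch of that pattern; copy_chunks and its parameters are made up for illustration:

#include <c10/util/irange.h>
#include <cstddef>
#include <cstring>

// Copies `count` chunks of `chunk_bytes` bytes from src to dst. The loop
// index is never read inside the body, so it is marked [[maybe_unused]].
void copy_chunks(char* dst, const char* src, size_t count, size_t chunk_bytes) {
  for ([[maybe_unused]] const auto j : c10::irange(count)) {
    std::memcpy(dst, src, chunk_bytes);
    dst += chunk_bytes;
    src += chunk_bytes;
  }
}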
*/ +#include #include #include #include @@ -42,7 +43,7 @@ void get_narrow_copy_out_target_size( size_t* out_ndim) { *out_ndim = in.dim(); - for (size_t d = 0; d < in.dim(); ++d) { + for (const auto d : c10::irange(in.dim())) { out_sizes[d] = in.size(d); } out_sizes[dim] = length; @@ -93,7 +94,7 @@ bool check_slice_scatter_args( // The size of src tensor should follow these rules: // - src.size(i) shall equal to input.size(i) if i != dim, // - src.size(dim) shall equal to num_values - for (size_t d = 0; d < input.dim() - 1; d++) { + for (const auto d : c10::irange(input.dim() - 1)) { if (d != dim) { ET_LOG_AND_RETURN_IF_FALSE( tensors_have_same_size_at_dims(input, d, src, d)); @@ -166,9 +167,9 @@ void compute_slice( const char* input_data = in.const_data_ptr(); char* dest = out.mutable_data_ptr(); - for (int i = 0; i < leading_dims; i++) { + for (const auto i : c10::irange(leading_dims)) { const char* src = input_data + (i * dim_length + start) * length_per_step; - for (int j = 0; j < length; j++) { + for ([[maybe_unused]] const auto j : c10::irange(length)) { memcpy(dest, src, length_per_step); src += step * length_per_step; dest += length_per_step; diff --git a/kernels/portable/cpu/util/transpose_util.h b/kernels/portable/cpu/util/transpose_util.h index 453446fd842..acd6a762d11 100644 --- a/kernels/portable/cpu/util/transpose_util.h +++ b/kernels/portable/cpu/util/transpose_util.h @@ -7,6 +7,7 @@ */ #pragma once +#include #include #include @@ -66,7 +67,7 @@ inline void increment_index_and_offset( // Impossible to happen at i = 0 due to precondition check before this // function is called offset += new_strides[i]; - if (index[i] == new_sizes[i]) { + if (static_cast(index[i]) == new_sizes[i]) { offset -= new_sizes[i] * new_strides[i]; index[i] = 0; } else { @@ -118,7 +119,7 @@ void transpose_tensors( // tensor in output tensor order. 
size_t non_1_dim_indices[kTensorDimensionLimit]; size_t num_non_1_dim_indices = 0; - for (size_t cur_dim = 0; cur_dim < dim; cur_dim++) { + for (const auto cur_dim : c10::irange(dim)) { if (new_sizes[cur_dim] != 1) { non_1_dim_indices[num_non_1_dim_indices++] = cur_dim; } @@ -128,7 +129,7 @@ void transpose_tensors( // Loop over and copy input elements into output size_t a_offset = 0; - for (ssize_t out_offset = 0; out_offset < a.numel(); out_offset++) { + for (const auto out_offset : c10::irange(a.numel())) { data_out[out_offset] = data_a[a_offset]; increment_index_and_offset( out_index, new_sizes, new_strides, indices, a_offset); @@ -164,7 +165,7 @@ inline void get_transpose_out_target_size( return; } - for (size_t i = 0; i < in.dim(); ++i) { + for (const auto i : c10::irange(in.dim())) { out_sizes[i] = in.size(i); } out_sizes[dim0] = in.size(dim1); diff --git a/kernels/portable/cpu/vec_ops.h b/kernels/portable/cpu/vec_ops.h index 617fa0b0652..7a1a488701b 100644 --- a/kernels/portable/cpu/vec_ops.h +++ b/kernels/portable/cpu/vec_ops.h @@ -9,6 +9,7 @@ #pragma once +#include #include #include #include @@ -48,7 +49,7 @@ inline void vec_addf( const float* __restrict__ x, const float* __restrict__ y, size_t size) { - for (size_t i = 0; i < size; ++i) { + for (const auto i : c10::irange(size)) { z[i] = x[i] + y[i]; } } @@ -60,7 +61,7 @@ inline void vec_scalef( const float* __restrict__ x, float scale, size_t size) { - for (size_t i = 0; i < size; ++i) { + for (const auto i : c10::irange(size)) { y[i] = x[i] * scale; } } @@ -75,10 +76,10 @@ inline void vec_matmul( int64_t m, int64_t n, int64_t p) { - for (size_t i = 0; i < m; ++i) { - for (size_t j = 0; j < p; ++j) { + for (const auto i : c10::irange(m)) { + for (const auto j : c10::irange(p)) { T sum = 0; - for (size_t k = 0; k < n; ++k) { + for (const auto k : c10::irange(n)) { sum += x[i * n + k] * y[k * p + j]; } z[i * p + j] = sum; @@ -95,10 +96,10 @@ inline void vec_quantized_matmul_int8( int64_t m, int64_t n, int64_t p) { - for (size_t i = 0; i < m; ++i) { - for (size_t j = 0; j < p; ++j) { + for (const auto i : c10::irange(m)) { + for (const auto j : c10::irange(p)) { T sum = 0; - for (size_t k = 0; k < n; ++k) { + for (const auto k : c10::irange(n)) { sum += x[i * n + k] * static_cast(y[k * p + j]) * s[k]; } z[i * p + j] = sum; @@ -124,13 +125,13 @@ inline void vec_quantized_matmul_transb_int8( int64_t g) { int64_t n_over_g = (n + g - 1) / g; - for (size_t i = 0; i < m; ++i) { - for (size_t j = 0; j < p; ++j) { + for (const auto i : c10::irange(m)) { + for (const auto j : c10::irange(p)) { T sum = 0; - for (size_t k = 0; k < n; k += g) { + for (int64_t k = 0; k < n; k += g) { T psum = 0; // the last group may have fewer than g elements - for (size_t k2 = k; k2 < bounds_min(k + g, n); k2++) { + for (const auto k2 : c10::irange(k, bounds_min(k + g, n))) { psum += x[i * n + k2] * static_cast(y[j * n + k2]); } sum += psum * s[j * n_over_g + k / g]; @@ -154,10 +155,10 @@ inline void vec_addmm( int64_t p, U beta, U alpha) { - for (size_t i = 0; i < m; ++i) { - for (size_t j = 0; j < p; ++j) { + for (const auto i : c10::irange(m)) { + for (const auto j : c10::irange(p)) { T sum = 0; - for (size_t k = 0; k < n; ++k) { + for (const auto k : c10::irange(n)) { sum += mat1_data[i * n + k] * mat2_data[k * p + j]; } out_data[i * p + j] = sum * alpha + self_data[i * p + j] * beta; @@ -176,7 +177,7 @@ inline float reduce_add(const T* x, size_t size) { template inline float vec_powerf(const T* x, size_t size) { float sum = 0; - for (size_t i = 0; i < 
size; ++i) { + for (const auto i : c10::irange(size)) { sum += x[i] * x[i]; } return sum; @@ -198,12 +199,12 @@ inline void vec_softmax(T* __restrict__ y, const U* __restrict__ x, int n) { U max_x = *std::max_element(x, x + n); T sum = 0; - for (int i = 0; i < n; ++i) { + for (const auto i : c10::irange(n)) { y[i] = expf(x[i] - max_x); sum += y[i]; } - for (int i = 0; i < n; ++i) { + for (const auto i : c10::irange(n)) { y[i] /= sum; } } @@ -227,7 +228,7 @@ inline void quantize_i8_f32( float scale, int32_t zero_point, size_t size) { - for (size_t i = 0; i < size; ++i) { + for (const auto i : c10::irange(size)) { float tmp = roundf(x[i] * scale + zero_point); y[i] = internal::clamp(tmp, -128.f, 127.f); } @@ -241,7 +242,7 @@ inline void dequantize_i8_f32( float scale, int32_t zero_point, size_t size) { - for (size_t i = 0; i < size; ++i) { + for (const auto i : c10::irange(size)) { y[i] = scale * (x[i] - zero_point); } } diff --git a/runtime/core/exec_aten/util/dim_order_util.h b/runtime/core/exec_aten/util/dim_order_util.h index 0aef3e5c6c9..7a31db9d6ad 100644 --- a/runtime/core/exec_aten/util/dim_order_util.h +++ b/runtime/core/exec_aten/util/dim_order_util.h @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -254,7 +255,7 @@ ET_NODISCARD inline Error stride_to_dim_order( sorter.quick_sort(array, 0, dims - 1); - for (auto i = 0; i < dims; i++) { + for (const auto i : c10::irange(dims)) { dim_order[i] = array[i].dim_order; } return Error::Ok; diff --git a/runtime/core/exec_aten/util/tensor_util.h b/runtime/core/exec_aten/util/tensor_util.h index d7917e37b19..eb5ce10b6f3 100644 --- a/runtime/core/exec_aten/util/tensor_util.h +++ b/runtime/core/exec_aten/util/tensor_util.h @@ -8,11 +8,13 @@ #pragma once +#include #include #include // std::array #include // PRId64 #include #include // size_t + #include #include diff --git a/test/size_test.cpp b/test/size_test.cpp index 1fab1e914e0..8f67368f64e 100644 --- a/test/size_test.cpp +++ b/test/size_test.cpp @@ -5,6 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -92,10 +93,10 @@ int main(int argc, char** argv) { ET_CHECK(status == Error::Ok); // It assumes the outputs are all tensors. - for (size_t i = 0; i < method->outputs_size(); i++) { + for (const auto i : c10::irange(method->outputs_size())) { auto output_tensor = output_list[i].toTensor(); [[maybe_unused]] auto data_output = output_tensor.const_data_ptr(); - for (size_t j = 0; j < output_list[i].toTensor().numel(); ++j) { + for (const auto j : c10::irange(output_tensor.numel())) { ET_LOG(Info, "%f", data_output[j]); } } From 55ea36d414654deb48af80f7ef9494ab35f67dc6 Mon Sep 17 00:00:00 2001 From: Oscar Andersson <87121123+oscarandersson8218@users.noreply.github.com> Date: Tue, 25 Feb 2025 15:49:22 +0100 Subject: [PATCH 079/584] Arm backend: Add check to not partition float inputs for BI (#8681) Add check to not partition float inputs for BI Floats are not supported in TOSA BI profile. Some supported operators are only quantized if the previous node was quantized. In practice, this means that if an unsupported operator preceeds such an operator, it will not be quantized and the input will be a float. This will likely lead to an assertion error or invalid TOSA graph. This patch aims to detect such nodes, and to reject them. 
Signed-off-by: Oscar Andersson --- .../tosa_supported_operators.py | 8 +- ...test_partition_decomposed_quantized_ops.py | 119 ++++++++++++++++-- backends/arm/tosa_partitioner.py | 17 ++- 3 files changed, 132 insertions(+), 12 deletions(-) diff --git a/backends/arm/operator_support/tosa_supported_operators.py b/backends/arm/operator_support/tosa_supported_operators.py index 6fe70aa696c..b67bded7fb9 100644 --- a/backends/arm/operator_support/tosa_supported_operators.py +++ b/backends/arm/operator_support/tosa_supported_operators.py @@ -310,11 +310,11 @@ def is_node_supported( if not input_quantized: return False - output_quantized = output_quantized or all( - (output_node.target == self.q_op) - or (not get_first_fake_tensor(output_node).dtype.is_floating_point) - for output_node in node.users + all_q_users = all( + (output_node.target == self.q_op) for output_node in node.users ) + is_floating_point = get_first_fake_tensor(node).dtype.is_floating_point + output_quantized = output_quantized or all_q_users or not is_floating_point if not output_quantized: return False diff --git a/backends/arm/test/misc/test_partition_decomposed_quantized_ops.py b/backends/arm/test/misc/test_partition_decomposed_quantized_ops.py index 3fe339e0f9e..5bb692ebcaf 100644 --- a/backends/arm/test/misc/test_partition_decomposed_quantized_ops.py +++ b/backends/arm/test/misc/test_partition_decomposed_quantized_ops.py @@ -19,21 +19,39 @@ ) input_t1 = Tuple[torch.Tensor] -aten_op: list[str] = ["torch.ops.aten.add.Tensor", "torch.ops.aten.softplus.default"] -exir_op: list[str] = [ +softplus_aten_op: list[str] = [ + "torch.ops.aten.add.Tensor", + "torch.ops.aten.softplus.default", +] +softplus_exir_op: list[str] = [ "executorch_exir_dialects_edge__ops_aten_add_Tensor", "executorch_exir_dialects_edge__ops_aten_mul_Tensor", "executorch_exir_dialects_edge__ops_aten_exp_default", "executorch_exir_dialects_edge__ops_aten_div_Tensor", ] +linear_residual_aten_op: list[str] = [ + "torch.ops.aten.linear.default", + "torch.ops.aten.gelu.default", + "torch.ops.aten.dropout.default", + "torch.ops.aten.add.Tensor", +] +linear_residual_exir_op: list[str] = [ + "executorch_exir_dialects_edge__ops_aten_gelu_default", + "executorch_exir_dialects_edge__ops_aten_clone_default", + "executorch_exir_dialects_edge__ops_aten_linear_default", + "executorch_exir_dialects_edge__ops_aten_add_Tensor", +] + test_data: dict[input_t1] = { "3d_rand": (torch.rand(1, 5, 5),), } -class Module(torch.nn.Module): +class SoftplusModule(torch.nn.Module): + """Module containing an addition followed by a Softplus. Softplus is currently not supported by TosaBackend.""" + def __init__(self): super().__init__() self.softplus = torch.nn.Softplus() @@ -42,10 +60,35 @@ def forward(self, x: torch.Tensor): return self.softplus(x + x) +class LinearResidualModule(torch.nn.Module): + """Module containing a residual and a linear layer followed by GELU and a Dropout. + GELU is currently not supported by TosaBackend nor TosaQuantizer. + """ + + def __init__( + self, + ): + super().__init__() + self.linear = torch.nn.Linear(in_features=5, out_features=3) + self.gelu = torch.nn.GELU() + self.dropout = torch.nn.Dropout(0.5) + + def forward(self, x: torch.Tensor): + x1 = self.linear(x) + x2 = self.gelu(x1) + x3 = self.dropout(x2) + return x1 + x3 + + +# Softplus is decomposed which messes up the quantization. This test tests that CheckProperQuantization does not +# partition nodes where quantization is not as expected. 
@common.parametrize("test_data", test_data) def test_softplus_tosa_MI(test_data: input_t1): pipeline = TosaPipelineMI[input_t1]( - Module(), test_data=test_data, aten_op=aten_op, exir_op=exir_op + SoftplusModule(), + test_data=test_data, + aten_op=softplus_aten_op, + exir_op=softplus_exir_op, ) # remove check_count.exir as there will be more than one delegate pipeline.pop_stage("check_count.exir") @@ -55,14 +98,76 @@ def test_softplus_tosa_MI(test_data: input_t1): @common.parametrize("test_data", test_data) def test_softplus_tosa_BI(test_data: input_t1): pipeline = TosaPipelineBI[input_t1]( - Module(), test_data=test_data, aten_op=aten_op, exir_op=exir_op + SoftplusModule(), + test_data=test_data, + aten_op=softplus_aten_op, + exir_op=softplus_exir_op, + ) + pipeline.pop_stage("check_not.exir") + # check that all ops in softplus_exir_op except add are rejected + pipeline.add_stage_after( + "to_edge_transform_and_lower", + pipeline.tester.check, + softplus_exir_op[1:], + suffix="exir_post_partition", + ) + pipeline.run() + + +# Since GELU will not be quantized by TosaQuantizer, the Dropout's input will not be quantized either. +# If so, the Dropout should not be partitioned by TosaPartitioner for TOSA BI profile. This test tests that the +# partitioner indeed does not partition the Dropout (clone) for TOSA BI. +@common.parametrize("test_data", test_data) +def test_linear_residaul_tosa_MI(test_data: input_t1): + pipeline = TosaPipelineMI[input_t1]( + LinearResidualModule(), + test_data=test_data, + aten_op=linear_residual_aten_op, + exir_op=linear_residual_exir_op, + use_to_edge_transform_and_lower=True, + ) + # remove check_count.exir as there will be more than one delegate + pipeline.pop_stage("check_count.exir") + pipeline.pop_stage("check_not.exir") + # check that all ops in linear_residual_exir_op except GELU are partitioned + pipeline.add_stage_after( + "to_edge_transform_and_lower", + pipeline.tester.check_not, + linear_residual_exir_op[1:], + suffix="exir_post_partition", + ) + pipeline.add_stage_after( + "to_edge_transform_and_lower", + pipeline.tester.check, + linear_residual_exir_op[:1], + suffix="exir_post_partition", + ) + pipeline.run() + + +@common.parametrize("test_data", test_data) +def test_linear_residual_tosa_BI(test_data: input_t1): + pipeline = TosaPipelineBI[input_t1]( + LinearResidualModule(), + test_data=test_data, + aten_op=linear_residual_aten_op, + exir_op=linear_residual_exir_op, + use_to_edge_transform_and_lower=True, ) + # remove check_count.exir as there will be more than one delegate + pipeline.pop_stage("check_count.exir") pipeline.pop_stage("check_not.exir") - # check that all ops in exir_op except add are rejected + # check that all ops in linear_residual_exir_op except GELU and Dropout are partitioned + pipeline.add_stage_after( + "to_edge_transform_and_lower", + pipeline.tester.check_not, + linear_residual_exir_op[2:], + suffix="exir_post_partition", + ) pipeline.add_stage_after( "to_edge_transform_and_lower", pipeline.tester.check, - exir_op[1:], + linear_residual_exir_op[:2], suffix="exir_post_partition", ) pipeline.run() diff --git a/backends/arm/tosa_partitioner.py b/backends/arm/tosa_partitioner.py index c2bc48d98d7..ab36a5a6f1e 100644 --- a/backends/arm/tosa_partitioner.py +++ b/backends/arm/tosa_partitioner.py @@ -14,6 +14,7 @@ get_tosa_spec, is_tosa, ) # usort: skip +from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor from executorch.backends.arm.operator_support.tosa_supported_operators import ( tosa_support_factory, 
) @@ -66,7 +67,7 @@ def __init__( self.delegation_spec = DelegationSpec(TOSABackend.__name__, compile_spec) self.additional_checks = additional_checks - def partition(self, exported_program: ExportedProgram) -> PartitionResult: + def partition(self, exported_program: ExportedProgram) -> PartitionResult: # noqa # Run the CapabilityBasedPartitioner to return the largest possible # subgraphs containing the nodes with the tags @@ -110,6 +111,20 @@ def is_partitioned(node: torch.fx.Node, tag=tag) -> bool: del node.meta["delegation_tag"] break + if tosa_spec.support_float(): + continue + + if is_partitioned(node): + for input in node.all_input_nodes: + if is_partitioned(input): + continue + if get_first_fake_tensor(input).dtype.is_floating_point: + logger.info( + f"Not partitioning {node.name} becuase input {input.name} has floating point dtype." + ) + del node.meta["delegation_tag"] + break + tag_constant_data(exported_program) return PartitionResult( From 0276cf368259ed85a81d4218bec32ae2a23788aa Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Tue, 25 Feb 2025 07:21:07 -0800 Subject: [PATCH 080/584] [ET-VK] Adding a workgroup class to VecUtils (#8669) Pull Request resolved: https://github.com/pytorch/executorch/pull/8632 This diff adds a new class called `WorkgroupSize` to the `VecUtils` header file. The `WorkgroupSize` class takes three `uint32_t` values as parameters and stores them in a single `uint32_t` variable using bitwise operations. This class is used in the Vulkan backend to specify the size of a workgroup for a given operation. ghstack-source-id: 268172661 @exported-using-ghexport Differential Revision: [D70021019](https://our.internmc.facebook.com/intern/diff/D70021019/) Co-authored-by: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> --- backends/vulkan/runtime/utils/VecUtils.h | 44 ++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/backends/vulkan/runtime/utils/VecUtils.h b/backends/vulkan/runtime/utils/VecUtils.h index ad4434cf5af..c084a563544 100644 --- a/backends/vulkan/runtime/utils/VecUtils.h +++ b/backends/vulkan/runtime/utils/VecUtils.h @@ -479,5 +479,49 @@ inline int64_t multiply_integers(Iter begin, Iter end) { begin, end, static_cast(1), std::multiplies<>()); } +class WorkgroupSize final { + uint32_t val; + + public: + explicit WorkgroupSize() : val(0) {} + explicit WorkgroupSize(const uint32_t x, const uint32_t y, const uint32_t z) { + // shift numbers by multiple of 11 bits, since each local workgroup axis can + // be 1024 at most and which is 0x400. only z axis can't store 1024, because + // it would overflow uint32_t storage. + if (z == 1024) { + throw std::runtime_error( + "Workgroup size in z axis cannot be 1024 because it would overflow uint32_t storage"); + } + val = x | (y << 11) | (z << 22); + } + + explicit WorkgroupSize(const uvec3& vec) { + // shift numbers by multiple of 11 bits, since each local workgroup axis can + // be 1024 at most and which is 0x400. only z axis can't store 1024, because + // it would overflow uint32_t storage. 
+ if (vec[2u] == 1024) { + throw std::runtime_error( + "Workgroup size in z axis cannot be 1024 because it would overflow uint32_t storage"); + } + val = vec[0u] | (vec[1u] << 11) | (vec[2u] << 22); + } + + explicit inline operator uvec3() const { + return { + val & 0x7ffu, + (val >> 11) & 0x7ffu, + (val >> 22), + }; + } + + explicit inline operator uint32_t() const { + return val; + } + + inline constexpr uint32_t operator[](const int idx) const { + return (val >> (11 * idx)) & 0x7ffu; + } +}; + } // namespace utils } // namespace vkcompute From 54910bcb72490890b3e37056faf51191a3e6500e Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Tue, 25 Feb 2025 07:55:54 -0800 Subject: [PATCH 081/584] [ET-VK] Adding reserve and append functions to SpecVarList (#8670) * [ET-VK] Adding a workgroup class to VecUtils Pull Request resolved: https://github.com/pytorch/executorch/pull/8632 This diff adds a new class called `WorkgroupSize` to the `VecUtils` header file. The `WorkgroupSize` class takes three `uint32_t` values as parameters and stores them in a single `uint32_t` variable using bitwise operations. This class is used in the Vulkan backend to specify the size of a workgroup for a given operation. ghstack-source-id: 268172661 @exported-using-ghexport Differential Revision: [D70021019](https://our.internmc.facebook.com/intern/diff/D70021019/) * [ET-VK] Adding reserve and append functions to SpecVarList Pull Request resolved: https://github.com/pytorch/executorch/pull/8633 This diff adds two new functions to the SpecVarList class in the Vulkan runtime library. The first function, reserve, allows the user to reserve a certain amount of space in the SpecVarList before adding any elements. The second function, append, allows the user to add a single SpecVar to the SpecVarList. These functions are useful for optimizing memory usage and improving performance in the Vulkan runtime. 
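As a rough usage sketch (a self-contained stand-in, not the real `vkapi::SpecVarList`), this is the call pattern the new functions enable and that the pipeline code below adopts: size the backing storage once with `reserve`, append the three local workgroup axes one at a time, then bulk-append any caller-supplied constants.

```cpp
#include <cstdint>
#include <vector>

// Stand-in types for illustration only; the real SpecVar/SpecVarList live in
// backends/vulkan/runtime/vk_api/Pipeline.h and carry more state.
struct SpecVar { uint32_t value; };

struct TinySpecVarList {
  std::vector<SpecVar> vars;
  void reserve(const size_t size) { vars.reserve(size); }
  void append(const SpecVar& other) { vars.push_back(other); }
  void append(const TinySpecVarList& other) {
    vars.insert(vars.end(), other.vars.begin(), other.vars.end());
  }
};

int main() {
  TinySpecVarList extra;                     // caller-supplied constants
  extra.append(SpecVar{8});

  TinySpecVarList constants;
  constants.reserve(3 + extra.vars.size());  // allocate once up front
  constants.append(SpecVar{64});             // local workgroup size, x
  constants.append(SpecVar{4});              // local workgroup size, y
  constants.append(SpecVar{1});              // local workgroup size, z
  constants.append(extra);                   // then everything else
  return constants.vars.size() == 4 ? 0 : 1;
}
```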
ghstack-source-id: 268172659 @exported-using-ghexport Differential Revision: [D70021782](https://our.internmc.facebook.com/intern/diff/D70021782/) --------- Co-authored-by: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> --- backends/vulkan/runtime/vk_api/Pipeline.cpp | 8 ++++++++ backends/vulkan/runtime/vk_api/Pipeline.h | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/backends/vulkan/runtime/vk_api/Pipeline.cpp b/backends/vulkan/runtime/vk_api/Pipeline.cpp index 0c66a085ad9..51b59ed4d1f 100644 --- a/backends/vulkan/runtime/vk_api/Pipeline.cpp +++ b/backends/vulkan/runtime/vk_api/Pipeline.cpp @@ -174,6 +174,14 @@ void SpecVarList::append(const SpecVarList& other) { vars.insert(vars.end(), other.vars.begin(), other.vars.end()); } +void SpecVarList::reserve(const size_t size) { + vars.reserve(size); +} + +void SpecVarList::append(const SpecVar& other) { + vars.push_back(other); +} + std::vector SpecVarList::generate_map_entries() const { std::vector map_entries; diff --git a/backends/vulkan/runtime/vk_api/Pipeline.h b/backends/vulkan/runtime/vk_api/Pipeline.h index 5460a0acba7..b9f4e3d2a35 100644 --- a/backends/vulkan/runtime/vk_api/Pipeline.h +++ b/backends/vulkan/runtime/vk_api/Pipeline.h @@ -82,6 +82,10 @@ class SpecVarList final { void append(const SpecVarList& other); + void reserve(const size_t size); + + void append(const SpecVar& other); + std::vector generate_map_entries() const; friend bool operator==(const SpecVarList& lhs, const SpecVarList& rhs); From 973eb1c4f2f076d382c881d1b133f601ceddd0b4 Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Tue, 25 Feb 2025 07:56:54 -0800 Subject: [PATCH 082/584] [ET-VK] Replacing the use of uvec3 with WorkgroupSize class to reduce memory usage and improve processing speed (#8671) * [ET-VK] Adding a workgroup class to VecUtils Pull Request resolved: https://github.com/pytorch/executorch/pull/8632 This diff adds a new class called `WorkgroupSize` to the `VecUtils` header file. The `WorkgroupSize` class takes three `uint32_t` values as parameters and stores them in a single `uint32_t` variable using bitwise operations. This class is used in the Vulkan backend to specify the size of a workgroup for a given operation. ghstack-source-id: 268172661 @exported-using-ghexport Differential Revision: [D70021019](https://our.internmc.facebook.com/intern/diff/D70021019/) * [ET-VK] Adding reserve and append functions to SpecVarList Pull Request resolved: https://github.com/pytorch/executorch/pull/8633 This diff adds two new functions to the SpecVarList class in the Vulkan runtime library. The first function, reserve, allows the user to reserve a certain amount of space in the SpecVarList before adding any elements. The second function, append, allows the user to add a single SpecVar to the SpecVarList. These functions are useful for optimizing memory usage and improving performance in the Vulkan runtime. ghstack-source-id: 268172659 @exported-using-ghexport Differential Revision: [D70021782](https://our.internmc.facebook.com/intern/diff/D70021782/) * [ET-VK] Replacing the use of uvec3 with WorkgroupSize class to reduce memory usage and improve processing speed Pull Request resolved: https://github.com/pytorch/executorch/pull/8634 This diff replaces the use of `uvec3` with `WorkgroupSize` class to reduce memory usage and improve processing speed in the Vulkan backend of Executorch. 
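For reference, a standalone sketch of the packing scheme this change relies on (an illustrative stand-in, not the actual `utils::WorkgroupSize` header): each axis takes 11 bits of a single `uint32_t`, so the former three-word `uvec3` collapses to one word that can be hashed and compared as a plain integer.

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// Mirrors the layout in the VecUtils change above: x | (y << 11) | (z << 22).
// x and y may be as large as 1024 (0x400); z must stay below 1024, since
// z << 22 would otherwise need bit 32 of the uint32_t.
uint32_t pack_wg(uint32_t x, uint32_t y, uint32_t z) {
  assert(z < 1024u);
  return x | (y << 11) | (z << 22);
}

uint32_t wg_axis(uint32_t packed, int idx) {
  return (packed >> (11 * idx)) & 0x7ffu;
}

int main() {
  const uint32_t wg = pack_wg(64, 4, 1);
  // Prints "64 4 1": the three axes round-trip through the packed word.
  printf("%u %u %u\n", wg_axis(wg, 0), wg_axis(wg, 1), wg_axis(wg, 2));
  return 0;
}
```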
ghstack-source-id: 268172660 @exported-using-ghexport Differential Revision: [D70021032](https://our.internmc.facebook.com/intern/diff/D70021032/) --------- Co-authored-by: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> --- backends/vulkan/runtime/api/Context.cpp | 16 +++++----------- backends/vulkan/runtime/api/Context.h | 14 +++++++++----- .../vulkan/runtime/graph/ops/BlitNode.cpp | 2 +- .../vulkan/runtime/graph/ops/DispatchNode.h | 2 +- .../vulkan/runtime/graph/ops/PrepackNode.cpp | 4 ++-- .../vulkan/runtime/graph/ops/PrepackNode.h | 2 +- backends/vulkan/runtime/vk_api/Command.cpp | 2 +- backends/vulkan/runtime/vk_api/Command.h | 6 +++--- backends/vulkan/runtime/vk_api/Pipeline.cpp | 19 ++++++++++++++----- backends/vulkan/runtime/vk_api/Pipeline.h | 4 ++++ backends/vulkan/runtime/vk_api/Shader.h | 2 +- 11 files changed, 42 insertions(+), 31 deletions(-) diff --git a/backends/vulkan/runtime/api/Context.cpp b/backends/vulkan/runtime/api/Context.cpp index 8178ada3a45..64f32e50f4e 100644 --- a/backends/vulkan/runtime/api/Context.cpp +++ b/backends/vulkan/runtime/api/Context.cpp @@ -74,7 +74,7 @@ void Context::cmd_reset_querypool() { void Context::report_shader_dispatch_start( const std::string& shader_name, const utils::uvec3& global_wg_size, - const utils::uvec3& local_wg_size, + const utils::WorkgroupSize& local_wg_size, const uint32_t dispatch_id) { if (querypool_) { querypool_.shader_profile_begin( @@ -82,7 +82,7 @@ void Context::report_shader_dispatch_start( dispatch_id, shader_name, vkapi::create_extent3d(global_wg_size), - vkapi::create_extent3d(local_wg_size)); + vkapi::create_extent3d((utils::uvec3)local_wg_size)); } } @@ -115,7 +115,7 @@ void Context::check_device_capabilities(const vkapi::ShaderInfo& shader) { vkapi::DescriptorSet Context::get_descriptor_set( const vkapi::ShaderInfo& shader_descriptor, - const utils::uvec3& local_workgroup_size, + const utils::WorkgroupSize& local_workgroup_size, const vkapi::SpecVarList& additional_constants, const uint32_t push_constants_size) { VkDescriptorSetLayout shader_layout = @@ -124,17 +124,11 @@ vkapi::DescriptorSet Context::get_descriptor_set( VkPipelineLayout pipeline_layout = pipeline_layout_cache().retrieve(shader_layout, push_constants_size); - vkapi::SpecVarList spec_constants = { - SV(local_workgroup_size[0u]), - SV(local_workgroup_size[1u]), - SV(local_workgroup_size[2u])}; - - spec_constants.append(additional_constants); - VkPipeline pipeline = pipeline_cache().retrieve( {pipeline_layout_cache().retrieve(shader_layout, push_constants_size), shader_cache().retrieve(shader_descriptor), - spec_constants}); + additional_constants, + local_workgroup_size}); cmd_.bind_pipeline(pipeline, pipeline_layout, local_workgroup_size); diff --git a/backends/vulkan/runtime/api/Context.h b/backends/vulkan/runtime/api/Context.h index 8bbcf79b45c..6cfbc64f141 100644 --- a/backends/vulkan/runtime/api/Context.h +++ b/backends/vulkan/runtime/api/Context.h @@ -11,6 +11,7 @@ // @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName #include +#include #include #include @@ -150,7 +151,7 @@ class Context final { void report_shader_dispatch_start( const std::string& shader_name, const utils::uvec3& global_wg_size, - const utils::uvec3& local_wg_size, + const utils::WorkgroupSize& local_wg_size, const uint32_t dispatch_id = UINT32_MAX); /* @@ -189,13 +190,13 @@ class Context final { vkapi::DescriptorSet get_descriptor_set( const vkapi::ShaderInfo&, - const utils::uvec3&, + const utils::WorkgroupSize&, const vkapi::SpecVarList&, const 
uint32_t push_constants_size); inline vkapi::DescriptorSet get_descriptor_set( const vkapi::ShaderInfo& shader_descriptor, - const utils::uvec3& local_work_group_size) { + const utils::WorkgroupSize& local_work_group_size) { return get_descriptor_set(shader_descriptor, local_work_group_size, {}, 0u); } @@ -362,14 +363,17 @@ inline bool Context::submit_compute_job( report_shader_dispatch_start( shader.kernel_name, global_work_group, - local_work_group_size, + utils::WorkgroupSize(local_work_group_size), dispatch_id); // Factor out template parameter independent code to minimize code bloat. // Note that push constants are not exposed yet via this API, therefore the // push constants size is assumed to be 0. vkapi::DescriptorSet descriptor_set = get_descriptor_set( - shader, local_work_group_size, specialization_constants, 0u); + shader, + utils::WorkgroupSize(local_work_group_size), + specialization_constants, + 0u); detail::bind( descriptor_set, diff --git a/backends/vulkan/runtime/graph/ops/BlitNode.cpp b/backends/vulkan/runtime/graph/ops/BlitNode.cpp index 463a2d19c36..03ee4caa51a 100644 --- a/backends/vulkan/runtime/graph/ops/BlitNode.cpp +++ b/backends/vulkan/runtime/graph/ops/BlitNode.cpp @@ -46,7 +46,7 @@ void BlitNode::encode(ComputeGraph* graph) { kernel_name += vkapi::to_string(dst_tensor->dtype()); context->report_shader_dispatch_start( - kernel_name, utils::uvec3(), utils::uvec3(), node_id_); + kernel_name, utils::uvec3(), utils::WorkgroupSize(), node_id_); context->register_blit( pipeline_barrier, diff --git a/backends/vulkan/runtime/graph/ops/DispatchNode.h b/backends/vulkan/runtime/graph/ops/DispatchNode.h index 7d04f7714e9..4661b5bf9cf 100644 --- a/backends/vulkan/runtime/graph/ops/DispatchNode.h +++ b/backends/vulkan/runtime/graph/ops/DispatchNode.h @@ -92,7 +92,7 @@ class DispatchNode final : public ExecuteNode { protected: const vkapi::ShaderInfo shader_; const utils::uvec3 global_workgroup_size_; - const utils::uvec3 local_workgroup_size_; + const utils::WorkgroupSize local_workgroup_size_; const vkapi::ParamsBindList params_; const vkapi::SpecVarList spec_vars_; const std::vector push_constants_; diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp index bf501296b1b..0507b679e13 100644 --- a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp +++ b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp @@ -100,8 +100,8 @@ void PrepackNode::encode(ComputeGraph* graph) { // bound with the correct image layout. 
{ vkapi::PipelineBarrier pipeline_barrier{}; - vkapi::DescriptorSet descriptor_set = - context->get_descriptor_set(noop_shader_, {1, 1, 1}); + vkapi::DescriptorSet descriptor_set = context->get_descriptor_set( + noop_shader_, utils::WorkgroupSize(1, 1, 1)); bind_tensor_to_descriptor_set( *packed, diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.h b/backends/vulkan/runtime/graph/ops/PrepackNode.h index 3e713303c3d..2d194e7f6a0 100644 --- a/backends/vulkan/runtime/graph/ops/PrepackNode.h +++ b/backends/vulkan/runtime/graph/ops/PrepackNode.h @@ -49,7 +49,7 @@ class PrepackNode final { const vkapi::ShaderInfo shader_; vkapi::ShaderInfo noop_shader_; const utils::uvec3 global_workgroup_size_; - const utils::uvec3 local_workgroup_size_; + const utils::WorkgroupSize local_workgroup_size_; const ValueRef tref_; const ValueRef packed_; const vkapi::ParamsBindList params_; diff --git a/backends/vulkan/runtime/vk_api/Command.cpp b/backends/vulkan/runtime/vk_api/Command.cpp index 3be790b53cf..3a5041f9500 100644 --- a/backends/vulkan/runtime/vk_api/Command.cpp +++ b/backends/vulkan/runtime/vk_api/Command.cpp @@ -81,7 +81,7 @@ void CommandBuffer::end() { void CommandBuffer::bind_pipeline( VkPipeline pipeline, VkPipelineLayout pipeline_layout, - const utils::uvec3 local_workgroup_size) { + const utils::WorkgroupSize local_workgroup_size) { VK_CHECK_COND( state_ == CommandBuffer::State::RECORDING, "Vulkan CommandBuffer: called bind_pipeline() on a command buffer whose state " diff --git a/backends/vulkan/runtime/vk_api/Command.h b/backends/vulkan/runtime/vk_api/Command.h index 99cd5d17c99..ff1e5934a5c 100644 --- a/backends/vulkan/runtime/vk_api/Command.h +++ b/backends/vulkan/runtime/vk_api/Command.h @@ -51,7 +51,7 @@ class CommandBuffer final { struct Bound { VkPipeline pipeline; VkPipelineLayout pipeline_layout; - utils::uvec3 local_workgroup_size; + utils::WorkgroupSize local_workgroup_size; VkDescriptorSet descriptors; explicit Bound() @@ -63,7 +63,7 @@ class CommandBuffer final { inline void reset() { pipeline = VK_NULL_HANDLE; pipeline_layout = VK_NULL_HANDLE; - local_workgroup_size = {0u, 0u, 0u}; + local_workgroup_size = utils::WorkgroupSize{0u, 0u, 0u}; descriptors = VK_NULL_HANDLE; } }; @@ -87,7 +87,7 @@ class CommandBuffer final { void begin(); void end(); - void bind_pipeline(VkPipeline, VkPipelineLayout, const utils::uvec3); + void bind_pipeline(VkPipeline, VkPipelineLayout, const utils::WorkgroupSize); void bind_descriptors(VkDescriptorSet); void set_push_constants(VkPipelineLayout, const void*, uint32_t); diff --git a/backends/vulkan/runtime/vk_api/Pipeline.cpp b/backends/vulkan/runtime/vk_api/Pipeline.cpp index 51b59ed4d1f..b5ee47cd2c7 100644 --- a/backends/vulkan/runtime/vk_api/Pipeline.cpp +++ b/backends/vulkan/runtime/vk_api/Pipeline.cpp @@ -275,14 +275,23 @@ ComputePipeline::ComputePipeline( const ComputePipeline::Descriptor& descriptor, VkPipelineCache pipeline_cache) : device_(device), handle_{VK_NULL_HANDLE} { - std::vector map_entries = - descriptor.specialization_constants.generate_map_entries(); + SpecVarList specialization_constants; + + specialization_constants.reserve( + 3 + descriptor.specialization_constants.size()); + specialization_constants.append(descriptor.local_wg_size[0]); + specialization_constants.append(descriptor.local_wg_size[1]); + specialization_constants.append(descriptor.local_wg_size[2]); + + specialization_constants.append(descriptor.specialization_constants); + const std::vector map_entries = + specialization_constants.generate_map_entries(); 
const VkSpecializationInfo specialization_info{ - descriptor.specialization_constants.size(), // mapEntryCount + specialization_constants.size(), // mapEntryCount map_entries.data(), // pMapEntries - descriptor.specialization_constants.data_nbytes(), // dataSize - descriptor.specialization_constants.data(), // pData + specialization_constants.data_nbytes(), // dataSize + specialization_constants.data(), // pData }; const VkPipelineShaderStageCreateInfo shader_stage_create_info{ diff --git a/backends/vulkan/runtime/vk_api/Pipeline.h b/backends/vulkan/runtime/vk_api/Pipeline.h index b9f4e3d2a35..3248051d12a 100644 --- a/backends/vulkan/runtime/vk_api/Pipeline.h +++ b/backends/vulkan/runtime/vk_api/Pipeline.h @@ -156,6 +156,7 @@ class ComputePipeline final { VkPipelineLayout pipeline_layout; VkShaderModule shader_module; SpecVarList specialization_constants; + utils::WorkgroupSize local_wg_size; }; explicit ComputePipeline( @@ -273,6 +274,9 @@ class ComputePipelineCache final { seed = utils::hash_combine(seed, new_seed); } + seed = utils::hash_combine( + seed, std::hash()((uint32_t)descriptor.local_wg_size)); + return seed; } }; diff --git a/backends/vulkan/runtime/vk_api/Shader.h b/backends/vulkan/runtime/vk_api/Shader.h index d9fec65febc..7d0fa7b7476 100644 --- a/backends/vulkan/runtime/vk_api/Shader.h +++ b/backends/vulkan/runtime/vk_api/Shader.h @@ -61,7 +61,7 @@ struct ShaderInfo final { ShaderLayout::Signature kernel_layout{}; // Shader Metadata - utils::uvec3 out_tile_size{1u, 1u, 1u}; + utils::WorkgroupSize out_tile_size{1u, 1u, 1u}; bool requires_shader_int16 = false; bool requires_16bit_storage = false; bool requires_8bit_storage = false; From 34906b83a18506c80f8262cf85be3bdd90a59f9c Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Tue, 25 Feb 2025 07:57:54 -0800 Subject: [PATCH 083/584] Fix optimized log_softmax along non-contiguous dim (#8666) Fix log_softmax along non-contiguous dim Pull Request resolved: https://github.com/pytorch/executorch/pull/8595 #8382 certainly didn't fix this problem (and added it on x86), but I don't think it was correct on ARM prior to that either. Added a regression test. ghstack-source-id: 268149462 @exported-using-ghexport Differential Revision: [D69928884](https://our.internmc.facebook.com/intern/diff/D69928884/) Co-authored-by: Scott Wolchok --- kernels/optimized/cpu/op_log_softmax.cpp | 19 ++++---- kernels/test/op_log_softmax_test.cpp | 57 ++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 8 deletions(-) diff --git a/kernels/optimized/cpu/op_log_softmax.cpp b/kernels/optimized/cpu/op_log_softmax.cpp index 1d2467bca5f..1822a06f29f 100644 --- a/kernels/optimized/cpu/op_log_softmax.cpp +++ b/kernels/optimized/cpu/op_log_softmax.cpp @@ -75,17 +75,20 @@ void log_softmax_kernel(const Tensor& input, int64_t dim, Tensor& out) { static_assert( std::is_same_v, "Below loop actually only supports float."); - const VecIn max_input_vec(max_input); - for (; d + VecOut::size() < dim_size; d += VecOut::size()) { - auto index = d * dim_stride; - auto in = VecIn::loadu(&input_data[index]); - auto out_ = (in - max_input_vec).exp(); - out_.store(&output_data[index]); + // It is not correct to vectorize if dim is not contiguous! 
+ if (dim_stride == 1) { + const VecIn max_input_vec(max_input); + for (; d + VecOut::size() < dim_size; d += VecOut::size()) { + auto index = d * dim_stride; + auto in = VecIn::loadu(&input_data[index]); + auto out_ = (in - max_input_vec).exp(); + out_.store(&output_data[index]); #if defined(__aarch64__) && !defined(CPU_CAPABILITY_SVE) - temp_sum += vaddvq_f32(out_); + temp_sum += vaddvq_f32(out_); #else - temp_sum += at::vec::vec_reduce_all(std::plus(), out_); + temp_sum += at::vec::vec_reduce_all(std::plus(), out_); #endif + } } for (; d < dim_size; ++d) { output_data[d * dim_stride] = diff --git a/kernels/test/op_log_softmax_test.cpp b/kernels/test/op_log_softmax_test.cpp index 94047592a80..1b01ff8a78d 100644 --- a/kernels/test/op_log_softmax_test.cpp +++ b/kernels/test/op_log_softmax_test.cpp @@ -72,6 +72,59 @@ class OpLogSoftmaxOutTest : public OperatorTest { EXPECT_TENSOR_CLOSE(out, expected); } } + + template + void test_dtype_noncontiguous_dim() { + TensorFactory tf; + + // Dim 0 must be longer than the vector width of the machine (for + // float, this is 4 for ARM64 and 8 for AVX2) to exhibit problems. + // clang-format off + Tensor x = tf.make( + {9, 3}, + { + 0, 9, 18, + 1, 10, 19, + 2, 11, 20, + 3, 12, 21, + 4, 13, 22, + 5, 14, 23, + 6, 15, 24, + 7, 16, 25, + 8, 17, 26, + }); + // clang-format on + + Tensor out = tf.zeros({9, 3}); + + op_log_softmax_out(x, /*dim=*/0, /*half_to_float*/ false, out); + + // clang-format off + Tensor expected = tf.make( + {9, 3}, + { + -8.45855, -8.45855, -8.45855, + -7.45855, -7.45855, -7.45855, + -6.45855, -6.45855, -6.45855, + -5.45855, -5.45855, -5.45855, + -4.45855, -4.45855, -4.45855, + -3.45855, -3.45855, -3.45855, + -2.45855, -2.45855, -2.45855, + -1.45855, -1.45855, -1.45855, + -0.458552, -0.458552, -0.458552 + }); + // clang-format on + + if constexpr (DTYPE == ScalarType::BFloat16) { + EXPECT_TENSOR_CLOSE_WITH_TOL( + out, + expected, + 1e-2, + executorch::runtime::testing::internal::kDefaultAtol); + } else { + EXPECT_TENSOR_CLOSE(out, expected); + } + } }; TEST_F(OpLogSoftmaxOutTest, Smoke) { @@ -101,6 +154,10 @@ TEST_F(OpLogSoftmaxOutTest, AllDtypesSupported) { #undef TEST_ENTRY } +TEST_F(OpLogSoftmaxOutTest, NonContiguous) { + test_dtype_noncontiguous_dim(); +} + TEST_F(OpLogSoftmaxOutTest, MismatchedDimensionsDies) { if (SupportedFeatures::get()->is_aten) { GTEST_SKIP() << "ATen currently supports mismatched dimensions"; From a02ba23953332572137205cf081a0d492026a118 Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Tue, 25 Feb 2025 07:58:36 -0800 Subject: [PATCH 084/584] Re-enable optimized gelu test in CMake (#8667) * Fix log_softmax along non-contiguous dim Pull Request resolved: https://github.com/pytorch/executorch/pull/8595 #8382 certainly didn't fix this problem (and added it on x86), but I don't think it was correct on ARM prior to that either. Added a regression test. ghstack-source-id: 268149462 @exported-using-ghexport Differential Revision: [D69928884](https://our.internmc.facebook.com/intern/diff/D69928884/) * Re-enable optimized gelu test in CMake Pull Request resolved: https://github.com/pytorch/executorch/pull/8597 I missed this line disabling the test. (Splitting out re-enable of log_softmax because I think that one needs fixes.) 
ghstack-source-id: 268149463 @exported-using-ghexport Differential Revision: [D69929122](https://our.internmc.facebook.com/intern/diff/D69929122/) --------- Co-authored-by: Scott Wolchok --- kernels/test/CMakeLists.txt | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/kernels/test/CMakeLists.txt b/kernels/test/CMakeLists.txt index 4250f1f7581..63b92ab525b 100644 --- a/kernels/test/CMakeLists.txt +++ b/kernels/test/CMakeLists.txt @@ -66,7 +66,7 @@ foreach(kernel ${_kernels}) cp "${CMAKE_CURRENT_BINARY_DIR}/../../kernels/${kernel}/${kernel}_ops_lib/*.h" "${CMAKE_CURRENT_BINARY_DIR}/include/${kernel}/executorch/kernels/${kernel}/" - DEPENDS "${kernel}_ops_lib" + DEPENDS "${kernel}_ops_lib" ) endforeach() @@ -278,10 +278,8 @@ set(_optimized_kernels_test_sources ${CMAKE_CURRENT_BINARY_DIR}/include/portable/executorch/kernels/test/supported_features.cpp ) -# We don't have sleef on OSS so we don't have gelu and log_softmax -list(REMOVE_ITEM _optimized_kernels_test_sources "op_gelu_test.cpp" - "op_log_softmax_test.cpp" -) +# We don't have sleef on OSS so we don't have log_softmax +list(REMOVE_ITEM _optimized_kernels_test_sources "op_log_softmax_test.cpp") et_cxx_test( optimized_kernels_test From 8cb01a61d4fe8dd5cc976453abdf5a2b39e05436 Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Tue, 25 Feb 2025 08:14:29 -0800 Subject: [PATCH 085/584] Clean up optimized-oss.yaml (#8668) Pull Request resolved: https://github.com/pytorch/executorch/pull/8549 We don't need this now that we support log_softmax and gelu in OSS! ghstack-source-id: 268149465 @exported-using-ghexport Differential Revision: [D69475020](https://our.internmc.facebook.com/intern/diff/D69475020/) --------- Co-authored-by: Scott Wolchok --- build/cmake_deps.toml | 6 +- configurations/CMakeLists.txt | 2 +- configurations/targets.bzl | 18 ---- examples/models/llama/runner/targets.bzl | 5 +- kernels/optimized/CMakeLists.txt | 4 +- kernels/optimized/cpu/targets.bzl | 8 +- kernels/optimized/op_registration_util.bzl | 6 +- kernels/optimized/optimized-oss.yaml | 96 ------------------- kernels/optimized/targets.bzl | 8 -- kernels/test/CMakeLists.txt | 6 +- .../optimized/op_registration_util.bzl | 23 +++-- 11 files changed, 27 insertions(+), 155 deletions(-) delete mode 100644 kernels/optimized/optimized-oss.yaml diff --git a/build/cmake_deps.toml b/build/cmake_deps.toml index c44fcf92ea6..21a8e282929 100644 --- a/build/cmake_deps.toml +++ b/build/cmake_deps.toml @@ -117,9 +117,9 @@ deps = [ "executorch", ] -[targets.optimized_native_cpu_ops_oss] +[targets.optimized_native_cpu_ops] buck_targets = [ - "//configurations:optimized_native_cpu_ops_oss", + "//configurations:optimized_native_cpu_ops", ] filters = [ ".cpp$", @@ -437,6 +437,6 @@ deps = [ "portable_kernels", "quantized_kernels", "xnnpack_backend", - "optimized_native_cpu_ops_oss", + "optimized_native_cpu_ops", ] # ---------------------------------- LLama end ---------------------------------- diff --git a/configurations/CMakeLists.txt b/configurations/CMakeLists.txt index eddb8b2a12c..462124a6ea6 100644 --- a/configurations/CMakeLists.txt +++ b/configurations/CMakeLists.txt @@ -30,7 +30,7 @@ include(${EXECUTORCH_ROOT}/build/Codegen.cmake) if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED) # Merge optimized and portable definitions, taking optimized where available. 
merge_yaml( - FUNCTIONS_YAML ${EXECUTORCH_ROOT}/kernels/optimized/optimized-oss.yaml + FUNCTIONS_YAML ${EXECUTORCH_ROOT}/kernels/optimized/optimized.yaml FALLBACK_YAML ${EXECUTORCH_ROOT}/kernels/portable/functions.yaml OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR} ) diff --git a/configurations/targets.bzl b/configurations/targets.bzl index 6a5341c2904..5a39f7301ec 100644 --- a/configurations/targets.bzl +++ b/configurations/targets.bzl @@ -50,21 +50,3 @@ def define_common_targets(): "@EXECUTORCH_CLIENTS", ], ) - - # TODO(T183193812): delete this target after optimized-oss.yaml is gone - executorch_generated_lib( - name = "optimized_native_cpu_ops_oss", - deps = [ - "//executorch/kernels/optimized:optimized_operators", - "//executorch/kernels/optimized:optimized_oplist", - "//executorch/kernels/portable:executorch_aten_ops", - "//executorch/kernels/portable:operators", - ], - functions_yaml_target = "//executorch/kernels/optimized:optimized-oss.yaml", - fallback_yaml_target = "//executorch/kernels/portable:functions.yaml", - define_static_targets = True, - visibility = [ - "//executorch/examples/...", - "@EXECUTORCH_CLIENTS", - ], - ) diff --git a/examples/models/llama/runner/targets.bzl b/examples/models/llama/runner/targets.bzl index 9f095b93970..37827bb78a5 100644 --- a/examples/models/llama/runner/targets.bzl +++ b/examples/models/llama/runner/targets.bzl @@ -3,9 +3,6 @@ load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") def _get_operator_lib(aten = False): if aten: return ["//executorch/kernels/aten:generated_lib"] - elif runtime.is_oss: - # TODO(T183193812): delete this path after optimized-oss.yaml is no more. - return ["//executorch/configurations:optimized_native_cpu_ops_oss", "//executorch/extension/llm/custom_ops:custom_ops"] else: return ["//executorch/configurations:optimized_native_cpu_ops", "//executorch/extension/llm/custom_ops:custom_ops"] @@ -13,7 +10,7 @@ def get_qnn_dependency(): # buck build -c executorch.enable_qnn=true //executorch/examples/models/llama/runner:runner # Check if QNN is enabled before including the dependency if native.read_config("executorch", "enable_qnn", "false") == "true": - # //executorch/backends/qualcomm:qnn_executorch_backend doesn't work, + # //executorch/backends/qualcomm:qnn_executorch_backend doesn't work, # likely due to it's an empty library with dependency only return [ "//executorch/backends/qualcomm/runtime:runtime", diff --git a/kernels/optimized/CMakeLists.txt b/kernels/optimized/CMakeLists.txt index 1f3aff57ecf..235c6738d9a 100644 --- a/kernels/optimized/CMakeLists.txt +++ b/kernels/optimized/CMakeLists.txt @@ -49,12 +49,12 @@ target_compile_options(cpublas PUBLIC ${_common_compile_options}) # Generate C++ bindings to register kernels into both PyTorch (for AOT) and # Executorch (for runtime). 
Here select all ops in optimized.yaml -set(_yaml "${CMAKE_CURRENT_LIST_DIR}/optimized-oss.yaml") +set(_yaml "${CMAKE_CURRENT_LIST_DIR}/optimized.yaml") gen_selected_ops(LIB_NAME "optimized_ops_lib" OPS_SCHEMA_YAML "${_yaml}") generate_bindings_for_kernels( LIB_NAME "optimized_ops_lib" FUNCTIONS_YAML - ${CMAKE_CURRENT_SOURCE_DIR}/optimized-oss.yaml + ${CMAKE_CURRENT_SOURCE_DIR}/optimized.yaml ADD_EXCEPTION_BOUNDARY ) message("Generated files ${gen_command_sources}") diff --git a/kernels/optimized/cpu/targets.bzl b/kernels/optimized/cpu/targets.bzl index 2a66407a5ce..83b2c320266 100644 --- a/kernels/optimized/cpu/targets.bzl +++ b/kernels/optimized/cpu/targets.bzl @@ -1,5 +1,5 @@ load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") -load("@fbsource//xplat/executorch/kernels/optimized:op_registration_util.bzl", "define_op_target", "is_op_disabled", "op_target") +load("@fbsource//xplat/executorch/kernels/optimized:op_registration_util.bzl", "define_op_target", "op_target") _OPTIMIZED_ATEN_OPS = ( op_target( @@ -111,13 +111,11 @@ def define_common_targets(): TARGETS and BUCK files that call this function. """ - enabled_ops = [op for op in _OPTIMIZED_ATEN_OPS if not is_op_disabled(op["name"])] - # Define build targets for all operators registered in the tables above. - for op in enabled_ops: + for op in _OPTIMIZED_ATEN_OPS: define_op_target(**op) - aten_op_targets = [":{}".format(op["name"]) for op in enabled_ops] + aten_op_targets = [":{}".format(op["name"]) for op in _OPTIMIZED_ATEN_OPS] all_op_targets = aten_op_targets runtime.cxx_library( diff --git a/kernels/optimized/op_registration_util.bzl b/kernels/optimized/op_registration_util.bzl index 12a5f012a38..3ac89132380 100644 --- a/kernels/optimized/op_registration_util.bzl +++ b/kernels/optimized/op_registration_util.bzl @@ -2,8 +2,8 @@ load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") load("@fbsource//xplat/executorch/build:selects.bzl", "selects") load( "@fbsource//xplat/executorch/kernels/optimized:lib_defs.bzl", - "get_vec_preprocessor_flags", "get_vec_deps", + "get_vec_preprocessor_flags", ) load( "@fbsource//xplat/executorch/kernels/portable:op_registration_util.bzl", @@ -137,7 +137,3 @@ def define_op_target(name, compiler_flags, deps): compiler_flags = compiler_flags, deps = deps, ) - -def is_op_disabled(name): - # All ops are enabled for internal builds. - return False diff --git a/kernels/optimized/optimized-oss.yaml b/kernels/optimized/optimized-oss.yaml deleted file mode 100644 index a24aa9ca173..00000000000 --- a/kernels/optimized/optimized-oss.yaml +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This yaml file contains operators that have optimized kernels available. -# Note that this is a copy of optimized.yaml that does not include log_softmax, -# due to the OSS build not currently including sleef. 
-# TODO (T183193812) - -- op: _fft_r2c.out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_fft_r2c_out - -- op: add.out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_add_out - -- op: add.Scalar_out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_add_scalar_out - -- op: bmm.out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_bmm_out - -- op: div.out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_div_out - -- op: div.Scalar_out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_div_scalar_out - -- op: exp.out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_exp_out - -- op: sigmoid.out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_sigmoid_out - -- op: gelu.out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_gelu_out - -- op: le.Scalar_out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_le_scalar_out - -- op: le.Tensor_out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_le_tensor_out - -- op: linear.out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_linear_out - -- op: mul.out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_mul_out - -- op: mul.Scalar_out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_mul_scalar_out - -- op: native_layer_norm.out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_native_layer_norm_out - -- op: neg.out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_neg_out - -- op: sub.out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_sub_out - -- op: sub.Scalar_out - kernels: - - arg_meta: null - kernel_name: torch::executor::opt_sub_scalar_out diff --git a/kernels/optimized/targets.bzl b/kernels/optimized/targets.bzl index 9978d4196dd..c655cb149a3 100644 --- a/kernels/optimized/targets.bzl +++ b/kernels/optimized/targets.bzl @@ -19,14 +19,6 @@ def define_common_targets(is_fbcode=False): ], ) - runtime.export_file( - name = "optimized-oss.yaml", - visibility = [ - "//executorch/...", - "@EXECUTORCH_CLIENTS", - ], - ) - runtime.cxx_library( name = "optimized_operators", srcs = [], diff --git a/kernels/test/CMakeLists.txt b/kernels/test/CMakeLists.txt index 63b92ab525b..24adb8d9c80 100644 --- a/kernels/test/CMakeLists.txt +++ b/kernels/test/CMakeLists.txt @@ -270,17 +270,15 @@ set(_optimized_kernels_test_sources "op_le_test.cpp" "op_linear_test.cpp" "op_log_softmax_test.cpp" + "op_mm_test.cpp" "op_mul_test.cpp" "op_native_layer_norm_test.cpp" "op_neg_test.cpp" "op_sub_test.cpp" "UnaryUfuncRealHBBF16ToFloatHBF16Test.cpp" - ${CMAKE_CURRENT_BINARY_DIR}/include/portable/executorch/kernels/test/supported_features.cpp + ${CMAKE_CURRENT_BINARY_DIR}/include/optimized/executorch/kernels/test/supported_features.cpp ) -# We don't have sleef on OSS so we don't have log_softmax -list(REMOVE_ITEM _optimized_kernels_test_sources "op_log_softmax_test.cpp") - et_cxx_test( optimized_kernels_test SOURCES diff --git a/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl index c70757e29b9..d48a22cee37 100644 --- a/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl +++ b/shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl @@ -9,8 +9,13 @@ load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") load("@fbsource//xplat/executorch/build:selects.bzl", "selects") load( 
"@fbsource//xplat/executorch/kernels/optimized:lib_defs.bzl", + "get_vec_deps", "get_vec_preprocessor_flags", ) +load( + "@fbsource//xplat/executorch/kernels/portable:op_registration_util.bzl", + "get_compiler_optimization_flags", +) def op_target(name, deps = [], compiler_flags = []): """Registers an optimized implementation for an operator overload group. @@ -94,12 +99,17 @@ def define_op_library(name, compiler_flags, deps): "//executorch/kernels/test/...", "@EXECUTORCH_CLIENTS", ], - # kernels often have helpers with no prototypes just disabling the warning here as the headers - # are codegend and linked in later - compiler_flags = ["-Wno-missing-prototypes"], + compiler_flags = [ + # kernels often have helpers with no prototypes just disabling the warning here as the headers + # are codegend and linked in later + "-Wno-missing-prototypes", + # pragma unroll fails with -Os, don't need to warn us and + # fail Werror builds; see https://godbolt.org/z/zvf85vTsr + "-Wno-pass-failed", + ] + get_compiler_optimization_flags(), deps = [ "//executorch/runtime/kernel:kernel_includes", - ] + augmented_deps, + ] + augmented_deps + get_vec_deps(), preprocessor_flags = get_vec_preprocessor_flags(), # sleef needs to be added as a direct dependency of the operator target when building for Android, # or a linker error may occur. Not sure why this happens; it seems that fbandroid_platform_deps of @@ -134,8 +144,3 @@ def define_op_target(name, compiler_flags, deps): compiler_flags = compiler_flags, deps = deps, ) - -def is_op_disabled(name): - # TODO (gjcomer) Enable ops with sleef dependency in OSS - disabled_ops = ["op_log_softmax"] - return name in disabled_ops From d7f964a8739d3fed1c22a73055180cf3b10e6135 Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Tue, 25 Feb 2025 08:15:08 -0800 Subject: [PATCH 086/584] [ExecuTorch] Arm Ethos: Add conftest TARGET (#8673) Pull Request resolved: https://github.com/pytorch/executorch/pull/8559 As title. 
ghstack-source-id: 268178172 @exported-using-ghexport @bypass-github-export-checks @bypass-github-pytorch-ci-checks @bypass-github-executorch-ci-checks Differential Revision: [D69714021](https://our.internmc.facebook.com/intern/diff/D69714021/) Co-authored-by: Digant Desai --- backends/arm/test/TARGETS | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/backends/arm/test/TARGETS b/backends/arm/test/TARGETS index ef092c55035..ea9a2f9f686 100644 --- a/backends/arm/test/TARGETS +++ b/backends/arm/test/TARGETS @@ -1,9 +1,18 @@ load("@fbcode_macros//build_defs:python_library.bzl", "python_library") python_library( - name = "common", - srcs = ["common.py"], + name = "conftest", + srcs = ["conftest.py"], + deps = [ + "//executorch/exir:lib", + ] +) + +python_library( + name = "runner_utils", + srcs = ["runner_utils.py"], deps = [ + ":conftest", "//executorch/backends/xnnpack/test/tester:tester", "//executorch/backends/arm:arm_backend", "//executorch/exir:lib", @@ -12,12 +21,14 @@ python_library( ) python_library( - name = "runner_utils", - srcs = ["runner_utils.py"], + name = "common", + srcs = ["common.py"], deps = [ + ":runner_utils", "//executorch/backends/xnnpack/test/tester:tester", "//executorch/backends/arm:arm_backend", "//executorch/exir:lib", "//executorch/exir/backend:compile_spec_schema", + "fbsource//third-party/pypi/pytest:pytest", ] ) From 6cb5c1adada34ec4206e191f326681f62e6bf132 Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Tue, 25 Feb 2025 08:32:32 -0800 Subject: [PATCH 087/584] [devtool] introduce datasink class to etdump (#8677) Pull Request resolved: https://github.com/pytorch/executorch/pull/8496 this diff introduce datasink class, the class for managing the customized debug data storage pipeline. Detials can be found in https://docs.google.com/document/d/1y_m32mKdj-OgLcLUz9TKhBW3PC3bBDYSBbeAH544EfM/edit?tab=t.0 ghstack-source-id: 268137338 Differential Revision: [D69583422](https://our.internmc.facebook.com/intern/diff/D69583422/) Co-authored-by: gasoonjia Co-authored-by: Scott Wolchok --- devtools/etdump/buffer_data_sink.cpp | 51 ++++++++ devtools/etdump/buffer_data_sink.h | 91 +++++++++++++ devtools/etdump/data_sink_base.h | 61 +++++++++ devtools/etdump/etdump_flatcc.cpp | 20 +-- devtools/etdump/targets.bzl | 47 +++++++ .../etdump/tests/buffer_data_sink_test.cpp | 123 ++++++++++++++++++ devtools/etdump/tests/targets.bzl | 11 ++ devtools/etdump/utils.h | 32 +++++ runtime/core/error.h | 3 + 9 files changed, 425 insertions(+), 14 deletions(-) create mode 100644 devtools/etdump/buffer_data_sink.cpp create mode 100644 devtools/etdump/buffer_data_sink.h create mode 100644 devtools/etdump/data_sink_base.h create mode 100644 devtools/etdump/tests/buffer_data_sink_test.cpp create mode 100644 devtools/etdump/utils.h diff --git a/devtools/etdump/buffer_data_sink.cpp b/devtools/etdump/buffer_data_sink.cpp new file mode 100644 index 00000000000..08bac801ef2 --- /dev/null +++ b/devtools/etdump/buffer_data_sink.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include + +using ::executorch::runtime::Error; +using ::executorch::runtime::Result; + +namespace executorch { +namespace etdump { + +Result BufferDataSink::write(const void* ptr, size_t length) { + if (length == 0) { + return offset_; + } + + uint8_t* last_data_end = debug_buffer_.data() + offset_; + + // The beginning of the next data blob must be aligned to the alignment + uint8_t* cur_data_begin = internal::align_pointer(last_data_end, alignment_); + uint8_t* cur_data_end = cur_data_begin + length; + + if (cur_data_end > debug_buffer_.data() + debug_buffer_.size()) { + ET_LOG(Error, "Ran out of space to store intermediate outputs."); + return Error::OutOfResources; + } + + // Zero out the padding between data blobs + memset(last_data_end, 0, cur_data_begin - last_data_end); + memcpy(cur_data_begin, ptr, length); + offset_ = (size_t)(cur_data_end - debug_buffer_.data()); + + return (size_t)(cur_data_begin - debug_buffer_.data()); +} + +Result BufferDataSink::get_storage_size() const { + return debug_buffer_.size(); +} + +size_t BufferDataSink::get_used_bytes() const { + return offset_; +} + +} // namespace etdump +} // namespace executorch diff --git a/devtools/etdump/buffer_data_sink.h b/devtools/etdump/buffer_data_sink.h new file mode 100644 index 00000000000..522203443d4 --- /dev/null +++ b/devtools/etdump/buffer_data_sink.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include + +namespace executorch { +namespace etdump { + +/** + * BufferDataSink is a concrete implementation of the DataSinkBase class, + * designed to store debug data in a pre-allocated, user-owned buffer. This + * class provides methods to write raw data and tensor data into the buffer, + * ensuring proper alignment and managing padding as needed. + */ +class BufferDataSink : public DataSinkBase { + public: + /** + * Constructs a BufferDataSink with a given buffer. + * + * @param[in] buffer A Span object representing the buffer where data will be + * stored. + * @param[in] alignment The alignment requirement for the buffer. It must be + * a power of two. Default is 64. + */ + explicit BufferDataSink( + ::executorch::runtime::Span buffer, + size_t alignment = 64) + : debug_buffer_(buffer), offset_(0), alignment_(alignment) {} + + // Uncopiable and unassignable to avoid double assignment and free of the + // internal buffer. + BufferDataSink(const BufferDataSink&) = delete; + BufferDataSink& operator=(const BufferDataSink&) = delete; + + // Movable to be compatible with Result. + BufferDataSink(BufferDataSink&&) = default; + BufferDataSink& operator=(BufferDataSink&&) = default; + + ~BufferDataSink() override = default; + + /** + * Write data into the debug buffer and return the offset of the starting + * location of the data within the buffer. + * + * @param[in] ptr A pointer to the data to be written into the storage. + * @param[in] size The size of the data in bytes. + * @return A Result object containing either: + * - The offset of the starting location of the data within the + * debug buffer, or + * - An error code indicating the failure reason, if any issue + * occurs during the write process. + */ + ::executorch::runtime::Result write(const void* ptr, size_t size) + override; + + /** + * Retrieves the total size of the buffer. 
+ * + * @return A Result object containing the total size of the buffer in bytes. + */ + ::executorch::runtime::Result get_storage_size() const; + + /** + * Retrieves the number of bytes currently used in the buffer. + * + * @return The amount of data currently stored in the buffer in bytes. + */ + size_t get_used_bytes() const override; + + private: + // A Span object representing the buffer used for storing debug data. + ::executorch::runtime::Span debug_buffer_; + + // The offset of the next available location in the buffer. + size_t offset_; + + // The alignment of the buffer. + size_t alignment_; +}; + +} // namespace etdump +} // namespace executorch diff --git a/devtools/etdump/data_sink_base.h b/devtools/etdump/data_sink_base.h new file mode 100644 index 00000000000..602c249ce9d --- /dev/null +++ b/devtools/etdump/data_sink_base.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace executorch { +namespace etdump { + +/** + * DataSinkBase is an abstract class that users can inherit and implement + * to customize the storage and management of debug data in ETDumpGen. This + * class provides a basic and essential interface for writing datablob to a + * user-defined storage, retrieving storage capacity, and tracking the amount of + * data stored. + */ +class DataSinkBase { + public: + /** + * Virtual destructor to ensure proper cleanup of derived classes. + */ + + virtual ~DataSinkBase() = default; + /** + * Write data into the debug storage. This method should be implemented + * by derived classes to handle the specifics of data storage. + * + * This function should return the offset of the starting location of the + * data within the debug storage if the write operation succeeds, or an + * Error code if any issue occurs during the write process. + * + * @param[in] ptr A pointer to the data to be written into the storage. + * @param[in] length The size of the data in bytes. + * @return A Result object containing either: + * - The offset of the starting location of the data within the + * debug storage, which will be recorded in the corresponding + * metadata of ETDump, or + * - An error code indicating the failure reason, if any issue + * occurs during the write process. + */ + virtual ::executorch::runtime::Result write( + const void* ptr, + size_t length) = 0; + + /** + * Get the number of bytes currently used in the debug storage. + * + * @return The amount of data currently stored in bytes. + */ + virtual size_t get_used_bytes() const = 0; +}; + +} // namespace etdump +} // namespace executorch diff --git a/devtools/etdump/etdump_flatcc.cpp b/devtools/etdump/etdump_flatcc.cpp index a34b5188c53..ec52621a956 100644 --- a/devtools/etdump/etdump_flatcc.cpp +++ b/devtools/etdump/etdump_flatcc.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -94,16 +95,6 @@ etdump_Tensor_ref_t add_tensor_entry( return etdump_Tensor_end(builder_); } -static uint8_t* alignPointer(void* ptr, size_t alignment) { - intptr_t addr = reinterpret_cast(ptr); - if ((addr & (alignment - 1)) == 0) { - // Already aligned. 
- return reinterpret_cast(ptr); - } - addr = (addr | (alignment - 1)) + 1; - return reinterpret_cast(addr); -} - } // namespace // Constructor implementation @@ -113,9 +104,10 @@ ETDumpGen::ETDumpGen(Span buffer) { // Initialize the flatcc builder_ using the buffer and buffer size. if (buffer.data() != nullptr) { - builder_ = (struct flatcc_builder*)alignPointer(buffer.data(), 64); - uintptr_t buffer_with_builder = - (uintptr_t)alignPointer(builder_ + sizeof(struct flatcc_builder), 64); + builder_ = + (struct flatcc_builder*)internal::align_pointer(buffer.data(), 64); + uintptr_t buffer_with_builder = (uintptr_t)internal::align_pointer( + builder_ + sizeof(struct flatcc_builder), 64); size_t builder_size = (size_t)(buffer_with_builder - (uintptr_t)buffer.data()); size_t min_buf_size = max_alloc_buf_size + builder_size; @@ -513,7 +505,7 @@ size_t ETDumpGen::copy_tensor_to_debug_buffer(executorch::aten::Tensor tensor) { return static_cast(-1); } uint8_t* offset_ptr = - alignPointer(debug_buffer_.data() + debug_buffer_offset_, 64); + internal::align_pointer(debug_buffer_.data() + debug_buffer_offset_, 64); debug_buffer_offset_ = (offset_ptr - debug_buffer_.data()) + tensor.nbytes(); ET_CHECK_MSG( debug_buffer_offset_ <= debug_buffer_.size(), diff --git a/devtools/etdump/targets.bzl b/devtools/etdump/targets.bzl index bf4807aa442..1d762e983c8 100644 --- a/devtools/etdump/targets.bzl +++ b/devtools/etdump/targets.bzl @@ -87,8 +87,54 @@ def define_common_targets(): exported_external_deps = ["flatccrt"], ) + runtime.cxx_library( + name = "utils", + srcs = [], + exported_headers = [ + "utils.h", + ], + visibility = [ + + ], + ) + for aten_mode in get_aten_mode_options(): aten_suffix = "_aten" if aten_mode else "" + + runtime.cxx_library( + name = "data_sink_base" + aten_suffix, + exported_headers = [ + "data_sink_base.h", + ], + exported_deps = [ + "//executorch/runtime/core/exec_aten/util:scalar_type_util" + aten_suffix, + ], + visibility = [ + "//executorch/...", + "@EXECUTORCH_CLIENTS", + ], + ) + + runtime.cxx_library( + name = "buffer_data_sink" + aten_suffix, + headers = [ + "buffer_data_sink.h", + ], + srcs = [ + "buffer_data_sink.cpp", + ], + deps = [ + ":utils", + ], + exported_deps = [ + "//executorch/runtime/core/exec_aten:lib" + aten_suffix, + ":data_sink_base" + aten_suffix, + ], + visibility = [ + "//executorch/...", + "@EXECUTORCH_CLIENTS", + ], + ) runtime.cxx_library( name = "etdump_flatcc" + aten_suffix, srcs = [ @@ -106,6 +152,7 @@ def define_common_targets(): ], exported_deps = [ ":etdump_schema_flatcc", + ":utils", "//executorch/runtime/core:event_tracer" + aten_suffix, "//executorch/runtime/core/exec_aten/util:scalar_type_util" + aten_suffix, ], diff --git a/devtools/etdump/tests/buffer_data_sink_test.cpp b/devtools/etdump/tests/buffer_data_sink_test.cpp new file mode 100644 index 00000000000..984f7776300 --- /dev/null +++ b/devtools/etdump/tests/buffer_data_sink_test.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include +#include +#include + +using namespace ::testing; +using ::executorch::aten::ScalarType; +using ::executorch::aten::Tensor; +using ::executorch::runtime::Error; +using ::executorch::runtime::Result; +using ::executorch::runtime::Span; +using torch::executor::testing::TensorFactory; + +class BufferDataSinkTest : public ::testing::Test { + protected: + void SetUp() override { + torch::executor::runtime_init(); + // Allocate a small buffer for testing + buffer_size_ = 128; // Small size for testing + buffer_ptr_ = malloc(buffer_size_); + buffer_ = Span(static_cast(buffer_ptr_), buffer_size_); + data_sink_ = std::make_unique(buffer_); + } + + void TearDown() override { + free(buffer_ptr_); + } + + size_t buffer_size_; + void* buffer_ptr_; + Span buffer_; + std::unique_ptr data_sink_; +}; + +TEST_F(BufferDataSinkTest, StorageSizeCheck) { + Result ret = data_sink_->get_storage_size(); + ASSERT_EQ(ret.error(), Error::Ok); + + size_t storage_size = ret.get(); + EXPECT_EQ(storage_size, buffer_size_); +} + +TEST_F(BufferDataSinkTest, WriteOneTensorAndCheckData) { + TensorFactory tf; + Tensor tensor = tf.make({1, 4}, {1.0, 2.0, 3.0, 4.0}); + + Result ret = + data_sink_->write(tensor.const_data_ptr(), tensor.nbytes()); + ASSERT_EQ(ret.error(), Error::Ok); + + size_t offset = ret.get(); + + EXPECT_NE(offset, static_cast(-1)); + + // Check that the data in the buffer matches the tensor data + const float* buffer_data = + reinterpret_cast(buffer_.data() + offset); + for (size_t i = 0; i < tensor.numel(); ++i) { + EXPECT_EQ(buffer_data[i], tensor.const_data_ptr()[i]); + } +} + +TEST_F(BufferDataSinkTest, WriteMultiTensorsAndCheckData) { + TensorFactory tf; + std::vector tensors = { + tf.make({1, 4}, {1.0, 2.0, 3.0, 4.0}), + tf.make({1, 4}, {5.0, 6.0, 7.0, 8.0})}; + for (const auto& tensor : tensors) { + Result ret = + data_sink_->write(tensor.const_data_ptr(), tensor.nbytes()); + ASSERT_EQ(ret.error(), Error::Ok); + + size_t offset = ret.get(); + EXPECT_NE(offset, static_cast(-1)); + // Check that the data in the buffer matches the tensor data + const float* buffer_data = + reinterpret_cast(buffer_.data() + offset); + for (size_t i = 0; i < tensor.numel(); ++i) { + EXPECT_EQ(buffer_data[i], tensor.const_data_ptr()[i]); + } + } +} + +TEST_F(BufferDataSinkTest, PointerAlignmentCheck) { + TensorFactory tf; + Tensor tensor = tf.make({1, 4}, {1.0, 2.0, 3.0, 4.0}); + Result ret = + data_sink_->write(tensor.const_data_ptr(), tensor.nbytes()); + ASSERT_EQ(ret.error(), Error::Ok); + + size_t offset = ret.get(); + EXPECT_NE(offset, static_cast(-1)); + // Check that the offset pointer is 64-byte aligned + const uint8_t* offset_ptr = buffer_.data() + offset; + EXPECT_EQ(reinterpret_cast(offset_ptr) % 64, 0); +} + +TEST_F(BufferDataSinkTest, WriteUntilOverflow) { + TensorFactory tf; + Tensor tensor = tf.zeros({1, 8}); // Large tensor to fill the buffer + + // Write tensors until we run out of space + for (size_t i = 0; i < 2; i++) { + Result ret = + data_sink_->write(tensor.const_data_ptr(), tensor.nbytes()); + ASSERT_EQ(ret.error(), Error::Ok); + } + + // Attempting to write another tensor should raise an error + Result ret = + data_sink_->write(tensor.const_data_ptr(), tensor.nbytes()); + ASSERT_EQ(ret.error(), Error::OutOfResources); +} diff --git a/devtools/etdump/tests/targets.bzl b/devtools/etdump/tests/targets.bzl index 5299b7c1cb7..c91267ff467 100644 --- a/devtools/etdump/tests/targets.bzl +++ b/devtools/etdump/tests/targets.bzl @@ -19,3 +19,14 @@ def 
define_common_targets(): "//executorch/runtime/core/exec_aten/testing_util:tensor_util", ], ) + + runtime.cxx_test( + name = "buffer_data_sink_test", + srcs = [ + "buffer_data_sink_test.cpp", + ], + deps = [ + "//executorch/devtools/etdump:buffer_data_sink", + "//executorch/runtime/core/exec_aten/testing_util:tensor_util", + ], + ) diff --git a/devtools/etdump/utils.h b/devtools/etdump/utils.h new file mode 100644 index 00000000000..8f9a78a1f99 --- /dev/null +++ b/devtools/etdump/utils.h @@ -0,0 +1,32 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. + +#include +#include + +#pragma once + +namespace executorch { +namespace etdump { +namespace internal { + +/** + * Aligns a pointer to the next multiple of `alignment`. + * + * @param[in] ptr Pointer to align. + * @param[in] alignment Alignment to align to. Must be a power of 2. + * + * @returns A pointer aligned to `alignment`. + */ +inline uint8_t* align_pointer(void* ptr, size_t alignment) { + intptr_t addr = reinterpret_cast(ptr); + if ((addr & (alignment - 1)) == 0) { + // Already aligned. + return reinterpret_cast(ptr); + } + addr = (addr | (alignment - 1)) + 1; + return reinterpret_cast(addr); +} + +} // namespace internal +} // namespace etdump +} // namespace executorch diff --git a/runtime/core/error.h b/runtime/core/error.h index 7fbd92b7c08..73e343a5c45 100644 --- a/runtime/core/error.h +++ b/runtime/core/error.h @@ -82,6 +82,9 @@ enum class Error : error_code_t { /// Error caused by the contents of external data. InvalidExternalData = 0x24, + /// Does not have enough resources to perform the requested operation. + OutOfResources = 0x25, + /* * Delegate errors. */ From 875becd124a1a01a9b57f8de90a385ad9963e3fd Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Tue, 25 Feb 2025 08:43:39 -0800 Subject: [PATCH 088/584] [ExecuTorch] Arm Ethos: Add ArmTester TARGET (#8674) Pull Request resolved: https://github.com/pytorch/executorch/pull/8560 As title. ghstack-source-id: 268178174 @exported-using-ghexport @bypass-github-export-checks @bypass-github-pytorch-ci-checks @bypass-github-executorch-ci-checks Differential Revision: [D69714022](https://our.internmc.facebook.com/intern/diff/D69714022/) --------- Co-authored-by: Digant Desai Co-authored-by: Scott Wolchok --- backends/arm/test/TARGETS | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/backends/arm/test/TARGETS b/backends/arm/test/TARGETS index ea9a2f9f686..946d890c3e1 100644 --- a/backends/arm/test/TARGETS +++ b/backends/arm/test/TARGETS @@ -32,3 +32,17 @@ python_library( "fbsource//third-party/pypi/pytest:pytest", ] ) + +python_library( + name = "arm_tester", + srcs = glob(["tester/*.py"]), + deps = [ + ":runner_utils", + ":common", + "//executorch/backends/arm:tosa_mapping", + "//executorch/backends/arm:tosa_specification", + "//executorch/backends/arm/quantizer:arm_quantizer", + "//executorch/devtools/backend_debug:delegation_info", + "fbsource//third-party/pypi/tabulate:tabulate", + ] +) From fcb40f125cc198cb577eef09ce092082aa414aab Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 25 Feb 2025 08:48:48 -0800 Subject: [PATCH 089/584] fix headers attribute of cxx_test in OSS (#8662) We forgot to make header paths work for cxx_test headers. 
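To make the failure mode concrete, here is a hypothetical target of the shape that previously broke in OSS; the names are illustrative, and only the `headers` attribute matters:

runtime.cxx_test(
    # Hypothetical test target, for illustration only.
    name = "example_test",
    srcs = ["example_test.cpp"],
    # Before this fix, env.patch_headers() was never applied to cxx_test,
    # so private header paths listed here were not rewritten and the OSS
    # Buck build could not resolve them.
    headers = ["example_helpers.h"],
)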
Test Plan: buck2 build //runtime/kernel/test:operator_registry_test
---
 shim_et/xplat/executorch/build/runtime_wrapper.bzl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/shim_et/xplat/executorch/build/runtime_wrapper.bzl b/shim_et/xplat/executorch/build/runtime_wrapper.bzl
index 5bfba568423..79bc6c7e699 100644
--- a/shim_et/xplat/executorch/build/runtime_wrapper.bzl
+++ b/shim_et/xplat/executorch/build/runtime_wrapper.bzl
@@ -268,6 +268,7 @@ def _cxx_test(*args, **kwargs):
         kwargs["deps"].append("//executorch/test/utils:utils")

     _patch_kwargs_cxx(kwargs)
+    env.patch_headers(kwargs)
     _patch_build_mode_flags(kwargs)
     _patch_test_compiler_flags(kwargs)

From 88b344102878ab3386a37cbade219390c6e5aa02 Mon Sep 17 00:00:00 2001
From: pytorchbot
Date: Tue, 25 Feb 2025 10:19:51 -0800
Subject: [PATCH 090/584] [ExecuTorch] Arm Ethos: Add pass tests (#8675)

Pull Request resolved: https://github.com/pytorch/executorch/pull/8561

As title. Adds pytest._test_options["tosa_ref_model"], similar to "corstone_fvp". This is a hack; once we buckify the reference model, we should remove this. It shouldn't have an impact on the OSS test coverage.

ghstack-source-id: 268178173
@bypass-github-export-checks
@bypass-github-pytorch-ci-checks
@bypass-github-executorch-ci-checks

Differential Revision: [D69714010](https://our.internmc.facebook.com/intern/diff/D69714010/)

---------

Co-authored-by: Digant Desai
Co-authored-by: Scott Wolchok
---
 backends/arm/test/TARGETS | 22 ++++++++++
 backends/arm/test/conftest.py | 42 +++++++++++++++----
 backends/arm/test/passes/test_rescale_pass.py | 6 ++-
 backends/arm/test/pytest.ini | 3 +-
 backends/arm/test/runner_utils.py | 1 -
 backends/arm/test/targets.bzl | 35 ++++++++++++++++
 6 files changed, 97 insertions(+), 12 deletions(-)
 create mode 100644 backends/arm/test/targets.bzl

diff --git a/backends/arm/test/TARGETS b/backends/arm/test/TARGETS
index 946d890c3e1..0e99a349956 100644
--- a/backends/arm/test/TARGETS
+++ b/backends/arm/test/TARGETS
@@ -1,10 +1,16 @@
 load("@fbcode_macros//build_defs:python_library.bzl", "python_library")
+load(":targets.bzl", "define_arm_tests")
+
+
+oncall("executorch")

 python_library(
     name = "conftest",
     srcs = ["conftest.py"],
     deps = [
         "//executorch/exir:lib",
+        "//executorch/exir/backend:compile_spec_schema",
+        "fbsource//third-party/pypi/pytest:pytest",
     ]
 )

@@ -33,6 +39,19 @@ python_library(
     ]
 )

+python_library(
+    name = "common",
+    srcs = ["common.py"],
+    deps = [
+        ":runner_utils",
+        "//executorch/backends/xnnpack/test/tester:tester",
+        "//executorch/backends/arm:arm_backend",
+        "//executorch/exir:lib",
+        "//executorch/exir/backend:compile_spec_schema",
+        "fbsource//third-party/pypi/pytest:pytest",
+    ]
+)
+
 python_library(
     name = "arm_tester",
     srcs = glob(["tester/*.py"]),
@@ -42,7 +61,10 @@ python_library(
         "//executorch/backends/arm:tosa_mapping",
         "//executorch/backends/arm:tosa_specification",
         "//executorch/backends/arm/quantizer:arm_quantizer",
+        "//executorch/backends/arm:arm_partitioner",
         "//executorch/devtools/backend_debug:delegation_info",
         "fbsource//third-party/pypi/tabulate:tabulate",
     ]
 )
+
+define_arm_tests()
diff --git a/backends/arm/test/conftest.py b/backends/arm/test/conftest.py
index 081d499d4d5..c6d92c45dd0 100644
--- a/backends/arm/test/conftest.py
+++ b/backends/arm/test/conftest.py
@@ -13,7 +13,12 @@
 from typing import Any

 import pytest
-import torch
+
+try:
+    import tosa_reference_model
+except ImportError:
+    logging.warning("tosa_reference_model not found, can't run reference model tests")
+    tosa_reference_model = None

 """
This file
contains the pytest hooks, fixtures etc. for the Arm test suite. @@ -24,18 +29,29 @@ def pytest_configure(config): - pytest._test_options = {} # type: ignore[attr-defined] - - if config.option.arm_run_corstoneFVP: + pytest._test_options["corstone_fvp"] = False # type: ignore[attr-defined] + if ( + getattr(config.option, "arm_run_corestoneFVP", False) + and config.option.arm_run_corstoneFVP + ): corstone300_exists = shutil.which("FVP_Corstone_SSE-300_Ethos-U55") corstone320_exists = shutil.which("FVP_Corstone_SSE-320") if not (corstone300_exists and corstone320_exists): raise RuntimeError( "Tests are run with --arm_run_corstoneFVP but corstone FVP is not installed." ) + # Only enable if we also have the TOSA reference model available. pytest._test_options["corstone_fvp"] = True # type: ignore[attr-defined] - pytest._test_options["fast_fvp"] = config.option.fast_fvp # type: ignore[attr-defined] + + pytest._test_options["fast_fvp"] = False # type: ignore[attr-defined] + if getattr(config.option, "fast_fvp", False): + pytest._test_options["fast_fvp"] = config.option.fast_fvp # type: ignore[attr-defined] + + # TODO: remove this flag once we have a way to run the reference model tests with Buck + pytest._test_options["tosa_ref_model"] = False # type: ignore[attr-defined] + if tosa_reference_model is not None: + pytest._test_options["tosa_ref_model"] = True # type: ignore[attr-defined] logging.basicConfig(level=logging.INFO, stream=sys.stdout) @@ -44,9 +60,15 @@ def pytest_collection_modifyitems(config, items): def pytest_addoption(parser): - parser.addoption("--arm_quantize_io", action="store_true", help="Deprecated.") - parser.addoption("--arm_run_corstoneFVP", action="store_true") - parser.addoption("--fast_fvp", action="store_true") + def try_addoption(*args, **kwargs): + try: + parser.addoption(*args, **kwargs) + except Exception: + pass + + try_addoption("--arm_quantize_io", action="store_true", help="Deprecated.") + try_addoption("--arm_run_corstoneFVP", action="store_true", help="Deprecated.") + try_addoption("--fast_fvp", action="store_true") def pytest_sessionstart(session): @@ -78,6 +100,8 @@ def set_random_seed(): Rerun with a specific seed found under a random seed test ARM_TEST_SEED=3478246 pytest --config-file=/dev/null --verbose -s --color=yes backends/arm/test/ops/test_avg_pool.py -k """ + import torch + if os.environ.get("ARM_TEST_SEED", "RANDOM") == "RANDOM": random.seed() # reset seed, in case any other test has fiddled with it seed = random.randint(0, 2**32 - 1) @@ -161,6 +185,8 @@ def _load_libquantized_ops_aot_lib(): res = subprocess.run(find_lib_cmd, capture_output=True) if res.returncode == 0: library_path = res.stdout.decode().strip() + import torch + torch.ops.load_library(library_path) else: raise RuntimeError( diff --git a/backends/arm/test/passes/test_rescale_pass.py b/backends/arm/test/passes/test_rescale_pass.py index 25052c448d1..90ad502378c 100644 --- a/backends/arm/test/passes/test_rescale_pass.py +++ b/backends/arm/test/passes/test_rescale_pass.py @@ -116,7 +116,7 @@ def _test_rescale_pipeline( ): """Tests a model with many ops that requires rescales. 
As more ops are quantized to int32 and need the InsertRescalesPass, make sure that they play nicely together.""" - ( + tester = ( ArmTester( module, example_inputs=test_data, @@ -126,8 +126,9 @@ def _test_rescale_pipeline( .export() .to_edge_transform_and_lower() .to_executorch() - .run_method_and_compare_outputs(test_data) ) + if conftest.is_option_enabled("tosa_ref_model"): + tester.run_method_and_compare_outputs(test_data) def _test_rescale_pipeline_ethosu( @@ -152,6 +153,7 @@ def _test_rescale_pipeline_ethosu( class TestRescales(unittest.TestCase): @parameterized.expand(RescaleNetwork.test_parameters) + @pytest.mark.tosa_ref_model def test_quantized_rescale(self, x, y): _test_rescale_pipeline(RescaleNetwork(), (x, y)) diff --git a/backends/arm/test/pytest.ini b/backends/arm/test/pytest.ini index 3af1f0d0971..e73bd7dbb26 100644 --- a/backends/arm/test/pytest.ini +++ b/backends/arm/test/pytest.ini @@ -2,4 +2,5 @@ addopts = --strict-markers markers = slow: Tests that take long time - corstone_fvp: Tests that use Corstone300 or Corstone320 FVP \ No newline at end of file + corstone_fvp: Tests that use Corstone300 or Corstone320 FVP # And also uses TOSA reference model + tosa_ref_model: Tests that use TOSA reference model # Temporary! diff --git a/backends/arm/test/runner_utils.py b/backends/arm/test/runner_utils.py index 2d182b4a410..5a0bfe2c37c 100644 --- a/backends/arm/test/runner_utils.py +++ b/backends/arm/test/runner_utils.py @@ -22,7 +22,6 @@ try: import tosa_reference_model except ImportError: - logger.warning("tosa_reference_model not found, can't run reference model tests") tosa_reference_model = None from executorch.backends.arm.arm_backend import get_tosa_spec, is_tosa diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl new file mode 100644 index 00000000000..3335dc958cd --- /dev/null +++ b/backends/arm/test/targets.bzl @@ -0,0 +1,35 @@ +# load("//caffe2/test/fb:defs.bzl", "define_tests") +load("@fbcode_macros//build_defs:python_pytest.bzl", "python_pytest") +load("@bazel_skylib//lib:paths.bzl", "paths") + +def define_arm_tests(): + # TODO Add more tests + test_files = native.glob(["passes/test_*.py"]) + + # https://github.com/pytorch/executorch/issues/8606 + test_files.remove("passes/test_ioquantization_pass.py") + + TESTS = {} + + for test_file in test_files: + test_file_name = paths.basename(test_file) + test_name = test_file_name.replace("test_", "").replace(".py", "") + + python_pytest( + name = test_name, + srcs = [test_file], + pytest_config = "pytest.ini", + resources = ["conftest.py"], + compile = "with-source", + typing = False, + preload_deps = [ + "//executorch/kernels/quantized:custom_ops_generated_lib", + ], + deps = [ + ":arm_tester", + ":conftest", + "//executorch/exir:lib", + "fbsource//third-party/pypi/pytest:pytest", + "fbsource//third-party/pypi/parameterized:parameterized", + ], + ) From d94b9f32a63fb4d1d0a3df046a0932062b020856 Mon Sep 17 00:00:00 2001 From: lucylq Date: Tue, 25 Feb 2025 11:02:00 -0800 Subject: [PATCH 091/584] Clean up buck targets Differential Revision: D70131772 Pull Request resolved: https://github.com/pytorch/executorch/pull/8655 --- extension/flat_tensor/targets.bzl | 2 +- extension/flat_tensor/test/targets.bzl | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/extension/flat_tensor/targets.bzl b/extension/flat_tensor/targets.bzl index 6f627492f24..0d49995aa6e 100644 --- a/extension/flat_tensor/targets.bzl +++ b/extension/flat_tensor/targets.bzl @@ -8,7 +8,6 @@ def define_common_targets(): ], 
exported_headers = ["flat_tensor_data_map.h"],
         deps = [
-            "//executorch/extension/flat_tensor/serialize:generated_headers",
             "//executorch/runtime/core:core",
             "//executorch/runtime/core:evalue",
             "//executorch/runtime/core:named_data_map",
@@ -17,6 +16,7 @@ def define_common_targets():
         ],
         exported_deps = [
             "//executorch/extension/flat_tensor/serialize:flat_tensor_header",
+            "//executorch/extension/flat_tensor/serialize:generated_headers",
         ],
         visibility = [
             "//executorch/...",
diff --git a/extension/flat_tensor/test/targets.bzl b/extension/flat_tensor/test/targets.bzl
index 28baace3eeb..a2b96526ab5 100644
--- a/extension/flat_tensor/test/targets.bzl
+++ b/extension/flat_tensor/test/targets.bzl
@@ -47,9 +47,6 @@ def define_common_targets(is_fbcode=False):
         deps = [
             "//executorch/extension/data_loader:file_data_loader",
             "//executorch/extension/flat_tensor:flat_tensor_data_map",
-            "//executorch/extension/flat_tensor/serialize:flat_tensor_header",
-            "//executorch/extension/flat_tensor/serialize:generated_headers",
-            "//executorch/extension/flat_tensor/serialize:schema",
             "//executorch/runtime/core:named_data_map",
             "//executorch/runtime/core/exec_aten:lib",
         ],

From 857101d14f43f888884cba686d860abf18777eef Mon Sep 17 00:00:00 2001
From: Sebastian Larsson <38941629+Sebastian-Larsson@users.noreply.github.com>
Date: Tue, 25 Feb 2025 20:20:10 +0100
Subject: [PATCH 092/584] Arm backend: Create negative operator_support check
 unsupported u55 ops (#8682)

Since there will be more unsupported ops for U55, it makes more sense to create a new negative check for ops that are always unsupported on U55. bitwise_support.py is removed with this patch, as it is redundant with the new negative check.

Signed-off-by: Sebastian Larsson
---
 backends/arm/operator_support/__init__.py | 1 -
 .../arm/operator_support/bitwise_support.py | 33 -------------------
 .../tosa_supported_operators.py | 33 +++++++++++++++++--
 3 files changed, 31 insertions(+), 36 deletions(-)
 delete mode 100644 backends/arm/operator_support/bitwise_support.py

diff --git a/backends/arm/operator_support/__init__.py b/backends/arm/operator_support/__init__.py
index 2ac23b0e91b..c6895cce492 100644
--- a/backends/arm/operator_support/__init__.py
+++ b/backends/arm/operator_support/__init__.py
@@ -6,7 +6,6 @@
 # pyre-unsafe

 from . import (  # noqa
-    bitwise_support,
     convolution_support,
     pool_2d_support,
     reduce_sum_support,
diff --git a/backends/arm/operator_support/bitwise_support.py b/backends/arm/operator_support/bitwise_support.py
deleted file mode 100644
index e0604622064..00000000000
--- a/backends/arm/operator_support/bitwise_support.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# Copyright 2025 Arm Limited and/or its affiliates.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
- -import torch.fx as fx -from executorch.backends.arm.operator_support.tosa_supported_operators import ( - register_tosa_support_check, - SupportedTOSAOperatorCheck, -) -from executorch.backends.arm.tosa_specification import Tosa_0_80, TosaSpecification -from executorch.exir.dialects._ops import ops as exir_ops - - -@register_tosa_support_check -class BitwiseSupported(SupportedTOSAOperatorCheck): - targets = [ - exir_ops.edge.aten.bitwise_and.Tensor, - exir_ops.edge.aten.bitwise_or.Tensor, - exir_ops.edge.aten.bitwise_xor.Tensor, - ] - - tosa_specs = [ - TosaSpecification.create_from_string("TOSA-0.80+BI"), - TosaSpecification.create_from_string("TOSA-0.80+MI"), - ] - - def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification): - # U55 case, Vela 4.2.0 (25.02 release) - if isinstance(tosa_spec, Tosa_0_80) and tosa_spec.is_U55_subset: - return False - - return True diff --git a/backends/arm/operator_support/tosa_supported_operators.py b/backends/arm/operator_support/tosa_supported_operators.py index b67bded7fb9..1268e2c912c 100644 --- a/backends/arm/operator_support/tosa_supported_operators.py +++ b/backends/arm/operator_support/tosa_supported_operators.py @@ -11,13 +11,13 @@ from typing import final, Optional, Sequence, Type import torch - import torch.fx as fx + from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor from executorch.backends.arm._passes.fuse_quantized_activation_pass import ( FuseQuantizedActivationPass, ) -from executorch.backends.arm.tosa_specification import TosaSpecification +from executorch.backends.arm.tosa_specification import Tosa_0_80, TosaSpecification from executorch.exir.dialects._ops import ops as exir_ops from torch.fx.passes.operator_support import any_chain, chain, OperatorSupportBase from torch.fx.passes.utils.source_matcher_utils import get_source_partitions @@ -90,6 +90,7 @@ def tosa_support_factory( if not tosa_spec.support_float(): negative_checks.append(NeedsDecompositionCheck()) negative_checks.append(CheckProperQuantization()) + negative_checks.append(EthosU55NotSupported(tosa_spec)) return chain( any_chain( BaseTOSASupportList(), @@ -111,6 +112,9 @@ def is_node_supported( supported = node.op == "call_function" and node.target in [ exir_ops.edge.aten.abs.default, exir_ops.edge.aten.add.Tensor, + exir_ops.edge.aten.bitwise_and.Tensor, + exir_ops.edge.aten.bitwise_or.Tensor, + exir_ops.edge.aten.bitwise_xor.Tensor, exir_ops.edge.aten.expand_copy.default, exir_ops.edge.aten.cat.default, exir_ops.edge.aten.clamp.default, @@ -170,6 +174,31 @@ def is_node_supported( return supported +class EthosU55NotSupported(OperatorSupportBase): + """ + Certain operators are not supported on U55. These are listed in `unsupported` in + is_node_supported(). 
+ """ + + def __init__(self, tosa_spec: TosaSpecification): + self.tosa_spec = tosa_spec + + def is_node_supported( + self, submodules: typing.Mapping[str, torch.nn.Module], node: fx.Node + ) -> bool: + if isinstance(self.tosa_spec, Tosa_0_80) and self.tosa_spec.is_U55_subset: + unsupported_ops = [ + exir_ops.edge.aten.bitwise_and.Tensor, + exir_ops.edge.aten.bitwise_or.Tensor, + exir_ops.edge.aten.bitwise_xor.Tensor, + ] + + if node.target in unsupported_ops: + return False + + return True + + class NeedsDecompositionCheck(OperatorSupportBase): """ Targeted operators need to be decomposed prior to quantization in order to get a pair of q-dq-nodes surrounding From 8a8495274071a79b0982491c5dce56ea8e2df41d Mon Sep 17 00:00:00 2001 From: Hansong <107070759+kirklandsign@users.noreply.github.com> Date: Tue, 25 Feb 2025 11:26:01 -0800 Subject: [PATCH 093/584] Fix Android prebuilt docs Now we only have one AAR. --- docs/source/android-prebuilt-library.md | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/docs/source/android-prebuilt-library.md b/docs/source/android-prebuilt-library.md index 324c63376c5..62bcdd2315e 100644 --- a/docs/source/android-prebuilt-library.md +++ b/docs/source/android-prebuilt-library.md @@ -1,15 +1,11 @@ -# Using Android prebuilt libraries (AAR) +# Using Android prebuilt library (AAR) -We provide two prebuilt Android libraries (AAR), `executorch.aar` for generic use case (image/audio processing) and `executorch_llama.aar` for LLAMA use case. +We provide a prebuilt Android library (AAR), `executorch.aar` for both generic (image/audio processing) and LLAMA use case. -## Contents of libraries +## Contents of library - `executorch.aar` - [Java library](https://github.com/pytorch/executorch/tree/main/extension/android/src/main/java/org/pytorch/executorch) - - JNI contains the JNI binding for [NativePeer.java](https://github.com/pytorch/executorch/blob/main/extension/android/src/main/java/org/pytorch/executorch/NativePeer.java) and ExecuTorch native library, including core ExecuTorch runtime libraries, XNNPACK backend, Portable kernels, Optimized kernels, and Quantized kernels. - - Comes with two ABI variants, arm64-v8a and x86_64. -- `executorch_llama.aar` - - [Java library](https://github.com/pytorch/executorch/tree/main/extension/android/src/main/java/org/pytorch/executorch) (Note: it contains the same Java classes as the previous Java, but it does not contain the JNI binding for generic Module/NativePeer Java code). - - JNI contains the JNI binding for [LlamaModule.java](https://github.com/pytorch/executorch/blob/main/extension/android/src/main/java/org/pytorch/executorch/LlamaModule.java) and ExecuTorch native library, including core ExecuTorch runtime libraries, XNNPACK backend, Portable kernels, Optimized kernels, Quantized kernels, and LLAMA-specific Custom ops library. + - JNI contains the JNI binding for the corresponding Java code, and ExecuTorch native library, including core ExecuTorch runtime libraries, XNNPACK backend, Portable kernels, Optimized kernels, Quantized kernels, and LLAMA-specific Custom ops library. - Comes with two ABI variants, arm64-v8a and x86_64. ## Downloading AAR From 0fb4f8565f96f323dfce173fb50cf3ee57454af5 Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Tue, 25 Feb 2025 12:10:14 -0800 Subject: [PATCH 094/584] Unbreak //exectorch/docs:doctest internally (#8693) Added missing buck dep. 
Differential Revision: [D70099706](https://our.internmc.facebook.com/intern/diff/D70099706/) ghstack-source-id: 268011090 Pull Request resolved: https://github.com/pytorch/executorch/pull/8645 Co-authored-by: Scott Wolchok --- docs/TARGETS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/TARGETS b/docs/TARGETS index 6e8210dbdfe..a0281b8b782 100644 --- a/docs/TARGETS +++ b/docs/TARGETS @@ -9,8 +9,9 @@ python_binary( par_style = "xar", deps = [ "//caffe2:torch", - "//executorch/exir:lib", + "//executorch/backends/xnnpack/quantizer:xnnpack_quantizer", "//executorch/devtools:lib", + "//executorch/exir:lib", "//executorch/exir/backend/test:backend_with_compiler_demo", "//executorch/exir/backend/test:op_partitioner_demo", "//executorch/devtools/bundled_program/serialize:lib", From 93838e81e4c5f5f84936b0da32b943414ead1b2e Mon Sep 17 00:00:00 2001 From: Mengtao Yuan Date: Tue, 25 Feb 2025 12:21:26 -0800 Subject: [PATCH 095/584] Fix pyre error for logits Differential Revision: D70183946 Pull Request resolved: https://github.com/pytorch/executorch/pull/8687 --- examples/models/llama/llama_transformer.py | 44 +++++++++++----------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/examples/models/llama/llama_transformer.py b/examples/models/llama/llama_transformer.py index aba55705d20..3536936e47e 100644 --- a/examples/models/llama/llama_transformer.py +++ b/examples/models/llama/llama_transformer.py @@ -232,27 +232,29 @@ def forward( if self.apply_output: logits = self.output(h) - if self.output_prune_map is not None: - # expand to original size so that downstream applications can use the logits as-is. - if self.generate_full_logits: - # (1, seq_len, pruned_size) -> (1, seq_len, original_size) - expanded_logits = torch.full( - [logits.shape[0], logits.shape[1], self.vocab_size], - float("-inf"), - device=logits.device, - dtype=logits.dtype, - ) - expanded_logits[:, :, list(self.output_prune_map.values())] = logits - else: - # (1, pruned_size) -> (1, original_size) - expanded_logits = torch.full( - [logits.shape[0], self.vocab_size], - float("-inf"), - device=logits.device, - dtype=logits.dtype, - ) - expanded_logits[:, list(self.output_prune_map.values())] = logits - logits = expanded_logits + if self.output_prune_map is not None: + # expand to original size so that downstream applications can use the logits as-is. 
+ if self.generate_full_logits: + # (1, seq_len, pruned_size) -> (1, seq_len, original_size) + expanded_logits = torch.full( + [logits.shape[0], logits.shape[1], self.vocab_size], + float("-inf"), + device=logits.device, + dtype=logits.dtype, + ) + expanded_logits[:, :, list(self.output_prune_map.values())] = logits + else: + # (1, pruned_size) -> (1, original_size) + expanded_logits = torch.full( + [logits.shape[0], self.vocab_size], + float("-inf"), + device=logits.device, + dtype=logits.dtype, + ) + expanded_logits[:, list(self.output_prune_map.values())] = logits + logits = expanded_logits + else: + logits = h if attn_options_update is not None: return logits, attn_options_update From f3fc096a2df3c7ea8667f1598ca880146116c6a6 Mon Sep 17 00:00:00 2001 From: Jack <32371937+jackzhxng@users.noreply.github.com> Date: Tue, 25 Feb 2025 13:41:10 -0800 Subject: [PATCH 096/584] Switch to new ao quant api for 8da4w (#8501) --- examples/models/llama/source_transformation/quantize.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/models/llama/source_transformation/quantize.py b/examples/models/llama/source_transformation/quantize.py index 8923ab1fdec..e6d228d5da9 100644 --- a/examples/models/llama/source_transformation/quantize.py +++ b/examples/models/llama/source_transformation/quantize.py @@ -119,11 +119,10 @@ def quantize( # noqa C901 # Check for required args if group_size is None: raise Exception("For 8da4w quantization, group size must be specified.") - from torchao.quantization.quant_api import Int8DynActInt4WeightQuantizer - model = Int8DynActInt4WeightQuantizer( - precision=torch_dtype, groupsize=group_size - ).quantize(model) + from torchao.quantization import int8_dynamic_activation_int4_weight, quantize_ + + quantize_(model, int8_dynamic_activation_int4_weight(group_size=group_size)) if verbose: print("quantized model:", model) @@ -663,7 +662,7 @@ def convert_for_runtime(self) -> nn.Module: def quantized_model(self) -> nn.Module: model_updated_state_dict = self.create_quantized_state_dict(self.packed) self.convert_for_runtime() - self.mod.load_state_dict(model_updated_state_dict) + self.mod.load_state_dict(model_updated_state_dict, assign=True) return self.mod From 8f5c4419ff3e8a401e6cb56e5b99900bdcef2e1f Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Tue, 25 Feb 2025 14:47:42 -0800 Subject: [PATCH 097/584] Fix issues with named data map load_into Differential Revision: D70186266 Pull Request resolved: https://github.com/pytorch/executorch/pull/8686 --- .../flat_tensor/flat_tensor_data_map.cpp | 5 ++-- extension/flat_tensor/flat_tensor_data_map.h | 2 +- .../test/flat_tensor_data_map_test.cpp | 23 +++++++++++++++++++ runtime/core/named_data_map.h | 5 ++-- runtime/executor/tensor_parser_exec_aten.cpp | 13 ++++------- 5 files changed, 33 insertions(+), 15 deletions(-) diff --git a/extension/flat_tensor/flat_tensor_data_map.cpp b/extension/flat_tensor/flat_tensor_data_map.cpp index ff526e359d4..bf54ae014b5 100644 --- a/extension/flat_tensor/flat_tensor_data_map.cpp +++ b/extension/flat_tensor/flat_tensor_data_map.cpp @@ -141,7 +141,7 @@ ET_NODISCARD Result FlatTensorDataMap::get_data( DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::External)); } -ET_NODISCARD Result FlatTensorDataMap::load_data_into( +ET_NODISCARD Error FlatTensorDataMap::load_data_into( ET_UNUSED const char* key, ET_UNUSED void* buffer, ET_UNUSED size_t size) const { @@ -156,7 +156,7 @@ ET_NODISCARD Result FlatTensorDataMap::load_data_into( return tensor_layout.error(); } 
ET_CHECK_OR_RETURN_ERROR( - size < tensor_layout.get().nbytes(), + size <= tensor_layout.get().nbytes(), InvalidArgument, "Buffer size %zu is smaller than tensor size %zu", size, @@ -187,6 +187,7 @@ ET_NODISCARD Result FlatTensorDataMap::get_key( if (index < 0 || index >= flat_tensor_->tensors()->size()) { return Error::InvalidArgument; } + return flat_tensor_->tensors()->Get(index)->fully_qualified_name()->c_str(); } diff --git a/extension/flat_tensor/flat_tensor_data_map.h b/extension/flat_tensor/flat_tensor_data_map.h index 00f4bf07d19..972a5fa9c55 100644 --- a/extension/flat_tensor/flat_tensor_data_map.h +++ b/extension/flat_tensor/flat_tensor_data_map.h @@ -75,7 +75,7 @@ class FlatTensorDataMap final : public executorch::runtime::NamedDataMap { * * @returns an Error indicating if the load was successful. */ - ET_NODISCARD executorch::runtime::Result + ET_NODISCARD executorch::runtime::Error load_data_into(const char* key, void* buffer, size_t size) const override; /** diff --git a/extension/flat_tensor/test/flat_tensor_data_map_test.cpp b/extension/flat_tensor/test/flat_tensor_data_map_test.cpp index 681bc39a129..ac4583eda88 100644 --- a/extension/flat_tensor/test/flat_tensor_data_map_test.cpp +++ b/extension/flat_tensor/test/flat_tensor_data_map_test.cpp @@ -137,3 +137,26 @@ TEST_F(FlatTensorDataMapTest, FlatTensorDataMap_Keys) { Result key2_res = data_map->get_key(2); EXPECT_EQ(key2_res.error(), Error::InvalidArgument); } + +TEST_F(FlatTensorDataMapTest, FlatTensorDataMap_LoadInto) { + Result data_map = + FlatTensorDataMap::load(data_map_loader_.get()); + EXPECT_EQ(data_map.error(), Error::Ok); + + // get the metadata + auto meta_data_res = data_map->get_metadata("a"); + ASSERT_EQ(meta_data_res.error(), Error::Ok); + + // get data blob + void* data = malloc(meta_data_res->nbytes()); + auto load_into_error = + data_map->load_data_into("a", data, meta_data_res->nbytes()); + ASSERT_EQ(load_into_error, Error::Ok); + + // Check tensor data is correct. + float* data_a = static_cast(data); + for (int i = 0; i < 4; i++) { + EXPECT_EQ(data_a[i], 3.0); + } + free(data); +} diff --git a/runtime/core/named_data_map.h b/runtime/core/named_data_map.h index 68639ed872a..ef5e413db67 100644 --- a/runtime/core/named_data_map.h +++ b/runtime/core/named_data_map.h @@ -53,10 +53,9 @@ class ET_EXPERIMENTAL NamedDataMap { * size of the data for a given key. * @param buffer The buffer to load the data into. Must point to at least * `size` bytes of memory. - * @return Result containing the number of bytes written on success. This will - * fail if the buffer is too small. + * @returns an Error indicating if the load was successful. 
*/ - ET_NODISCARD virtual Result + ET_NODISCARD virtual Error load_data_into(const char* key, void* buffer, size_t size) const = 0; /** diff --git a/runtime/executor/tensor_parser_exec_aten.cpp b/runtime/executor/tensor_parser_exec_aten.cpp index a1ac245acca..66202acabc3 100644 --- a/runtime/executor/tensor_parser_exec_aten.cpp +++ b/runtime/executor/tensor_parser_exec_aten.cpp @@ -224,17 +224,12 @@ ET_NODISCARD Result getTensorDataPtr( if (!planned_ptr.ok()) { return planned_ptr.error(); } - auto size = + auto load_error = named_data_map->load_data_into(fqn, planned_ptr.get(), nbytes); - if (size.error() != Error::Ok) { - return size.error(); + if (load_error != Error::Ok) { + return load_error; } - ET_CHECK_OR_RETURN_ERROR( - size.get() == nbytes, - InvalidExternalData, - "Expected to load %zu bytes, actually loaded %u bytes", - nbytes, - static_cast(size.get())); + return planned_ptr; } } From df8ad7dd24462cb19b31088092e1cd9881cb7067 Mon Sep 17 00:00:00 2001 From: Jack <32371937+jackzhxng@users.noreply.github.com> Date: Tue, 25 Feb 2025 14:48:38 -0800 Subject: [PATCH 098/584] Remove qwen test_model pull trigger (#8701) --- .ci/scripts/gather_test_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/scripts/gather_test_models.py b/.ci/scripts/gather_test_models.py index 515bc97cca3..d02213b9faf 100755 --- a/.ci/scripts/gather_test_models.py +++ b/.ci/scripts/gather_test_models.py @@ -90,7 +90,7 @@ def model_should_run_on_event(model: str, event: str) -> bool: We put higher priority and fast models to pull request and rest to push. """ if event == "pull_request": - return model in ["mv3", "vit", "qwen2_5"] # TODO: remove, just to test the ci + return model in ["mv3", "vit"] elif event == "push": # These are super slow. Only run it periodically return model not in ["dl3", "edsr", "emformer_predict"] From 2be4e94e5c0e7ca18465f012837ec3540b5b91c6 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 25 Feb 2025 15:15:05 -0800 Subject: [PATCH 099/584] remove llava from test-models-linux (#8702) See comment for explanation -- already have test-llava-runner-linux --- .ci/scripts/gather_test_models.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.ci/scripts/gather_test_models.py b/.ci/scripts/gather_test_models.py index d02213b9faf..a0cc9efd521 100755 --- a/.ci/scripts/gather_test_models.py +++ b/.ci/scripts/gather_test_models.py @@ -104,8 +104,12 @@ def model_should_run_on_target_os(model: str, target_os: str) -> bool: For example, a big model can be disabled in macos due to the limited macos resources. """ if target_os == "macos": + # Disabled in macos due to limited resources, and should stay that way even if + # we otherwise re-enable. return model not in ["llava"] - return True + # Disabled globally because we have test-llava-runner-linux that does a more + # comprehensive E2E test of llava. 
+ return model not in ["llava"] def export_models_for_ci() -> dict[str, dict]: From 5a594a7791d165b734706d26f62121e515885c77 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Tue, 25 Feb 2025 17:20:11 -0800 Subject: [PATCH 100/584] Add dtype, fix RMS norm for FP16 (#8641) * Add dtype, fix RMS norm for FP16 * up * up * Update llama_transformer.py --- examples/apple/coreml/llama/export.py | 81 ++++++------ .../apple/coreml/llama/llama_transformer.py | 122 +++++++++++++----- examples/apple/coreml/llama/readme.md | 8 +- examples/apple/coreml/llama/run.py | 104 ++++++++++++--- 4 files changed, 223 insertions(+), 92 deletions(-) diff --git a/examples/apple/coreml/llama/export.py b/examples/apple/coreml/llama/export.py index 58bc0859c79..cc9eb9f02ee 100644 --- a/examples/apple/coreml/llama/export.py +++ b/examples/apple/coreml/llama/export.py @@ -3,7 +3,6 @@ # pyre-strict import argparse -import json import sys @@ -20,10 +19,11 @@ from executorch.exir.passes import MemoryPlanningPass from executorch.exir.passes.quant_fusion_pass import QuantFusionPass from executorch.exir.passes.sym_shape_eval_pass import ConstraintBasedSymShapeEvalPass -from executorch.extension.export_util.utils import export_to_edge, save_pte_program +from executorch.exir.program._program import to_edge_with_preserved_ops +from executorch.extension.export_util.utils import save_pte_program sys.path.insert(0, ".") -from llama_transformer import InputManager, ModelArgs, Transformer +from llama_transformer import InputManager, load_model class SplitLinearModule(torch.nn.Module): @@ -141,42 +141,23 @@ def main() -> None: default=8, help="Maximum number of splits to divide linear layers", ) + parser.add_argument( + "--dtype", + type=str, + default="fp16", + ) export_args = parser.parse_args() - params_path = export_args.params - checkpoint_path = export_args.checkpoint - - # Load model args - with open(params_path, "r") as f: - params = json.loads(f.read()) - - args = ModelArgs( - max_seq_len=export_args.max_seq_length, - generate_full_logits=False, + model = load_model( + export_args.checkpoint, + export_args.params, + max_seq_length=export_args.max_seq_length, use_cache_list=export_args.use_cache_list, - **params, - ) - - with torch.device("meta"): - model = Transformer(args) - - checkpoint = torch.load( - checkpoint_path, map_location="cpu", mmap=True, weights_only=True ) - if "model" in checkpoint: - checkpoint = checkpoint["model"] - missing, unexpected = model.load_state_dict( - checkpoint, - strict=False, - assign=True, - ) - print("Missing keys: ", missing) - print("Unexpected keys: ", unexpected) - - float_dtype = torch.float16 # dtype for model/inputs - model.eval() - model.to(float_dtype) + float_dtype = {"fp16": torch.float16, "fp32": torch.float32}[ + export_args.dtype + ] # dtype for model/inputs if export_args.embedding_quantize: bitwidth, group_size = export_args.embedding_quantize.split(",") @@ -197,7 +178,8 @@ def main() -> None: model, export_args.target_split_size, export_args.max_splits ) - model = model.to(float_dtype) + model.eval() + model.to(float_dtype) op_linear_quantizer_config = None if export_args.coreml_quantize == "b4w": @@ -217,7 +199,10 @@ def main() -> None: compile_specs = CoreMLBackend.generate_compile_specs( # pyre-fixme[16] minimum_deployment_target=ct.target.iOS18, - compute_precision=ct.precision(ct.precision.FLOAT16.value), + compute_precision={ + torch.float16: ct.precision.FLOAT16, + torch.float32: ct.precision.FLOAT32, + }[float_dtype], 
compute_unit=ct.ComputeUnit.CPU_AND_NE, model_type=CoreMLBackend.MODEL_TYPE.MODEL, # pyre-fixme[16] op_linear_quantizer_config=op_linear_quantizer_config, @@ -232,11 +217,11 @@ def main() -> None: ) input_manager = InputManager( - n_layers=args.n_layers, - max_batch_size=args.max_batch_size, - n_kv_heads=args.n_kv_heads, - max_seq_length=args.max_seq_len, - head_dim=args.head_dim, + n_layers=model.params.n_layers, + max_batch_size=model.params.max_batch_size, + n_kv_heads=model.params.n_kv_heads, + max_seq_length=model.params.max_seq_len, + head_dim=model.params.head_dim, use_cache_list=export_args.use_cache_list, seq_length=export_args.seq_length, dtype=float_dtype, @@ -245,10 +230,20 @@ def main() -> None: ) example_inputs = input_manager.get_inputs(tokens=[0]) - edge_manager = export_to_edge( + ep = torch.export.export( model, example_inputs, - edge_compile_config=EdgeCompileConfig( + ) + print("Exported program") + print(ep) + + edge_manager = to_edge_with_preserved_ops( + ep, + preserve_ops=[ + torch.ops.aten.scaled_dot_product_attention.default, + torch.ops.aten.linalg_vector_norm.default, + ], + compile_config=EdgeCompileConfig( _check_ir_validity=False, _skip_type_promotion=(float_dtype == torch.float16), _skip_dim_order=True, diff --git a/examples/apple/coreml/llama/llama_transformer.py b/examples/apple/coreml/llama/llama_transformer.py index 5788bcd5e5a..2ce4c1d2b5b 100644 --- a/examples/apple/coreml/llama/llama_transformer.py +++ b/examples/apple/coreml/llama/llama_transformer.py @@ -13,8 +13,6 @@ import torch import torch.nn.functional as F -from executorch.examples.models.llama.llama_transformer import RMSNorm - from executorch.examples.models.llama.rope import ( hf_apply_rotary_emb, hf_precompute_freqs_cis, @@ -25,29 +23,6 @@ from torch import nn -# These are just to prevent to_edge from decomposing SDPA -# A better method is to use the to_edge_transform_and_lower API for CoreML -# and not decompose SDPA -@torch.library.custom_op("coreml::sdpa", mutates_args=()) -def sdpa( - q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, attn_mask: torch.Tensor -) -> torch.Tensor: - """Same as F.scaled_dot_product_attention, but with custom op to avoid lowering during dialect conversion.""" - return torch.ops.aten.scaled_dot_product_attention.default( - q, k, v, attn_mask=attn_mask - ) - - -@torch.library.register_fake("coreml::sdpa") -def _( - q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, attn_mask: torch.Tensor -) -> torch.Tensor: - """Fake implementation with the right output shape, which is required for torch.compile/export/fx tracing.""" - expected_shape = list(q.shape) - expected_shape[-1] = v.shape[-1] - return q.new_empty(expected_shape) - - def find_multiple(n: int, k: int) -> int: if n % k == 0: return n @@ -121,6 +96,63 @@ def __post_init__(self): self.head_dim = self.dim // self.n_heads +class RMSNorm(torch.nn.Module): + def __init__(self, dim: int, eps: float = 1e-6): + """ + Initialize the RMSNorm normalization layer. + + Args: + dim (int): The dimension of the input tensor. + eps (float, optional): A small value added to the denominator for numerical stability. Default is 1e-6. + + Attributes: + eps (float): A small value added to the denominator for numerical stability. + weight (nn.Parameter): Learnable scaling parameter. + + """ + super().__init__() + self.dim = dim + self.eps = eps + self.weight = nn.Parameter(torch.ones(dim)) + + def _norm(self, x): + """ + Apply the RMSNorm normalization to the input tensor. + + Args: + x (torch.Tensor): The input tensor. 
+ + Returns: + torch.Tensor: The normalized tensor. + + """ + # CoreML ignores casts to FP32, so existing implementation of RMSNorm was not stable + # We instead use (x * sqrt(n)) / norm(x, dim=-1) + # Using torch.norm and preserving this op in CoreML improves stability + # Note, we ignore eps, but could add it by using torch.norm(torch.concat(x, sqrt(n*eps))) in the denominator + # In future, we want to add CoreML support for the functional RMSNorm op + # We have yet to do large scale evaluations on the numeric stability of this solution, but note that + # it appears better than what exists currently (removing FP32 casts and using FP16) + rms_norm_eps0 = ( + x * torch.sqrt(torch.tensor(self.dim, dtype=x.dtype)) + ) / torch.linalg.vector_norm(x, dim=-1, keepdim=True) + return rms_norm_eps0 + + def forward(self, x): + """ + Forward pass through the RMSNorm layer. + + Args: + x (torch.Tensor): The input tensor. + + Returns: + torch.Tensor: The output tensor after applying RMSNorm. + + """ + output = self._norm(x) + return output * self.weight + + class Rope(torch.nn.Module): def __init__(self, params: ModelArgs): super().__init__() @@ -304,12 +336,11 @@ def forward( k = k.repeat_interleave(self.n_rep, dim=1) v = v.repeat_interleave(self.n_rep, dim=1) - output = torch.ops.coreml.sdpa(q, k, v, attn_mask) - + output = torch.ops.aten.scaled_dot_product_attention.default( + q, k, v, attn_mask=attn_mask + ) output = output.transpose(1, 2).contiguous().view(bsz, seqlen, -1) - output = self.wo(output) - return output, new_k, new_v @@ -413,6 +444,39 @@ def forward( return logits, k_out, v_out +def load_model(checkpoint_path, params_path, max_seq_length, use_cache_list): + import json + + with open(params_path, "r") as f: + params = json.loads(f.read()) + + args = ModelArgs( + max_seq_len=max_seq_length, + generate_full_logits=False, + use_cache_list=use_cache_list, + **params, + ) + + with torch.device("meta"): + model = Transformer(args) + + checkpoint = torch.load( + checkpoint_path, map_location="cpu", mmap=True, weights_only=True + ) + if "model" in checkpoint: + checkpoint = checkpoint["model"] + + missing, unexpected = model.load_state_dict( + checkpoint, + strict=False, + assign=True, + ) + print("Missing keys: ", missing) + print("Unexpected keys: ", unexpected) + + return model + + class InputManager: def __init__( self, diff --git a/examples/apple/coreml/llama/readme.md b/examples/apple/coreml/llama/readme.md index 353f0b56307..a9efedf6bbe 100644 --- a/examples/apple/coreml/llama/readme.md +++ b/examples/apple/coreml/llama/readme.md @@ -4,7 +4,7 @@ This directory contains ANE-friendly Llama models. Export model with: ``` -python export.py -n /path/to/output/model.pte -p /path/to/params.json -c /path/to/model.pth --seq_length 64 --max_seq_length 1024 --coreml-quantize c4w +python export.py -n /path/to/output/model.pte -p /path/to/params.json -c /path/to/model.pth --seq_length 64 --max_seq_length 1024 --coreml-quantize c4w --dtype fp16 ``` (Note the script should be run from the executorch/examples/apple/coreml/llama directory.) @@ -17,6 +17,12 @@ Run model with: python run.py -m /path/to/model.pte -t /path/to/tokenizer.model --prompt "Once upon a time," ``` +The runner can also be used to run an eager model model to compare with CoreML numerics (--use_eager). In this case, you must specify: +* --checkpoint +* --dtype +* --max_seq_length +* --seq_length + (Note the script should be run from the executorch/examples/apple/coreml/llama directory.) 
diff --git a/examples/apple/coreml/llama/run.py b/examples/apple/coreml/llama/run.py index 65026e1f6bc..501aaee07ed 100644 --- a/examples/apple/coreml/llama/run.py +++ b/examples/apple/coreml/llama/run.py @@ -11,7 +11,7 @@ sys.path.insert(0, ".") from executorch.examples.models.llama.runner.generation import next_token from executorch.examples.models.llama.tokenizer import tiktoken -from llama_transformer import InputManager +from llama_transformer import InputManager, load_model class Tokenizer: @@ -71,28 +71,90 @@ def main() -> None: type=float, default=0.9, ) + parser.add_argument( + "--use_eager", + action="store_true", + ) + parser.add_argument( + "-p", + "--params", + type=str, + default=None, + ) + parser.add_argument( + "-c", + "--checkpoint", + type=str, + default=None, + ) + parser.add_argument("--dtype", type=str, choices=["fp16", "fp32"], default=None) + parser.add_argument( + "--seq_length", + type=int, + default=None, + ) + parser.add_argument( + "--max_seq_length", + type=int, + default=None, + ) + parser.add_argument( + "--cache_size", + type=int, + default=None, + ) args = parser.parse_args() tokenizer = Tokenizer(args.tokenizer) runtime = Runtime.get() - program = runtime.load_program(args.model) - method = program.load_method("forward") - - metadata = method.metadata - print("Method metadata: ", metadata, "\n\n") - - assert ( - metadata.num_inputs() == 6 - ), "Do not export with --use_cache_list for use in pybindings" - # k_cache input - n_layers, max_batch_size, n_kv_heads, cache_size, head_dim = ( - metadata.input_tensor_meta(3).sizes() - ) - - # mask input - seq_length, max_seq_length = metadata.input_tensor_meta(5).sizes() + if args.use_eager: + assert args.params is not None + assert args.checkpoint is not None + assert args.dtype is not None + assert args.max_seq_length is not None + assert args.seq_length is not None + + max_seq_length = args.max_seq_length + seq_length = args.seq_length + model = load_model( + args.checkpoint, + args.params, + max_seq_length=max_seq_length, + use_cache_list=False, + ) + n_layers = model.params.n_layers + max_batch_size = model.params.max_batch_size + n_kv_heads = model.params.n_kv_heads + head_dim = model.params.head_dim + cache_size = args.cache_size + + float_dtype = {"fp16": torch.float16, "fp32": torch.float32}[ + args.dtype + ] # dtype for model/inputs + model.eval() + model.to(float_dtype) + else: + program = runtime.load_program(args.model) + method = program.load_method("forward") + + metadata = method.metadata + print("Method metadata: ", metadata, "\n\n") + + assert ( + metadata.num_inputs() == 6 + ), "Do not export with --use_cache_list for use in pybindings" + # k_cache input + n_layers, max_batch_size, n_kv_heads, cache_size, head_dim = ( + metadata.input_tensor_meta(3).sizes() + ) + float_dtype = {5: torch.float16, 6: torch.float32}[ + metadata.input_tensor_meta(3).dtype() + ] + + # mask input + seq_length, max_seq_length = metadata.input_tensor_meta(5).sizes() input_manager = InputManager( n_layers=n_layers, @@ -102,7 +164,7 @@ def main() -> None: head_dim=head_dim, use_cache_list=False, seq_length=seq_length, - dtype=torch.float16, + dtype=float_dtype, minus_infinity=-30000.0, cache_size=cache_size, ) @@ -117,7 +179,11 @@ def main() -> None: tokens ) processed_tokens = len(tokens) - len(remaining_tokens) - logits, k, v = method.execute(inputs) + if args.use_eager: + logits, k, v = model(*inputs) + else: + logits, k, v = method.execute(inputs) + input_manager.update( input_length=processed_tokens, new_k_caches=k, 
new_v_caches=v ) From f9dc6ef56d305f5b9d127ce551582cea9db9cfed Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 25 Feb 2025 18:10:46 -0800 Subject: [PATCH 101/584] Revert #8501 and #8624 (#8716) * Revert "Switch to new ao quant api for 8da4w (#8501)" This reverts commit f3fc096a2df3c7ea8667f1598ca880146116c6a6. * Revert "Use to_edge_lower_and_transform for XNNPack (#8624)" This reverts commit b5344c17d6069f6fbe3b28db7bf67376cc2fb629. #8624 caused concerning test failure internally -- out of bounds array access. #8501 depends on it per author --- examples/models/llama/export_llama_lib.py | 132 ++++++------------ .../llama/source_transformation/quantize.py | 9 +- examples/models/llava/export_llava.py | 1 + extension/llm/export/builder.py | 28 +--- 4 files changed, 53 insertions(+), 117 deletions(-) diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py index 3a1f423aa27..6d9ba750431 100644 --- a/examples/models/llama/export_llama_lib.py +++ b/examples/models/llama/export_llama_lib.py @@ -676,62 +676,47 @@ def _validate_args(args): ) -def _to_edge_and_lower_llama_xnnpack( - builder_exported, - modelname, - additional_passes, - pt2e_quant_params, - quantizers, - quant_dtype, - args, -) -> LLMEdgeManager: # noqa: C901 - partitioners = [] - - # Order matters here, dynamic quantization should be applied first when both xnnpack and xnnpack_extended_ops are enabled - partitioners.append(get_xnnpack_partitioner(dynamic_quant_only_partitioner=True)) - - modelname = f"xnnpack_dq_{modelname}" - - if args.xnnpack_extended_ops: - partitioners.append( - get_xnnpack_partitioner(dynamic_quant_only_partitioner=False) - ) - modelname = f"xnnpack_{modelname}" - - logging.info("Lowering model using following partitioner(s): ") - for partitioner in partitioners: - logging.info(f"--> {partitioner.__class__.__name__}") +def _export_llama(args) -> LLMEdgeManager: # noqa: C901 + _validate_args(args) - # TODO: Enable generating ETRecord with XNNPack and to_edge_transform_and_lower(). - if args.generate_etrecord: - raise NotImplementedError( - "export_llama does not support XNNPack and generating ETRecord at the moment." 
- ) + pt2e_quant_params, quantizers, quant_dtype = get_quantizer_and_quant_params(args) - builder = builder_exported.pt2e_quantize(quantizers).to_edge_transform_and_lower( - partitioners - ) - if args.verbose: - print_delegation_info(builder.edge_manager.exported_program().graph_module) + # export_to_edge + builder_exported = _prepare_for_llama_export(args).export() - return builder.to_executorch(passes=additional_passes) + builder_exported.run_canonical_optimizations() + if args.export_only: + exit() -def _to_edge_and_lower_llama( # noqa: C901 - builder_exported, - modelname, - additional_passes, - pt2e_quant_params, - quantizers, - quant_dtype, - args, -): builder_exported_to_edge = builder_exported.pt2e_quantize( quantizers ).export_to_edge() + modelname = builder_exported_to_edge.modelname + # to_backend partitioners = [] + + # Order matters here, dynamic quantization should be applied first when both xnnpack and xnnpack_extended_ops are enabled + if ( + pt2e_quant_params is not None and pt2e_quant_params.quantize_linear is not None + ) or (args.xnnpack): + partitioners.append( + get_xnnpack_partitioner(dynamic_quant_only_partitioner=True) + ) + + # force xnnpack to be true if pt2e_quant_params is not None and args.xnnpack is False + args.xnnpack = True + modelname = f"xnnpack_dq_{modelname}" + + if args.xnnpack_extended_ops: + assert args.xnnpack, "xnnpack_extended_ops requires xnnpack to be enabled" + partitioners.append( + get_xnnpack_partitioner(dynamic_quant_only_partitioner=False) + ) + modelname = f"xnnpack_{modelname}" + if args.vulkan: partitioners.append( get_vulkan_partitioner( @@ -746,6 +731,7 @@ def _to_edge_and_lower_llama( # noqa: C901 modelname = f"vulkan_{modelname}" # Need to remove asserts from the graph to prevent graph breaks + # pyre-ignore: Undefined attribute [16]: `Optional` has no attribute `exported_program`. remove_asserts(builder_exported_to_edge.edge_manager.exported_program()) if args.mps: @@ -774,11 +760,13 @@ def _to_edge_and_lower_llama( # noqa: C901 # pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.utils.utils` from executorch.backends.qualcomm.utils.utils import _transform, tag_quant_io + # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `qualcomm`, Optional type has no attribute `exported_program` _transform(builder_exported_to_edge.edge_manager.exported_program()) if args.num_sharding > 0: model_sharding.split_graph( builder_exported_to_edge.edge_manager.exported_program(), + # pyre-fixme[16]: `Optional` has no attribute `__getitem__`. 
builder_exported_to_edge.metadata["get_n_layers"], shares=args.num_sharding, ) @@ -804,15 +792,19 @@ def _to_edge_and_lower_llama( # noqa: C901 atten.head_dim, ) ) + # pyre-ignore tag_quant_io( builder_exported_to_edge.edge_manager.exported_program().graph_module, - partial(get_custom_quant_ios_dtype, cache_shape), + partial(get_custom_quant_ios_dtype, cache_shape), # pyre-ignore ) logging.info("Lowering model using following partitioner(s): ") for partitioner in partitioners: logging.info(f"--> {partitioner.__class__.__name__}") + additional_passes = [] + if args.model in TORCHTUNE_DEFINED_MODELS: + additional_passes = [InitializedMutableBufferPass(["kv_cache_pos"])] if args.generate_etrecord: if not builder_exported_to_edge.edge_manager: raise ValueError("Unable to generate etrecord due to missing edge manager.") @@ -826,6 +818,7 @@ def _to_edge_and_lower_llama( # noqa: C901 if args.num_sharding > 0 and args.qnn: from executorch.backends.qualcomm.utils.utils import canonicalize_program + # pyre-fixme[16]: Module `backends` has no attribute `qualcomm`. canonicalize_program(builder.edge_manager.exported_program()) builder = builder.to_executorch( @@ -847,55 +840,11 @@ def _to_edge_and_lower_llama( # noqa: C901 if args.num_sharding > 0 and args.qnn: from executorch.backends.qualcomm.utils.utils import canonicalize_program + # pyre-fixme[16]: Module `backends` has no attribute `qualcomm`. canonicalize_program(builder.edge_manager.exported_program()) builder = builder.to_executorch(passes=additional_passes) - return builder - - -def _export_llama(args) -> LLMEdgeManager: # noqa: C901 - _validate_args(args) - - pt2e_quant_params, quantizers, quant_dtype = get_quantizer_and_quant_params(args) - - additional_passes = [] - if args.model in TORCHTUNE_DEFINED_MODELS: - additional_passes = [InitializedMutableBufferPass(["kv_cache_pos"])] - - # export_to_edge - builder_exported = _prepare_for_llama_export(args).export() - builder_exported.run_canonical_optimizations() - modelname = builder_exported.modelname - - if args.export_only: - exit() - - if pt2e_quant_params is not None and pt2e_quant_params.quantize_linear is not None: - # Force xnnpack to be true if pt2e_quant_params is not None and args.xnnpack is False - args.xnnpack = True - - if args.xnnpack: - builder = _to_edge_and_lower_llama_xnnpack( - builder_exported, - modelname, - additional_passes, - pt2e_quant_params, - quantizers, - quant_dtype, - args, - ) - else: - builder = _to_edge_and_lower_llama( - builder_exported, - modelname, - additional_passes, - pt2e_quant_params, - quantizers, - quant_dtype, - args, - ) - if args.profile_memory: generate_memory_trace(builder.export_program, "memory_profile.json") @@ -917,6 +866,7 @@ def _export_llama(args) -> LLMEdgeManager: # noqa: C901 output_file = f"{builder.output_dir}/{modelname}.pte" builder.save_to_pte(output_file) + return builder diff --git a/examples/models/llama/source_transformation/quantize.py b/examples/models/llama/source_transformation/quantize.py index e6d228d5da9..8923ab1fdec 100644 --- a/examples/models/llama/source_transformation/quantize.py +++ b/examples/models/llama/source_transformation/quantize.py @@ -119,10 +119,11 @@ def quantize( # noqa C901 # Check for required args if group_size is None: raise Exception("For 8da4w quantization, group size must be specified.") + from torchao.quantization.quant_api import Int8DynActInt4WeightQuantizer - from torchao.quantization import int8_dynamic_activation_int4_weight, quantize_ - - quantize_(model, 
int8_dynamic_activation_int4_weight(group_size=group_size)) + model = Int8DynActInt4WeightQuantizer( + precision=torch_dtype, groupsize=group_size + ).quantize(model) if verbose: print("quantized model:", model) @@ -662,7 +663,7 @@ def convert_for_runtime(self) -> nn.Module: def quantized_model(self) -> nn.Module: model_updated_state_dict = self.create_quantized_state_dict(self.packed) self.convert_for_runtime() - self.mod.load_state_dict(model_updated_state_dict, assign=True) + self.mod.load_state_dict(model_updated_state_dict) return self.mod diff --git a/examples/models/llava/export_llava.py b/examples/models/llava/export_llava.py index a5057e5e850..82c7aca09e0 100644 --- a/examples/models/llava/export_llava.py +++ b/examples/models/llava/export_llava.py @@ -67,6 +67,7 @@ def export(self) -> "LlavaEdgeManager": dynamic_shapes=dynamic_shape, strict=False, ) + # pyre-ignore: Incompatible attribute type [8]: Attribute `pre_autograd_graph_module` declared in class `LLMEdgeManager` has type `Optional[GraphModule]` but is used as type `Module`. self.pre_autograd_graph_module = self.export_program.module() return self diff --git a/extension/llm/export/builder.py b/extension/llm/export/builder.py index ec6cfa41ad8..88d2bc0cab9 100644 --- a/extension/llm/export/builder.py +++ b/extension/llm/export/builder.py @@ -21,7 +21,7 @@ DuplicateDynamicQuantChainPass, ) from executorch.backends.xnnpack._passes.convert_to_linear import ConvertToLinearPass -from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower +from executorch.exir import EdgeProgramManager from executorch.exir.backend.partitioner import Partitioner from executorch.exir.backend.utils import format_delegated_graph @@ -39,7 +39,7 @@ from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e from torch.ao.quantization.quantizer import Quantizer from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer -from torch.export import export_for_training, ExportedProgram +from torch.export import export_for_training from torch.nn.attention import SDPBackend FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s" @@ -89,8 +89,8 @@ def __init__( dynamic_shapes: Optional[Any] = None, ): self.model = model - self.pre_autograd_exported_program: Optional[ExportedProgram] = None - self.pre_autograd_graph_module: Optional[torch.nn.Module] = None + # graph module returned from export() + self.pre_autograd_graph_module: Optional[torch.fx.GraphModule] = None self.modelname = modelname self.max_seq_len = max_seq_len self.dtype = dtype @@ -218,8 +218,8 @@ def export(self) -> "LLMEdgeManager": kwargs=self.example_kwarg_inputs, dynamic_shapes=dynamic_shape, ) + # pyre-fixme[8]: Attribute has type `Optional[GraphModule]`; used as # `Module`. 
- self.pre_autograd_exported_program = exported_module self.pre_autograd_graph_module = exported_module.module() if hasattr(self.args, "export_only") and self.args.export_only: torch.export.save(exported_module, self.args.output_name) @@ -330,10 +330,7 @@ def pt2e_quantize(self, quantizers: Optional[List[Quantizer]]) -> "LLMEdgeManage assert ( self.pre_autograd_graph_module is not None ), "Please run export() first" - m = prepare_pt2e( - self.pre_autograd_graph_module, # pyre-ignore[6] - composed_quantizer, - ) + m = prepare_pt2e(self.pre_autograd_graph_module, composed_quantizer) logging.info( f"Calibrating with tasks: {self.calibration_tasks}, limit: {self.calibration_limit}, calibration_data: {self.calibration_data}, tokenizer_path: {self.tokenizer_path}, seq_length: {self.calibration_seq_length}" ) @@ -433,19 +430,6 @@ def to_backend(self, partitioners: Optional[List[Partitioner]]) -> "LLMEdgeManag return self - def to_edge_transform_and_lower( - self, partitioners: Optional[List[Partitioner]] - ) -> "LLMEdgeManager": - if partitioners is None: - logging.info("No partitioner provided, skipping backend lowering...") - edge_config = self._get_edge_config() - self.edge_manager = to_edge_transform_and_lower( - self.pre_autograd_exported_program, - partitioner=partitioners, - compile_config=edge_config, - ) - return self - def to_executorch( self, passes: Optional[List[ExportPass]] = None ) -> "LLMEdgeManager": From 65432b1a27e14f3d84d26f401ee94b5d426d913d Mon Sep 17 00:00:00 2001 From: Gasoonjia Date: Tue, 25 Feb 2025 20:48:56 -0800 Subject: [PATCH 102/584] introduce sanity check when creating bufferdatasink Differential Revision: D70190912 Pull Request resolved: https://github.com/pytorch/executorch/pull/8709 --- devtools/etdump/buffer_data_sink.cpp | 12 ++++++ devtools/etdump/buffer_data_sink.h | 26 ++++++++--- .../etdump/tests/buffer_data_sink_test.cpp | 43 +++++++++++++++---- 3 files changed, 68 insertions(+), 13 deletions(-) diff --git a/devtools/etdump/buffer_data_sink.cpp b/devtools/etdump/buffer_data_sink.cpp index 08bac801ef2..976d6dc31fb 100644 --- a/devtools/etdump/buffer_data_sink.cpp +++ b/devtools/etdump/buffer_data_sink.cpp @@ -11,10 +11,22 @@ using ::executorch::runtime::Error; using ::executorch::runtime::Result; +using ::executorch::runtime::Span; namespace executorch { namespace etdump { +Result BufferDataSink::create( + Span buffer, + size_t alignment) noexcept { + // Check if alignment is a power of two and greater than 0 + if (alignment == 0 || (alignment & (alignment - 1)) != 0) { + return Error::InvalidArgument; + } + + return BufferDataSink(buffer, alignment); +} + Result BufferDataSink::write(const void* ptr, size_t length) { if (length == 0) { return offset_; diff --git a/devtools/etdump/buffer_data_sink.h b/devtools/etdump/buffer_data_sink.h index 522203443d4..9639f2e072c 100644 --- a/devtools/etdump/buffer_data_sink.h +++ b/devtools/etdump/buffer_data_sink.h @@ -24,17 +24,20 @@ namespace etdump { class BufferDataSink : public DataSinkBase { public: /** - * Constructs a BufferDataSink with a given buffer. + * Creates a BufferDataSink with a given span buffer. * * @param[in] buffer A Span object representing the buffer where data will be * stored. * @param[in] alignment The alignment requirement for the buffer. It must be - * a power of two. Default is 64. + * a power of two and greater than zero. Default is 64. 
+ * @return A Result object containing either: + * - A BufferDataSink object if succees, or + * - An error code indicating the failure reason, if any issue + * occurs during the creation process. */ - explicit BufferDataSink( + static ::executorch::runtime::Result create( ::executorch::runtime::Span buffer, - size_t alignment = 64) - : debug_buffer_(buffer), offset_(0), alignment_(alignment) {} + size_t alignment = 64) noexcept; // Uncopiable and unassignable to avoid double assignment and free of the // internal buffer. @@ -77,6 +80,19 @@ class BufferDataSink : public DataSinkBase { size_t get_used_bytes() const override; private: + /** + * Constructs a BufferDataSink with a given buffer. + * + * @param[in] buffer A Span object representing the buffer where data will be + * stored. + * @param[in] alignment The alignment requirement for the buffer. It must be + * a power of two. Default is 64. + */ + explicit BufferDataSink( + ::executorch::runtime::Span buffer, + size_t alignment) + : debug_buffer_(buffer), offset_(0), alignment_(alignment) {} + // A Span object representing the buffer used for storing debug data. ::executorch::runtime::Span debug_buffer_; diff --git a/devtools/etdump/tests/buffer_data_sink_test.cpp b/devtools/etdump/tests/buffer_data_sink_test.cpp index 984f7776300..0dc4ae997fd 100644 --- a/devtools/etdump/tests/buffer_data_sink_test.cpp +++ b/devtools/etdump/tests/buffer_data_sink_test.cpp @@ -16,6 +16,7 @@ using namespace ::testing; using ::executorch::aten::ScalarType; using ::executorch::aten::Tensor; +using ::executorch::etdump::BufferDataSink; using ::executorch::runtime::Error; using ::executorch::runtime::Result; using ::executorch::runtime::Span; @@ -29,7 +30,11 @@ class BufferDataSinkTest : public ::testing::Test { buffer_size_ = 128; // Small size for testing buffer_ptr_ = malloc(buffer_size_); buffer_ = Span(static_cast(buffer_ptr_), buffer_size_); - data_sink_ = std::make_unique(buffer_); + Result buffer_data_sink_ret = + BufferDataSink::create(buffer_); + ASSERT_EQ(buffer_data_sink_ret.error(), Error::Ok); + buffer_data_sink_ = + std::make_unique(std::move(buffer_data_sink_ret.get())); } void TearDown() override { @@ -39,11 +44,11 @@ class BufferDataSinkTest : public ::testing::Test { size_t buffer_size_; void* buffer_ptr_; Span buffer_; - std::unique_ptr data_sink_; + std::unique_ptr buffer_data_sink_; }; TEST_F(BufferDataSinkTest, StorageSizeCheck) { - Result ret = data_sink_->get_storage_size(); + Result ret = buffer_data_sink_->get_storage_size(); ASSERT_EQ(ret.error(), Error::Ok); size_t storage_size = ret.get(); @@ -55,7 +60,7 @@ TEST_F(BufferDataSinkTest, WriteOneTensorAndCheckData) { Tensor tensor = tf.make({1, 4}, {1.0, 2.0, 3.0, 4.0}); Result ret = - data_sink_->write(tensor.const_data_ptr(), tensor.nbytes()); + buffer_data_sink_->write(tensor.const_data_ptr(), tensor.nbytes()); ASSERT_EQ(ret.error(), Error::Ok); size_t offset = ret.get(); @@ -75,9 +80,10 @@ TEST_F(BufferDataSinkTest, WriteMultiTensorsAndCheckData) { std::vector tensors = { tf.make({1, 4}, {1.0, 2.0, 3.0, 4.0}), tf.make({1, 4}, {5.0, 6.0, 7.0, 8.0})}; + for (const auto& tensor : tensors) { Result ret = - data_sink_->write(tensor.const_data_ptr(), tensor.nbytes()); + buffer_data_sink_->write(tensor.const_data_ptr(), tensor.nbytes()); ASSERT_EQ(ret.error(), Error::Ok); size_t offset = ret.get(); @@ -94,8 +100,9 @@ TEST_F(BufferDataSinkTest, WriteMultiTensorsAndCheckData) { TEST_F(BufferDataSinkTest, PointerAlignmentCheck) { TensorFactory tf; Tensor tensor = tf.make({1, 4}, {1.0, 
2.0, 3.0, 4.0}); + Result ret = - data_sink_->write(tensor.const_data_ptr(), tensor.nbytes()); + buffer_data_sink_->write(tensor.const_data_ptr(), tensor.nbytes()); ASSERT_EQ(ret.error(), Error::Ok); size_t offset = ret.get(); @@ -112,12 +119,32 @@ TEST_F(BufferDataSinkTest, WriteUntilOverflow) { // Write tensors until we run out of space for (size_t i = 0; i < 2; i++) { Result ret = - data_sink_->write(tensor.const_data_ptr(), tensor.nbytes()); + buffer_data_sink_->write(tensor.const_data_ptr(), tensor.nbytes()); ASSERT_EQ(ret.error(), Error::Ok); } // Attempting to write another tensor should raise an error Result ret = - data_sink_->write(tensor.const_data_ptr(), tensor.nbytes()); + buffer_data_sink_->write(tensor.const_data_ptr(), tensor.nbytes()); ASSERT_EQ(ret.error(), Error::OutOfResources); } + +TEST_F(BufferDataSinkTest, illegalAlignment) { + // Create a buffer_data_sink_ with legal alignment that is a power of 2 and + // greater than 0 + for (size_t i = 1; i <= 128; i <<= 1) { + Result buffer_data_sink_ret = + BufferDataSink::create(buffer_, i); + ASSERT_EQ(buffer_data_sink_ret.error(), Error::Ok); + } + + // Create a buffer_data_sink_ with illegal alignment that is not a power of 2 + // or greater than 0 + std::vector illegal_alignments = {0, 3, 5, 7, 100, 127}; + + for (size_t i = 0; i < illegal_alignments.size(); i++) { + Result buffer_data_sink_ret = + BufferDataSink::create(buffer_, illegal_alignments[i]); + ASSERT_EQ(buffer_data_sink_ret.error(), Error::InvalidArgument); + } +} From 13b5605833e08d1f8faede09b0015f28565e42a4 Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Tue, 25 Feb 2025 23:04:49 -0800 Subject: [PATCH 103/584] Introduce NamedDataStore (#8719) Pull Request resolved: https://github.com/pytorch/executorch/pull/8587 Introduce NamedDataStore for weight sharing. See 'NamedBlobStore' in [RFC] Rename 'NamedBlobStore' --> 'NamedDataStore' to mirror 'NamedDataMap' in the runtime. The NamedDataStore exposes two methods: - add_named_data: add a blob to the store - get_named_data_store_output: return the contents of the store, to pass to serialization. Invariants on the NamedDataStore - Keys are unique regardless of whether they are in PTE or external file. - Different keys can point to the same data. NamedDataStore is used in D69764150. It's owned by the EdgeProgramManager. ghstack-source-id: 268328940 @exported-using-ghexport Differential Revision: [D69764094](https://our.internmc.facebook.com/intern/diff/D69764094/) Co-authored-by: lucylq --- exir/_serialize/TARGETS | 1 + exir/_serialize/_named_data_store.py | 183 ++++++++++++++++++ exir/_serialize/test/TARGETS | 16 +- exir/_serialize/test/test_named_data_store.py | 85 ++++++++ 4 files changed, 282 insertions(+), 3 deletions(-) create mode 100644 exir/_serialize/_named_data_store.py create mode 100644 exir/_serialize/test/test_named_data_store.py diff --git a/exir/_serialize/TARGETS b/exir/_serialize/TARGETS index cc6f16d78d8..6671bf00334 100644 --- a/exir/_serialize/TARGETS +++ b/exir/_serialize/TARGETS @@ -32,6 +32,7 @@ runtime.python_library( "_cord.py", "_dataclass.py", "_flatbuffer.py", + "_named_data_store.py", "_program.py", "_serialize.py", "data_serializer.py", diff --git a/exir/_serialize/_named_data_store.py b/exir/_serialize/_named_data_store.py new file mode 100644 index 00000000000..999913a4bb0 --- /dev/null +++ b/exir/_serialize/_named_data_store.py @@ -0,0 +1,183 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# pyre-strict + +import hashlib +import math +from dataclasses import dataclass + +# from dataclasses import dataclass +from typing import Dict, List, Optional + + +@dataclass +class BufferEntry: + """A class to hold the buffer entries for serialization. + + Attributes: + buffer: The buffer bytes. + alignment: The alignment of the buffer. + """ + + buffer: bytes + alignment: int + + +@dataclass +class NamedDataStoreOutput: + """ + Holds named data for serialization. + + Attributes: + buffers: A list of unique buffer entries. + pte_data: Contains data that is stored inside the PTE file. A mapping from + {key: buffer_index}. + external_data: Contains data that is stored external to the PTE. A mapping + from {filename: {key: buffer_index}}. + """ + + buffers: List[BufferEntry] + pte_data: Dict[str, int] + external_data: Dict[str, Dict[str, int]] + + +class NamedDataStore: + """ + NamedDataStore manages the data that delegates want to share. Backends add + bytes to the store under a unique key. These bytes can be retrieved at + runtime using the same key with the NamedDataMap. + + Note: + - Keys are unique in the data store, regardless of whether they are stored + in the PTE or externally. + - Multiple keys can point to the same buffer entry. + - The same data can be added multiple times and all keys will point to one + buffer. If a duplicate blob is added with a different alignment, the + lcm of the current and new alignment is taken for that blob. + """ + + # List of unique blobs. + buffers: List[BufferEntry] + # Named data stored inside the PTE file. Map of {key: buffer_index}. + pte_data: Dict[str, int] + # Named data stored outside of the PTE file. + # Map of {filename: {key: buffer_index}}. + external_data: Dict[str, Dict[str, int]] + + # Cache of the data hash for deduplication. + # Use a hash instead of the data as a key because a sha256 collision is + # unlikely, and the data may be large. + data_hash_to_buffer_idx: Dict[bytes, int] + # Cache of the key to buffer idx to ensure uniqueness. + # If a key is added multiple times, check the buffer idx to ensure that the + # data is identical too. + key_to_buffer_idx: Dict[str, int] + + def __init__(self) -> None: + """ + Initializes a new NamedDataStore. + """ + self.buffers = [] + self.pte_data = {} + self.external_data = {} + + self.data_hash_to_buffer_idx = {} + self.key_to_buffer_idx = {} + + def _add_named_data_to_map( + self, + key: str, + data: bytes, + alignment: int, + local_key_to_buffer_idx: Dict[str, int], + ) -> None: + """ + Add data to a map and update the alignment. Ensure that the key-data + pair is unique. + - If the key exists, the data must be identical. + - If multiple unique keys exist for the same data, those keys should + point to the same buffer. + + Args: + key (str): key associated with the data. + data (bytes): Bytes being requested to be serialized. + alignment (int): alignment for bytes to be serialized with. + local_key_to_buffer_idx (Dict[str, int]): map to add the data to. + Raises: + ValueError: when the key exists in the store, and corresponding data + is different. + """ + # Get data hash. + hashed = hashlib.sha256(data).digest() + + # Check if the key exists. + buffer_idx = self.key_to_buffer_idx.get(key, -1) + if buffer_idx != -1: + # If the key exists, the corresponding data must be identical. 
+ if self.data_hash_to_buffer_idx.get(hashed, -1) != buffer_idx: + raise ValueError( + f"Duplicate key {key} with different data. " + f"Existing data: {self.buffers[buffer_idx].buffer}. " + f"New data: {data}." + ) + self.buffers[buffer_idx].alignment = math.lcm( + self.buffers[buffer_idx].alignment, alignment + ) + else: + # Key doesn't exist; check if the data exists. + buffer_idx = self.data_hash_to_buffer_idx.get(hashed, -1) + if buffer_idx != -1: + # The data exists; update the alignment. + self.buffers[buffer_idx].alignment = math.lcm( + self.buffers[buffer_idx].alignment, alignment + ) + else: + # The data doesn't exist; add it to the data store. + buffer_idx = len(self.buffers) + self.buffers.append(BufferEntry(data, alignment)) + self.data_hash_to_buffer_idx[hashed] = buffer_idx + + # Add key to the map and the key cache. + local_key_to_buffer_idx[key] = buffer_idx + self.key_to_buffer_idx[key] = buffer_idx + + def add_named_data( + self, + key: str, + data: bytes, + alignment: Optional[int] = 1, + external_tag: Optional[str] = None, + ) -> None: + """ + Adds a named blob to the NamedDataStore. + Args: + key (str): key associated with the data. + data (bytes): Bytes being requested to be serialized. + alignment (int): alignment for bytes to be serialized with. + external (Optional[str]): the external filename that this data is saved to. + Raises: + ValueError: when the key exists in the store, and corresponding data + is different. + """ + + # Set default alignment. + if alignment is None: + alignment = 1 + if alignment <= 0: + raise ValueError(f"Alignment must be greater than 0, received {alignment}.") + + if external_tag is None: + self._add_named_data_to_map(key, data, alignment, self.pte_data) + else: + self._add_named_data_to_map( + key, data, alignment, self.external_data.setdefault(external_tag, {}) + ) + + def get_named_data_store_output(self) -> NamedDataStoreOutput: + # Clean up empty maps inside self.external_data + self.external_data = {k: v for k, v in self.external_data.items() if len(v) > 0} + return NamedDataStoreOutput(self.buffers, self.pte_data, self.external_data) diff --git a/exir/_serialize/test/TARGETS b/exir/_serialize/test/TARGETS index 853d82b8a9a..63f47720137 100644 --- a/exir/_serialize/test/TARGETS +++ b/exir/_serialize/test/TARGETS @@ -3,7 +3,7 @@ load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest") oncall("executorch") python_unittest( - name = "program", + name = "test_program", srcs = [ "test_program.py", ], @@ -15,7 +15,7 @@ python_unittest( ) python_unittest( - name = "flatbuffer", + name = "test_flatbuffer", srcs = [ "test_flatbuffer.py", ], @@ -25,7 +25,7 @@ python_unittest( ) python_unittest( - name = "cord", + name = "test_cord", srcs = [ "test_cord.py", ], @@ -33,3 +33,13 @@ python_unittest( "//executorch/exir/_serialize:lib", ], ) + +python_unittest( + name = "test_named_data_store", + srcs = [ + "test_named_data_store.py", + ], + deps = [ + "//executorch/exir/_serialize:lib", + ], +) diff --git a/exir/_serialize/test/test_named_data_store.py b/exir/_serialize/test/test_named_data_store.py new file mode 100644 index 00000000000..d5355f6d7bf --- /dev/null +++ b/exir/_serialize/test/test_named_data_store.py @@ -0,0 +1,85 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +# pyre-strict + +import unittest + +from executorch.exir._serialize._named_data_store import BufferEntry, NamedDataStore + + +class TestNamedDataStore(unittest.TestCase): + def test_add(self) -> None: + store = NamedDataStore() + store.add_named_data("key1", b"data1", None, None) + store.add_named_data("key2", b"data2", 16, "file1") + store.add_named_data("key3", b"data3", 16, "file1") + + output = store.get_named_data_store_output() + + self.assertEqual(len(output.buffers), 3) + self.assertEqual(output.buffers[0], BufferEntry(b"data1", 1)) + self.assertEqual(output.buffers[1], BufferEntry(b"data2", 16)) + self.assertEqual(output.buffers[2], BufferEntry(b"data3", 16)) + + self.assertEqual(len(output.pte_data), 1) + self.assertEqual(output.pte_data["key1"], 0) + + self.assertEqual(len(output.external_data), 1) + self.assertEqual(len(output.external_data["file1"]), 2) + self.assertEqual(output.external_data["file1"]["key2"], 1) + self.assertEqual(output.external_data["file1"]["key3"], 2) + + def test_add_duplicate_name_and_data(self) -> None: + store = NamedDataStore() + store.add_named_data("key", b"data", None, None) + store.add_named_data("key", b"data", None, None) + + output = store.get_named_data_store_output() + + self.assertEqual(len(output.buffers), 1) + self.assertEqual(output.buffers[0], BufferEntry(b"data", 1)) + + self.assertEqual(len(output.pte_data), 1) + self.assertEqual(output.pte_data["key"], 0) + + self.assertEqual(len(output.external_data), 0) + + def test_add_same_data_with_different_alignment(self) -> None: + store = NamedDataStore() + store.add_named_data("key", b"data", 3, None) + store.add_named_data("key1", b"data", 4, None) + + output = store.get_named_data_store_output() + + self.assertEqual(len(output.buffers), 1) + # Check that we take the LCM of the two alignments (3, 4) = 12 + self.assertEqual(output.buffers[0], BufferEntry(b"data", 12)) + + self.assertEqual(len(output.pte_data), 2) + self.assertEqual(output.pte_data["key"], 0) + self.assertEqual(output.pte_data["key1"], 0) + + self.assertEqual(len(output.external_data), 0) + + def test_add_duplicate_key_fail(self) -> None: + store = NamedDataStore() + store.add_named_data("key", b"data", None, None) + + # Cannot add item with the same key and different data. 
+ self.assertRaises(ValueError, store.add_named_data, "key", b"data1", None, None) + self.assertRaises( + ValueError, store.add_named_data, "key", b"data1", 16, "file1" + ) + + output = store.get_named_data_store_output() + + self.assertEqual(len(output.buffers), 1) + self.assertEqual(output.buffers[0], BufferEntry(b"data", 1)) + + self.assertEqual(len(output.pte_data), 1) + self.assertEqual(output.pte_data["key"], 0) + self.assertEqual(len(output.external_data), 0) From fd3b5e9faef4a60418678d5261d2ac32016fcebf Mon Sep 17 00:00:00 2001 From: Sanskar Thapa <98921536+sskarz@users.noreply.github.com> Date: Wed, 26 Feb 2025 09:14:00 -0500 Subject: [PATCH 104/584] Adjust xcodeproj in react-native ios demo to use relative paths on CMake (#8522) Adjust xcodeproj in react-native demo to use relative paths on CMake --- .../rnllama/ios/rnllama.xcodeproj/project.pbxproj | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/demo-apps/react-native/rnllama/ios/rnllama.xcodeproj/project.pbxproj b/examples/demo-apps/react-native/rnllama/ios/rnllama.xcodeproj/project.pbxproj index 1a587970640..73314459f6a 100644 --- a/examples/demo-apps/react-native/rnllama/ios/rnllama.xcodeproj/project.pbxproj +++ b/examples/demo-apps/react-native/rnllama/ios/rnllama.xcodeproj/project.pbxproj @@ -557,7 +557,7 @@ ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; - shellScript = "set -e\n\nif ! command -v cmake &> /dev/null\nthen\n echo \"cmake not found, please install cmake. \\n1. Download Cmake.app from https://cmake.org/download with version > 3.19. \\n2. Install it to Applications/ folder and run sudo /Applications/CMake.app/Contents/bin/cmake-gui --install to install CMake commandline tools.\"\n exit 1\nfi\n\nCMAKE_DIR=\"$TEMP_DIR/cmake\"\nrm -rf \"$CMAKE_DIR\"\n\nPLATFORM=\"SIMULATORARM64\"\nDEPLOYMENT_TARGET=\"17.0\"\n\nif [[ \"$PLATFORM_NAME\" == *\"iphoneos\"* ]]; then\n PLATFORM=\"OS64\"\nelif [[ \"$PLATFORM_NAME\" == *\"macos\"* ]]; then\n PLATFORM=\"MAC_ARM64\"\n DEPLOYMENT_TARGET=\"10.15\"\nfi\n\ncmake_build() {\n local src_dir=$1\n shift\n local extra_args=(\"$@\")\n local build_dir=\"$CMAKE_DIR/build/$(basename \"$src_dir\")\"\n\n mkdir -p \"$build_dir\" && cd \"$build_dir\"\n cmake -G Xcode \\\n -DCMAKE_BUILD_TYPE=\"Release\" \\\n -DCMAKE_CXX_STANDARD=17 \\\n -DCMAKE_TOOLCHAIN_FILE=\"/Users/jh/dev/executorch/third-party/ios-cmake/ios.toolchain.cmake\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD=\"c++17\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY=\"libc++\" \\\n -DPLATFORM=\"$PLATFORM\" \\\n -DDEPLOYMENT_TARGET=\"$DEPLOYMENT_TARGET\" \\\n \"${extra_args[@]}\" \\\n \"$src_dir\"\n cmake --build . --config \"Release\"\n cmake --install . --prefix \"$CMAKE_DIR\"\n}\n\ncmake_build \"/Users/jh/dev/executorch/extension/llm/third-party/abseil-cpp\" \\\n -DABSL_PROPAGATE_CXX_STD=ON\n\ncmake_build \"/Users/jh/dev/executorch/extension/llm/third-party/re2\" \\\n -DCMAKE_PREFIX_PATH=\"$CMAKE_DIR/lib/cmake/absl\"\n \ncmake_build \"/Users/jh/dev/executorch/extension/llm/third-party/sentencepiece\" \\\n -DSPM_ENABLE_SHARED=OFF\n\necho \"$(find $CMAKE_DIR/lib -name \"*.a\" | sed -E 's|^.*/lib([^/]+)\\.a|-l\\1|g' | tr '\\n' ' ')\" > \"$CMAKE_DIR/linker_flags\"\n\n\n\n"; + shellScript = "set -e\n\nif ! command -v cmake &> /dev/null\nthen\n echo \"cmake not found, please install cmake. \\n1. Download Cmake.app from https://cmake.org/download with version > 3.19. \\n2. 
Install it to Applications/ folder and run sudo /Applications/CMake.app/Contents/bin/cmake-gui --install to install CMake commandline tools.\"\n exit 1\nfi\n\nCMAKE_DIR=\"$TEMP_DIR/cmake\"\nrm -rf \"$CMAKE_DIR\"\n\nPLATFORM=\"SIMULATORARM64\"\nDEPLOYMENT_TARGET=\"17.0\"\n\nif [[ \"$PLATFORM_NAME\" == *\"iphoneos\"* ]]; then\n PLATFORM=\"OS64\"\nelif [[ \"$PLATFORM_NAME\" == *\"macos\"* ]]; then\n PLATFORM=\"MAC_ARM64\"\n DEPLOYMENT_TARGET=\"10.15\"\nfi\n\ncmake_build() {\n local src_dir=$1\n shift\n local extra_args=(\"$@\")\n local build_dir=\"$CMAKE_DIR/build/$(basename \"$src_dir\")\"\n\n mkdir -p \"$build_dir\" && cd \"$build_dir\"\n cmake -G Xcode \\\n -DCMAKE_BUILD_TYPE=\"Release\" \\\n -DCMAKE_CXX_STANDARD=17 \\\n -DCMAKE_TOOLCHAIN_FILE=\"$PROJECT_DIR/../../../../../third-party/ios-cmake/ios.toolchain.cmake\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD=\"c++17\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY=\"libc++\" \\\n -DPLATFORM=\"$PLATFORM\" \\\n -DDEPLOYMENT_TARGET=\"$DEPLOYMENT_TARGET\" \\\n \"${extra_args[@]}\" \\\n \"$src_dir\"\n cmake --build . --config \"Release\"\n cmake --install . --prefix \"$CMAKE_DIR\"\n}\n\ncmake_build \"$PROJECT_DIR/../../../../../extension/llm/third-party/abseil-cpp\" \\\n -DABSL_PROPAGATE_CXX_STD=ON\n \ncmake_build \"$PROJECT_DIR/../../../../../extension/llm/third-party/re2\" \\\n -DCMAKE_PREFIX_PATH=\"$CMAKE_DIR/lib/cmake/absl\"\n \ncmake_build \"$PROJECT_DIR/../../../../../extension/llm/third-party/sentencepiece\" \\\n -DSPM_ENABLE_SHARED=OFF\n\necho \"$(find $CMAKE_DIR/lib -name \"*.a\" | sed -E 's|^.*/lib([^/]+)\\.a|-l\\1|g' | tr '\\n' ' ')\" > \"$CMAKE_DIR/linker_flags\"\n\n\n\n"; }; F7CCCCE770493310D0125117 /* [Expo] Configure project */ = { isa = PBXShellScriptBuildPhase; @@ -827,7 +827,7 @@ CURRENT_PROJECT_VERSION = 1; DEBUG_INFORMATION_FORMAT = dwarf; DEFINES_MODULE = YES; - DEVELOPMENT_TEAM = CLFN2N8XXS; + DEVELOPMENT_TEAM = ""; DYLIB_COMPATIBILITY_VERSION = 1; DYLIB_CURRENT_VERSION = 1; DYLIB_INSTALL_NAME_BASE = "@rpath"; @@ -878,7 +878,7 @@ CURRENT_PROJECT_VERSION = 1; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; DEFINES_MODULE = YES; - DEVELOPMENT_TEAM = CLFN2N8XXS; + DEVELOPMENT_TEAM = ""; DYLIB_COMPATIBILITY_VERSION = 1; DYLIB_CURRENT_VERSION = 1; DYLIB_INSTALL_NAME_BASE = "@rpath"; From bc55c0145b7a8d6534b76ffd58b173cdfff0544a Mon Sep 17 00:00:00 2001 From: sabarishsnk Date: Wed, 26 Feb 2025 07:18:23 -0800 Subject: [PATCH 105/584] Update CadenceMemoryPlanning to support per-memory alignment constraint Differential Revision: D69881079 Pull Request resolved: https://github.com/pytorch/executorch/pull/8689 --- backends/cadence/aot/compiler.py | 2 -- backends/cadence/aot/memory_planning.py | 30 +++++++++++++------ .../cadence/aot/tests/test_memory_passes.py | 5 +++- backends/cadence/aot/utils.py | 6 ++++ 4 files changed, 31 insertions(+), 12 deletions(-) diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py index b96a0c7ad39..f9abe1c5425 100644 --- a/backends/cadence/aot/compiler.py +++ b/backends/cadence/aot/compiler.py @@ -264,7 +264,6 @@ def export_to_executorch_gen_etrecord( alloc_graph_output: bool = True, memory_config: Optional[MemoryConfig] = None, dump_graphs: bool = False, - mem_alignment: int = 1, ) -> ExecutorchProgramManager: cadence_passes = get_cadence_passes(opt_level) edge_prog_manager = export_to_edge(model, inputs, dump_graphs) @@ -291,7 +290,6 @@ def export_to_executorch_gen_etrecord( mem_algo=mem_algo, alloc_graph_input=alloc_graph_input, 
alloc_graph_output=alloc_graph_output, - mem_alignment=mem_alignment, ) # Get executorch program after Cadence specific passes diff --git a/backends/cadence/aot/memory_planning.py b/backends/cadence/aot/memory_planning.py index 8c64fab61c1..cfe1b9ab9d8 100644 --- a/backends/cadence/aot/memory_planning.py +++ b/backends/cadence/aot/memory_planning.py @@ -40,6 +40,12 @@ def get_size(memory_config: MemoryConfig, exir_id: int) -> int: return memory_config.memory_sizes[exir_id - 1] +def get_alignment(memory_config: MemoryConfig, exir_id: int) -> int: + # EXIR's spec.mem_id is indexed from 1..N. + assert memory_config.memory_alignments is not None + return memory_config.memory_alignments[exir_id - 1] + + def get_aligned_offset(pre_aligned_offset: int, alignment: int) -> int: return int(math.ceil(pre_aligned_offset / alignment) * alignment) @@ -84,6 +90,10 @@ def position_based_greedy_with_hierarchy( ] ] = None, ) -> List[int]: + # We do not use the `alignment` parameter and instead use the per-memory alignment + # constraints from `memory_config`. + del alignment + num_memories = get_num_memories(memory_config) bufsizes = [0] * num_memories allocated_buffers: List[List[TensorSpec]] = [[] for _ in range(num_memories)] @@ -103,7 +113,8 @@ def overlap(spec: TensorSpec) -> Optional[TensorSpec]: def memory_available(spec: TensorSpec) -> bool: return get_aligned_offset( - spec.mem_offset + spec.allocated_memory, alignment + spec.mem_offset + spec.allocated_memory, + get_alignment(memory_config, spec.mem_id), ) <= get_size(memory_config, spec.mem_id) # Iterate over all the specs in sorted order @@ -124,7 +135,8 @@ def memory_available(spec: TensorSpec) -> bool: spec.mem_offset = 0 while memory_available(spec) and (overlapped := overlap(spec)): spec.mem_offset = get_aligned_offset( - overlapped.mem_offset + overlapped.allocated_memory, alignment + overlapped.mem_offset + overlapped.allocated_memory, + get_alignment(memory_config, spec.mem_id), ) if memory_available(spec): allocated_buffers[spec.mem_id].append(spec) @@ -172,6 +184,10 @@ def greedy_by_size_for_offset_calculation_with_hierarchy( ] ] = None, ) -> List[int]: + # We do not use the `alignment` parameter and instead use the per-memory alignment + # constraints from `memory_config`. 
+ del alignment + num_memories = get_num_memories(memory_config) bufsizes = [0] * num_memories allocated_buffers = [[] for _ in range(num_memories)] @@ -213,13 +229,14 @@ def greedy_by_size_for_offset_calculation_with_hierarchy( prev_offset = max( get_aligned_offset( allocated_spec.mem_offset + allocated_spec.allocated_memory, - alignment, + get_alignment(memory_config, spec.mem_id), ), prev_offset, ) if spec.mem_offset is None: if get_aligned_offset( - prev_offset + spec.allocated_memory, alignment + prev_offset + spec.allocated_memory, + get_alignment(memory_config, spec.mem_id), ) > get_size(memory_config, spec.mem_id): continue else: @@ -439,7 +456,6 @@ def __init__( ] ] ] = None, - mem_alignment: int = 1, ) -> None: self._init_mem_algos() @@ -450,9 +466,6 @@ def __init__( self.alloc_graph_output = alloc_graph_output self.additional_constraint_gen_passes = additional_constraint_gen_passes - assert mem_alignment > 0, "mem_alignment must be positive" - self.mem_alignment = mem_alignment - def _init_mem_algos(self) -> None: self.available_mem_algos = [ position_based_greedy_with_hierarchy, @@ -489,7 +502,6 @@ def run( allow_lifetime_and_storage_overlap=(self.opt_level >= 2), alloc_graph_input=self.alloc_graph_input, alloc_graph_output=self.alloc_graph_output, - alignment=self.mem_alignment, ) mem_planning.run(graph_module, graph_signature) diff --git a/backends/cadence/aot/tests/test_memory_passes.py b/backends/cadence/aot/tests/test_memory_passes.py index 1844a3b4d80..245f3d64003 100644 --- a/backends/cadence/aot/tests/test_memory_passes.py +++ b/backends/cadence/aot/tests/test_memory_passes.py @@ -16,6 +16,7 @@ from executorch.backends.cadence.aot import compiler from executorch.backends.cadence.aot.memory_planning import find_peak_memory_usage from executorch.backends.cadence.aot.pass_utils import count_node +from executorch.backends.cadence.aot.utils import MemoryConfig from executorch.exir import memory from executorch.exir.dialects._ops import ops as exir_ops from executorch.exir.memory_planning import collect_specs_from_nodes @@ -792,7 +793,9 @@ def forward(self, x: torch.Tensor, y: torch.Tensor): mem_algo=mem_algo, alloc_graph_input=False, alloc_graph_output=False, - mem_alignment=37, + memory_config=MemoryConfig( + memory_sizes=[0x1000000000], memory_alignments=[37] + ), ) .exported_program() .graph_module diff --git a/backends/cadence/aot/utils.py b/backends/cadence/aot/utils.py index 468bbf4ae66..37011067897 100644 --- a/backends/cadence/aot/utils.py +++ b/backends/cadence/aot/utils.py @@ -256,6 +256,8 @@ def save_bpte_program( @dataclass class MemoryConfig: memory_sizes: List[int] + # Alignment constraint for each memory region in bytes. + memory_alignments: Optional[List[int]] = None # Optional fields for logs memory_names: Optional[List[str]] = None @@ -263,6 +265,10 @@ class MemoryConfig: memory_xml_path: Optional[str] = None MemorySpace: Optional[enum.Enum] = None + def __post_init__(self) -> None: + if self.memory_alignments is None: + self.memory_alignments = [1] * len(self.memory_sizes) + # get num memories indexed from 1..N, compatible with EXIR's spec.mem_id def get_num_memories(self) -> int: return len(self.memory_sizes) + 1 From bbc500c920907a30aba1c64e7a0d3c54de961430 Mon Sep 17 00:00:00 2001 From: per held Date: Wed, 26 Feb 2025 16:25:33 +0100 Subject: [PATCH 106/584] Arm backend: Annotate types (#8723) Annotate types for files in arm/operators/* and arm/tosa_*. This was done by with the tool monkeytype in combination with pytest. 
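For context, a rough sketch of that monkeytype + pytest workflow is shown below; the test path and module name are illustrative, not necessarily the exact ones used for this patch:

    pip install MonkeyType pytest
    # Record call traces while the Arm backend test suite runs
    monkeytype run -m pytest backends/arm/test
    # Apply the recorded annotations to one traced module at a time
    monkeytype apply executorch.backends.arm.tosa_quant_utils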
--- backends/arm/operators/op_avg_pool2d.py | 2 +- backends/arm/operators/op_conv2d.py | 14 +++-- backends/arm/tosa_mapping.py | 6 +- backends/arm/tosa_quant_utils.py | 77 ++++++++++++++----------- backends/arm/tosa_specification.py | 2 +- backends/arm/tosa_utils.py | 4 +- 6 files changed, 58 insertions(+), 47 deletions(-) diff --git a/backends/arm/operators/op_avg_pool2d.py b/backends/arm/operators/op_avg_pool2d.py index e300b3ed016..170ea5260d8 100644 --- a/backends/arm/operators/op_avg_pool2d.py +++ b/backends/arm/operators/op_avg_pool2d.py @@ -41,7 +41,7 @@ def _build_generic_avgpool2d( output: TosaArg, input_zp: int, output_zp: int, - accumulator_type, + accumulator_type: ts.DType, ) -> None: input_tensor = inputs[0] diff --git a/backends/arm/operators/op_conv2d.py b/backends/arm/operators/op_conv2d.py index f97e408a02a..6230a75c15c 100644 --- a/backends/arm/operators/op_conv2d.py +++ b/backends/arm/operators/op_conv2d.py @@ -22,8 +22,6 @@ from executorch.backends.arm.tosa_quant_utils import build_rescale_conv_output from executorch.backends.arm.tosa_utils import build_reshape, tosa_shape -from serializer.tosa_serializer import TosaOp - @register_node_visitor class Conv2dVisitor(NodeVisitor): @@ -36,8 +34,12 @@ def __init__(self, *args): # `(input + 2 * pad - dilation * (weight - 1) - 1) / stride` # must be an integer, but tosa currently strictly require this property. # This function adjusts the pad value to meet the requirement. - def adjust_pad_if_needed(self, input, weight, stride, pad, dilation): - mod_remainder = (input + 2 * pad - dilation * (weight - 1) - 1) % stride + def adjust_pad_if_needed( + self, input_size: int, input_weight: int, stride: int, pad: int, dilation: int + ) -> int: + mod_remainder = ( + input_size + 2 * pad - dilation * (input_weight - 1) - 1 + ) % stride # No need to adjust if mod_remainder == 0: @@ -143,11 +145,11 @@ def define_node( build_reshape( tosa_graph, weight.name, weight_post_shape, weight_reshaped.name ) - tosa_op = TosaOp.Op().DEPTHWISE_CONV2D + tosa_op = ts.TosaOp.Op().DEPTHWISE_CONV2D weight_name = weight_reshaped.name else: """Regular convolution case""" - tosa_op = TosaOp.Op().CONV2D + tosa_op = ts.TosaOp.Op().CONV2D weight_name = weight.name tosa_graph.addOperator( diff --git a/backends/arm/tosa_mapping.py b/backends/arm/tosa_mapping.py index d1849a7f477..9a8b6b2c35d 100644 --- a/backends/arm/tosa_mapping.py +++ b/backends/arm/tosa_mapping.py @@ -11,7 +11,7 @@ # the standardised TOSA representation. 
# -from typing import Sequence +from typing import Any, Sequence import serializer.tosa_serializer as ts # type: ignore import torch @@ -44,7 +44,7 @@ } -def map_dtype(data_type): +def map_dtype(data_type: torch.dtype) -> ts.DType: if data_type in UNSUPPORTED_DTYPES: raise ValueError(f"Unsupported type: {data_type}") if data_type not in DTYPE_MAP: @@ -88,7 +88,7 @@ def __process_list(self, argument): def __process_number(self, argument: float | int): self.number = argument - def __init__(self, argument) -> None: + def __init__(self, argument: Any) -> None: self.name = None # type: ignore[assignment] self.dtype = None self.shape = None diff --git a/backends/arm/tosa_quant_utils.py b/backends/arm/tosa_quant_utils.py index d53362cb363..1715f56abd6 100644 --- a/backends/arm/tosa_quant_utils.py +++ b/backends/arm/tosa_quant_utils.py @@ -8,14 +8,18 @@ # Utiliy functions for TOSA quantized lowerings import math -from typing import cast, NamedTuple +from typing import cast, List, NamedTuple, Tuple + +import executorch.backends.arm.tosa_mapping import serializer.tosa_serializer as ts # type: ignore import torch.fx +import torch.fx.node import tosa.Op as TosaOp # type: ignore from executorch.backends.arm.tosa_mapping import TosaArg from executorch.exir.dialects._ops import ops as exir_ops -from serializer.tosa_serializer import TosaSerializerTensor +from serializer.tosa_serializer import TosaSerializer, TosaSerializerTensor +from torch import Tensor from torch.fx import Node @@ -116,7 +120,7 @@ class QuantArgs(NamedTuple): qmax: int dtype: torch.dtype - def quantize_value(self, x): + def quantize_value(self, x: torch.Tensor | float) -> Tensor: if not isinstance(x, torch.Tensor): x = torch.Tensor([x]) return torch.clip( @@ -144,7 +148,7 @@ def from_operator(cls, op, args): # Check if scale32 mode is used for given output element type -def is_scale32(type): +def is_scale32(type: int) -> ts.DType: return type == ts.DType.INT8 @@ -152,7 +156,7 @@ def is_scale32(type): # The RESCALE operator is defined using an integer multiply, add, and shift. # This utility function is for calculating the multier and shift given a scale. # Ref: https://www.mlplatform.org/tosa/tosa_spec.html#_precision_scaling -def compute_multiplier_and_shift(scale, scaleWidth=32): +def compute_multiplier_and_shift(scale: float, scaleWidth: int = 32) -> Tuple[int, int]: if scaleWidth == 16: offset = 15 elif scaleWidth == 32: @@ -166,12 +170,12 @@ def compute_multiplier_and_shift(scale, scaleWidth=32): shift = exponent const_2_power_15_or_31 = 1 << offset - shifted_mantissa = round(mantissa * const_2_power_15_or_31) + shifted_mantissa = int(round(mantissa * const_2_power_15_or_31)) assert shifted_mantissa <= const_2_power_15_or_31 if shifted_mantissa == const_2_power_15_or_31: - shifted_mantissa = shifted_mantissa / 2 + shifted_mantissa = int(shifted_mantissa / 2) shift += 1 # TOSA expects right shift to be positive, and embed (1 << offset) into right shift bits. 
@@ -189,15 +193,15 @@ def compute_multiplier_and_shift(scale, scaleWidth=32): def build_rescale( - tosa_fb, - scale, - input_node, - output_name, - output_type, - output_shape, - input_zp, - output_zp, - is_double_round=False, + tosa_fb: TosaSerializer, + scale: float, + input_node: TosaSerializerTensor, + output_name: str, + output_type: ts.DType, + output_shape: List[int], + input_zp: int, + output_zp: int, + is_double_round: bool = False, ): scale_width = 32 if is_scale32(output_type) else 16 multiplier, shift = compute_multiplier_and_shift(scale, scale_width) @@ -223,7 +227,12 @@ def build_rescale( def build_rescale_to_int32( - tosa_fb, input, input_zp, rescale_scale, is_scale32=True, is_double_round=False + tosa_fb: TosaSerializer, + input_arg: executorch.backends.arm.tosa_mapping.TosaArg, + input_zp: int, + rescale_scale: float, + is_scale32: bool = True, + is_double_round: bool = False, ) -> TosaSerializerTensor: multiplier, shift = compute_multiplier_and_shift(rescale_scale) attr_rescale = ts.TosaSerializerAttribute() @@ -238,10 +247,10 @@ def build_rescale_to_int32( input_unsigned=False, output_unsigned=False, ) - input_A_rescaled_to_int32 = tosa_fb.addIntermediate(input.shape, ts.DType.INT32) + input_A_rescaled_to_int32 = tosa_fb.addIntermediate(input_arg.shape, ts.DType.INT32) tosa_fb.addOperator( TosaOp.Op().RESCALE, - [input.name], + [input_arg.name], [input_A_rescaled_to_int32.name], attr_rescale, ) @@ -250,13 +259,13 @@ def build_rescale_to_int32( def build_rescale_from_int32( - tosa_fb, - input_name, - output_name, - output_zp, - rescale_scale, - is_scale32=True, - is_double_round=False, + tosa_fb: TosaSerializer, + input_name: str, + output_name: str, + output_zp: int, + rescale_scale: float, + is_scale32: bool = True, + is_double_round: bool = False, ) -> None: multiplier, shift = compute_multiplier_and_shift(rescale_scale) attr_rescale_output = ts.TosaSerializerAttribute() @@ -283,14 +292,14 @@ def build_rescale_from_int32( def build_rescale_conv_output( - tosa_fb, - op, - output_name, - output_type, - input_scale, - weight_scale, - output_scale, - output_zp, + tosa_fb: TosaSerializer, + op: TosaSerializerTensor, + output_name: str, + output_type: ts.DType, + input_scale: float, + weight_scale: float, + output_scale: float, + output_zp: int, ): # TODO add check to verify if this is a Per-channel quantization. post_conv2d_scale = (input_scale * weight_scale) / output_scale diff --git a/backends/arm/tosa_specification.py b/backends/arm/tosa_specification.py index 225e1c5db58..94c307d440c 100644 --- a/backends/arm/tosa_specification.py +++ b/backends/arm/tosa_specification.py @@ -112,7 +112,7 @@ def __init__(self, version: Version, extras: List[str]): if len(extras) > 0: raise ValueError(f"Unhandled extras found: {extras}") - def __repr__(self): + def __repr__(self) -> str: extensions = "" if self.level_8k: extensions += "+8k" diff --git a/backends/arm/tosa_utils.py b/backends/arm/tosa_utils.py index 0d4aeba2d55..45473a496e1 100644 --- a/backends/arm/tosa_utils.py +++ b/backends/arm/tosa_utils.py @@ -7,7 +7,7 @@ import logging import os -from typing import Any +from typing import Any, Tuple import serializer.tosa_serializer as ts # type: ignore import torch @@ -153,7 +153,7 @@ def get_resize_parameters( output_size: torch.Tensor, resize_mode: int, align_corners: bool, -): +) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: """Get the tosa.resize parameters based on the input and output size. 
Args: From 2909e34675411b2662b9dfa0dc407ab7bb0ed38b Mon Sep 17 00:00:00 2001 From: Zingo Andersen Date: Wed, 26 Feb 2025 16:49:22 +0100 Subject: [PATCH 107/584] Arm backend: Fix for non quantized TOSA delegation in aot_arm_compiler (#8733) Signed-off-by: Zingo Andersen --- examples/arm/aot_arm_compiler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py index 1f224983d4e..5fb12342a2d 100644 --- a/examples/arm/aot_arm_compiler.py +++ b/examples/arm/aot_arm_compiler.py @@ -624,6 +624,7 @@ def save_bpte_program(exec_prog, original_model: torch.nn.Module, output_name: s def to_edge_TOSA_delegate( + exported_program, args, model: torch.nn.Module, ): @@ -687,7 +688,7 @@ def to_edge_TOSA_delegate( # Quantize if required model_int8 = None if args.delegate: - model_int8, edge = to_edge_TOSA_delegate(args, model) + model_int8, edge = to_edge_TOSA_delegate(exported_program, args, model) else: edge = to_edge_transform_and_lower( exported_program, From 640979d3ac448894803a5ed09c7d190a807058ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Per=20=C3=85strand?= Date: Wed, 26 Feb 2025 16:52:13 +0100 Subject: [PATCH 108/584] Arm backend: Allow for TOSA tests to run without ethos-u-vela (#8732) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support running TOSA only tests without ethos-u-vela pip package installed. Raise a RuntimeError to direct user to the right missing package if trying to use the backend without the package importable. Signed-off-by: Per Åstrand --- backends/arm/arm_vela.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/backends/arm/arm_vela.py b/backends/arm/arm_vela.py index e259a8867bd..2d448afead5 100644 --- a/backends/arm/arm_vela.py +++ b/backends/arm/arm_vela.py @@ -12,7 +12,13 @@ from typing import List import numpy as np -from ethosu.vela import vela # type: ignore + +try: + from ethosu.vela import vela # type: ignore + + has_vela = True +except ImportError: + has_vela = False # Pack either input or output tensor block, compose the related arrays into @@ -45,6 +51,11 @@ def vela_compile( """ Compile a TOSA graph to a binary stream for ArmBackendEthosU using Vela. """ + if not has_vela: + raise RuntimeError( + "ethos-u-vela pip package couldn't be imported. Make sure it's installed!" + ) + with tempfile.TemporaryDirectory() as tmpdir: tosaname = "out.tosa" tosa_path = os.path.join(tmpdir, tosaname) From 185a0af0736c17959005c6c0af9aab74d64f9052 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Wed, 26 Feb 2025 09:33:37 -0800 Subject: [PATCH 109/584] remove accidental duplicate //backends/arm/test:common target (#8734) I must've messed up resolving conflicts in this file yesterday. 
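As a usage sketch (the pytest selection expression is illustrative; the package name matches the one referenced in the new error message):

    # TOSA-only tests can now run without the Vela compiler installed
    pytest backends/arm/test -k tosa
    # Ethos-U compilation still requires the package; installing it resolves the RuntimeError
    pip install ethos-u-vela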
--- backends/arm/test/TARGETS | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/backends/arm/test/TARGETS b/backends/arm/test/TARGETS index 0e99a349956..58239913e29 100644 --- a/backends/arm/test/TARGETS +++ b/backends/arm/test/TARGETS @@ -39,19 +39,6 @@ python_library( ] ) -python_library( - name = "common", - srcs = ["common.py"], - deps = [ - ":runner_utils", - "//executorch/backends/xnnpack/test/tester:tester", - "//executorch/backends/arm:arm_backend", - "//executorch/exir:lib", - "//executorch/exir/backend:compile_spec_schema", - "fbsource//third-party/pypi/pytest:pytest", - ] -) - python_library( name = "arm_tester", srcs = glob(["tester/*.py"]), @@ -67,4 +54,4 @@ python_library( ] ) -define_arm_tests() +define_arm_tests() From 84273f403ca6945c7ea41994419ef8e86d85be82 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Wed, 26 Feb 2025 09:34:21 -0800 Subject: [PATCH 110/584] Speed up unittest-buck by skipping ExecuTorch cmake builds (#8688) --- .ci/scripts/unittest-linux.sh | 24 +++++++++++++++--------- .ci/scripts/unittest-macos.sh | 22 +++++++++++----------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/.ci/scripts/unittest-linux.sh b/.ci/scripts/unittest-linux.sh index e76b43fa22c..27da8d4e4f9 100755 --- a/.ci/scripts/unittest-linux.sh +++ b/.ci/scripts/unittest-linux.sh @@ -27,20 +27,26 @@ eval "$(conda shell.bash hook)" CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" -# Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate -source .ci/scripts/setup-vulkan-linux-deps.sh +if [[ "$BUILD_TOOL" == "cmake" ]]; then + # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate + source .ci/scripts/setup-vulkan-linux-deps.sh -PYTHON_EXECUTABLE=python \ -EXECUTORCH_BUILD_PYBIND=ON \ -CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ -.ci/scripts/setup-linux.sh "$BUILD_TOOL" "$BUILD_MODE" + PYTHON_EXECUTABLE=python \ + EXECUTORCH_BUILD_PYBIND=ON \ + CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ + .ci/scripts/setup-linux.sh "$BUILD_TOOL" "$BUILD_MODE" -# Install llama3_2_vision dependencies. -PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh + # Install llama3_2_vision dependencies. + PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh -if [[ "$BUILD_TOOL" == "cmake" ]]; then .ci/scripts/unittest-linux-cmake.sh elif [[ "$BUILD_TOOL" == "buck2" ]]; then + # Removing this breaks sccache in the Buck build, apparently + # because TMPDIR gets messed up? Please feel free to fix this and + # speed up this CI job! 
+ PYTHON_EXECUTABLE=python \ + .ci/scripts/setup-linux.sh "$BUILD_TOOL" "$BUILD_MODE" + .ci/scripts/unittest-buck2.sh else echo "Unknown build tool $BUILD_TOOL" diff --git a/.ci/scripts/unittest-macos.sh b/.ci/scripts/unittest-macos.sh index c0e39cee335..9f7fafa35ce 100755 --- a/.ci/scripts/unittest-macos.sh +++ b/.ci/scripts/unittest-macos.sh @@ -30,19 +30,19 @@ export TMP_DIR=$(mktemp -d) export PATH="${TMP_DIR}:$PATH" trap 'rm -rfv ${TMP_DIR}' EXIT -# Setup MacOS dependencies as there is no Docker support on MacOS atm -PYTHON_EXECUTABLE=python \ -EXECUTORCH_BUILD_PYBIND=ON \ -CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ -${CONDA_RUN} --no-capture-output \ -.ci/scripts/setup-macos.sh "${BUILD_TOOL}" "${BUILD_MODE}" +if [[ "$BUILD_TOOL" == "cmake" ]]; then + # Setup MacOS dependencies as there is no Docker support on MacOS atm + PYTHON_EXECUTABLE=python \ + EXECUTORCH_BUILD_PYBIND=ON \ + CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ + ${CONDA_RUN} --no-capture-output \ + .ci/scripts/setup-macos.sh "${BUILD_TOOL}" "${BUILD_MODE}" -# Install llama3_2_vision dependencies. -PYTHON_EXECUTABLE=python \ -${CONDA_RUN} --no-capture-output \ -./examples/models/llama3_2_vision/install_requirements.sh + # Install llama3_2_vision dependencies. + PYTHON_EXECUTABLE=python \ + ${CONDA_RUN} --no-capture-output \ + ./examples/models/llama3_2_vision/install_requirements.sh -if [[ "$BUILD_TOOL" == "cmake" ]]; then .ci/scripts/unittest-macos-cmake.sh elif [[ "$BUILD_TOOL" == "buck2" ]]; then .ci/scripts/unittest-buck2.sh From c6761f4c713fb9fbd91724c4d0eb3a3ebaaed6f8 Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Wed, 26 Feb 2025 10:48:55 -0800 Subject: [PATCH 111/584] Merge updated documentation into master (#8638) * New Getting Started documentation (#8179) WIP New getting started * Update documentation flow and add placeholders (#8287) Add placeholder top-level doc pages * Add new export + lowering docs, update getting started (#8412) Write new top-level export and lowering documentation * More doc placeholders (#8523) * Move cmake and faq docs to new location * Rename CMake build to Building from Source * Move backend docs to new locations (#8413) * Temporarily remove new backend pages * Move backend docs to new locations * Update backend titles and inline contents * Backend doc template (#8524) Add backend template, update XNNPACK docs * Add runtime integration documentation (#8516) Add runtime integration doc * Move iOS docs to top, add Android placeholders (#8511) * Temporarily remove using-executorch-ios.md * Move Apple runtime docs to new location * Clean up documentation placeholders and links, add top-level docs for C++ APIs, Android, and troubleshooting (#8618) * Clean up getting-started.md, remove placeholders * Move Android pre-built AAR info into top-level Android page * Add placeholder backend overview * Add placeholder troubleshooting docs * Populate top-level C++ API doc * Clean up additional doc placeholders and fix broken links * Add env setup instructions for source build * Fix getting started code snippet (#8637) Fix quotes in getting started code snippets * Clean up a few more doc sections and links (#8672) Clean up a few more broken links and sections in new doc flow * Fix QNN link, typo (#8729) * Add a CMake snippet to the XNNPACK backend doc build section (#8730) Add CMake example to xnnpack 
backend doc --- backends/vulkan/README.md | 2 +- docs/source/api-life-cycle.md | 2 +- ...=> backend-delegates-xnnpack-reference.md} | 2 +- docs/source/backend-template.md | 15 ++ ...te-tutorial.md => backends-arm-ethos-u.md} | 8 +- ...uild-run-xtensa.md => backends-cadence.md} | 8 +- ...build-run-coreml.md => backends-coreml.md} | 8 +- ...diatek-backend.md => backends-mediatek.md} | 12 +- docs/source/backends-mps.md | 157 +++++++++++++ docs/source/backends-overview.md | 20 ++ ...direct-backend.md => backends-qualcomm.md} | 10 +- docs/source/backends-vulkan.md | 205 +++++++++++++++++ docs/source/backends-xnnpack.md | 124 +++++++++++ docs/source/build-run-mps.md | 1 - docs/source/build-run-vulkan.md | 1 - .../compiler-delegate-and-partitioner.md | 2 +- docs/source/concepts.md | 2 +- docs/source/debug-backend-delegate.md | 2 +- .../executorch-runtime-api-reference.rst | 2 +- .../export-to-executorch-api-reference.rst | 2 +- docs/source/getting-started-architecture.md | 2 +- docs/source/getting-started-setup.md | 2 +- docs/source/getting-started.md | 208 ++++++++++++++++++ docs/source/index.rst | 157 +++++++------ ...lama3-qualcomm-ai-engine-direct-backend.md | 2 +- docs/source/llm/getting-started.md | 6 +- docs/source/runtime-overview.md | 2 +- docs/source/runtime-python-api-reference.rst | 2 +- .../tutorial-xnnpack-delegate-lowering.md | 2 +- .../tutorials_source/bundled_program.bp | Bin 0 -> 261600 bytes ...library.md => using-executorch-android.md} | 24 +- ... using-executorch-building-from-source.md} | 106 ++++++++- docs/source/using-executorch-cpp.md | 75 +++++++ docs/source/using-executorch-export.md | 178 +++++++++++++++ ...arted-faqs.md => using-executorch-faqs.md} | 2 +- ...ple-runtime.md => using-executorch-ios.md} | 16 +- .../using-executorch-runtime-integration.md | 53 +++++ .../using-executorch-troubleshooting.md | 20 ++ .../android/ExecuTorchDemo/README.md | 8 +- 39 files changed, 1313 insertions(+), 137 deletions(-) rename docs/source/{native-delegates-executorch-xnnpack-delegate.md => backend-delegates-xnnpack-reference.md} (99%) create mode 100644 docs/source/backend-template.md rename docs/source/{executorch-arm-delegate-tutorial.md => backends-arm-ethos-u.md} (98%) rename docs/source/{build-run-xtensa.md => backends-cadence.md} (98%) rename docs/source/{build-run-coreml.md => backends-coreml.md} (96%) rename docs/source/{build-run-mediatek-backend.md => backends-mediatek.md} (87%) create mode 100644 docs/source/backends-mps.md create mode 100644 docs/source/backends-overview.md rename docs/source/{build-run-qualcomm-ai-engine-direct-backend.md => backends-qualcomm.md} (97%) create mode 100644 docs/source/backends-vulkan.md create mode 100644 docs/source/backends-xnnpack.md delete mode 100644 docs/source/build-run-mps.md delete mode 100644 docs/source/build-run-vulkan.md create mode 100644 docs/source/getting-started.md create mode 100644 docs/source/tutorials_source/bundled_program.bp rename docs/source/{android-prebuilt-library.md => using-executorch-android.md} (69%) rename docs/source/{runtime-build-and-cross-compilation.md => using-executorch-building-from-source.md} (63%) create mode 100644 docs/source/using-executorch-cpp.md create mode 100644 docs/source/using-executorch-export.md rename docs/source/{getting-started-faqs.md => using-executorch-faqs.md} (99%) rename docs/source/{apple-runtime.md => using-executorch-ios.md} (93%) create mode 100644 docs/source/using-executorch-runtime-integration.md create mode 100644 docs/source/using-executorch-troubleshooting.md 
diff --git a/backends/vulkan/README.md b/backends/vulkan/README.md index b428333c913..2cfff6a6eb6 100644 --- a/backends/vulkan/README.md +++ b/backends/vulkan/README.md @@ -1,4 +1,4 @@ -# ExecuTorch Vulkan Delegate +# Vulkan Backend The ExecuTorch Vulkan delegate is a native GPU delegate for ExecuTorch that is built on top of the cross-platform Vulkan GPU API standard. It is primarily diff --git a/docs/source/api-life-cycle.md b/docs/source/api-life-cycle.md index 1836ba77d71..0327f23a985 100644 --- a/docs/source/api-life-cycle.md +++ b/docs/source/api-life-cycle.md @@ -1,4 +1,4 @@ -# ExecuTorch API Life Cycle and Deprecation Policy +# API Life Cycle and Deprecation Policy ## API Life Cycle diff --git a/docs/source/native-delegates-executorch-xnnpack-delegate.md b/docs/source/backend-delegates-xnnpack-reference.md similarity index 99% rename from docs/source/native-delegates-executorch-xnnpack-delegate.md rename to docs/source/backend-delegates-xnnpack-reference.md index 6bfbfa6be36..52d208de219 100644 --- a/docs/source/native-delegates-executorch-xnnpack-delegate.md +++ b/docs/source/backend-delegates-xnnpack-reference.md @@ -1,4 +1,4 @@ -# ExecuTorch XNNPACK delegate +# XNNPACK Delegate Internals This is a high-level overview of the ExecuTorch XNNPACK backend delegate. This high performance delegate is aimed to reduce CPU inference latency for ExecuTorch models. We will provide a brief introduction to the XNNPACK library and explore the delegate’s overall architecture and intended use cases. diff --git a/docs/source/backend-template.md b/docs/source/backend-template.md new file mode 100644 index 00000000000..1962ee4add4 --- /dev/null +++ b/docs/source/backend-template.md @@ -0,0 +1,15 @@ +# Backend Template + +## Features + +## Target Requirements + +## Development Requirements + +## Lowering a Model to *Backend Name* + +### Partitioner API + +### Quantization + +## Runtime Integration diff --git a/docs/source/executorch-arm-delegate-tutorial.md b/docs/source/backends-arm-ethos-u.md similarity index 98% rename from docs/source/executorch-arm-delegate-tutorial.md rename to docs/source/backends-arm-ethos-u.md index feb8f0335fa..532c2e94237 100644 --- a/docs/source/executorch-arm-delegate-tutorial.md +++ b/docs/source/backends-arm-ethos-u.md @@ -1,5 +1,5 @@ -# Building and Running ExecuTorch with ARM Ethos-U Backend +# ARM Ethos-U Backend ::::{grid} 2 @@ -7,8 +7,8 @@ :::{grid-item-card} Tutorials we recommend you complete before this: :class-card: card-prerequisites * [Introduction to ExecuTorch](./intro-how-it-works.md) -* [Setting up ExecuTorch](./getting-started-setup.md) -* [Building ExecuTorch with CMake](./runtime-build-and-cross-compilation.md) +* [Getting Started](./getting-started.md) +* [Building ExecuTorch with CMake](./using-executorch-building-from-source.md) ::: :::{grid-item-card} What you will learn in this tutorial: @@ -286,7 +286,7 @@ The `generate_pte_file` function in `run.sh` script produces the `.pte` files ba ExecuTorch's CMake build system produces a set of build pieces which are critical for us to include and run the ExecuTorch runtime with-in the bare-metal environment we have for Corstone FVPs from Ethos-U SDK. -[This](./runtime-build-and-cross-compilation.md) document provides a detailed overview of each individual build piece. For running either variant of the `.pte` file, we will need a core set of libraries. Here is a list, +[This](./using-executorch-building-from-source.md) document provides a detailed overview of each individual build piece. 
For running either variant of the `.pte` file, we will need a core set of libraries. Here is a list, - `libexecutorch.a` - `libportable_kernels.a` diff --git a/docs/source/build-run-xtensa.md b/docs/source/backends-cadence.md similarity index 98% rename from docs/source/build-run-xtensa.md rename to docs/source/backends-cadence.md index 6097c9095a6..278a2f9ec2b 100644 --- a/docs/source/build-run-xtensa.md +++ b/docs/source/backends-cadence.md @@ -1,4 +1,4 @@ -# Building and Running ExecuTorch on Xtensa HiFi4 DSP +# Cadence Xtensa Backend In this tutorial we will walk you through the process of getting setup to build ExecuTorch for an Xtensa HiFi4 DSP and running a simple model on it. @@ -17,9 +17,9 @@ On top of being able to run on the Xtensa HiFi4 DSP, another goal of this tutori ::: :::{grid-item-card} Tutorials we recommend you complete before this: :class-card: card-prerequisites -* [Introduction to ExecuTorch](intro-how-it-works.md) -* [Setting up ExecuTorch](getting-started-setup.md) -* [Building ExecuTorch with CMake](runtime-build-and-cross-compilation.md) +* [Introduction to ExecuTorch](./intro-how-it-works.md) +* [Getting Started](./getting-started.md) +* [Building ExecuTorch with CMake](./using-executorch-building-from-source.md) ::: :::: diff --git a/docs/source/build-run-coreml.md b/docs/source/backends-coreml.md similarity index 96% rename from docs/source/build-run-coreml.md rename to docs/source/backends-coreml.md index 45a7ecafce4..804f3fe3a93 100644 --- a/docs/source/build-run-coreml.md +++ b/docs/source/backends-coreml.md @@ -1,4 +1,4 @@ -# Building and Running ExecuTorch with Core ML Backend +# Core ML Backend Core ML delegate uses Core ML APIs to enable running neural networks via Apple's hardware acceleration. For more about Core ML you can read [here](https://developer.apple.com/documentation/coreml). In this tutorial, we will walk through the steps of lowering a PyTorch model to Core ML delegate @@ -11,9 +11,9 @@ Core ML delegate uses Core ML APIs to enable running neural networks via Apple's ::: :::{grid-item-card} Tutorials we recommend you complete before this: :class-card: card-prerequisites -* [Introduction to ExecuTorch](intro-how-it-works.md) -* [Setting up ExecuTorch](getting-started-setup.md) -* [Building ExecuTorch with CMake](runtime-build-and-cross-compilation.md) +* [Introduction to ExecuTorch](./intro-how-it-works.md) +* [Getting Started](./getting-started.md) +* [Building ExecuTorch with CMake](./using-executorch-building-from-source.md) * [ExecuTorch iOS Demo App](demo-apps-ios.md) ::: :::: diff --git a/docs/source/build-run-mediatek-backend.md b/docs/source/backends-mediatek.md similarity index 87% rename from docs/source/build-run-mediatek-backend.md rename to docs/source/backends-mediatek.md index eeaa2b8dc88..456a62aaabd 100644 --- a/docs/source/build-run-mediatek-backend.md +++ b/docs/source/backends-mediatek.md @@ -1,4 +1,4 @@ -# Building and Running ExecuTorch with MediaTek Backend +# MediaTek Backend MediaTek backend empowers ExecuTorch to speed up PyTorch models on edge devices that equips with MediaTek Neuron Processing Unit (NPU). This document offers a step-by-step guide to set up the build environment for the MediaTek ExecuTorch libraries. 
@@ -11,9 +11,9 @@ MediaTek backend empowers ExecuTorch to speed up PyTorch models on edge devices ::: :::{grid-item-card} Tutorials we recommend you complete before this: :class-card: card-prerequisites -* [Introduction to ExecuTorch](intro-how-it-works.md) -* [Setting up ExecuTorch](getting-started-setup.md) -* [Building ExecuTorch with CMake](runtime-build-and-cross-compilation.md) +* [Introduction to ExecuTorch](./intro-how-it-works.md) +* [Getting Started](./getting-started.md) +* [Building ExecuTorch with CMake](./using-executorch-building-from-source.md) ::: :::: @@ -34,7 +34,7 @@ MediaTek backend empowers ExecuTorch to speed up PyTorch models on edge devices Follow the steps below to setup your build environment: -1. **Setup ExecuTorch Environment**: Refer to the [Setting up ExecuTorch](https://pytorch.org/executorch/stable/getting-started-setup) guide for detailed instructions on setting up the ExecuTorch environment. +1. **Setup ExecuTorch Environment**: Refer to the [Getting Started](getting-started.md) guide for detailed instructions on setting up the ExecuTorch environment. 2. **Setup MediaTek Backend Environment** - Install the dependent libs. Ensure that you are inside `backends/mediatek/` directory @@ -91,4 +91,4 @@ cd executorch ```bash export LD_LIBRARY_PATH=::$LD_LIBRARY_PATH - ``` \ No newline at end of file + ``` diff --git a/docs/source/backends-mps.md b/docs/source/backends-mps.md new file mode 100644 index 00000000000..44cf0065e2b --- /dev/null +++ b/docs/source/backends-mps.md @@ -0,0 +1,157 @@ +# MPS Backend + +In this tutorial we will walk you through the process of getting setup to build the MPS backend for ExecuTorch and running a simple model on it. + +The MPS backend device maps machine learning computational graphs and primitives on the [MPS Graph](https://developer.apple.com/documentation/metalperformanceshadersgraph/mpsgraph?language=objc) framework and tuned kernels provided by [MPS](https://developer.apple.com/documentation/metalperformanceshaders?language=objc). + +::::{grid} 2 +:::{grid-item-card} What you will learn in this tutorial: +:class-card: card-prerequisites +* In this tutorial you will learn how to export [MobileNet V3](https://pytorch.org/vision/main/models/mobilenetv3.html) model to the MPS delegate. +* You will also learn how to compile and deploy the ExecuTorch runtime with the MPS delegate on macOS and iOS. +::: +:::{grid-item-card} Tutorials we recommend you complete before this: +:class-card: card-prerequisites +* [Introduction to ExecuTorch](./intro-how-it-works.md) +* [Getting Started](./getting-started.md) +* [Building ExecuTorch with CMake](./using-executorch-building-from-source.md) +* [ExecuTorch iOS Demo App](demo-apps-ios.md) +* [ExecuTorch iOS LLaMA Demo App](llm/llama-demo-ios.md) +::: +:::: + + +## Prerequisites (Hardware and Software) + +In order to be able to successfully build and run a model using the MPS backend for ExecuTorch, you'll need the following hardware and software components: + +### Hardware: + - A [mac](https://www.apple.com/mac/) for tracing the model + +### Software: + + - **Ahead of time** tracing: + - [macOS](https://www.apple.com/macos/) 12 + + - **Runtime**: + - [macOS](https://www.apple.com/macos/) >= 12.4 + - [iOS](https://www.apple.com/ios) >= 15.4 + - [Xcode](https://developer.apple.com/xcode/) >= 14.1 + +## Setting up Developer Environment + +***Step 1.*** Please finish tutorial [Getting Started](getting-started.md). 
+ +***Step 2.*** Install dependencies needed to lower MPS delegate: + + ```bash + ./backends/apple/mps/install_requirements.sh + ``` + +## Build + +### AOT (Ahead-of-time) Components + +**Compiling model for MPS delegate**: +- In this step, you will generate a simple ExecuTorch program that lowers MobileNetV3 model to the MPS delegate. You'll then pass this Program (the `.pte` file) during the runtime to run it using the MPS backend. + +```bash +cd executorch +# Note: `mps_example` script uses by default the MPSPartitioner for ops that are not yet supported by the MPS delegate. To turn it off, pass `--no-use_partitioner`. +python3 -m examples.apple.mps.scripts.mps_example --model_name="mv3" --bundled --use_fp16 + +# To see all options, run following command: +python3 -m examples.apple.mps.scripts.mps_example --help +``` + +### Runtime + +**Building the MPS executor runner:** +```bash +# In this step, you'll be building the `mps_executor_runner` that is able to run MPS lowered modules: +cd executorch +./examples/apple/mps/scripts/build_mps_executor_runner.sh +``` + +## Run the mv3 generated model using the mps_executor_runner + +```bash +./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_bundled_fp16.pte --bundled_program +``` + +- You should see the following results. Note that no output file will be generated in this example: +``` +I 00:00:00.003290 executorch:mps_executor_runner.mm:286] Model file mv3_mps_bundled_fp16.pte is loaded. +I 00:00:00.003306 executorch:mps_executor_runner.mm:292] Program methods: 1 +I 00:00:00.003308 executorch:mps_executor_runner.mm:294] Running method forward +I 00:00:00.003311 executorch:mps_executor_runner.mm:349] Setting up non-const buffer 1, size 606112. +I 00:00:00.003374 executorch:mps_executor_runner.mm:376] Setting up memory manager +I 00:00:00.003376 executorch:mps_executor_runner.mm:392] Loading method name from plan +I 00:00:00.018942 executorch:mps_executor_runner.mm:399] Method loaded. +I 00:00:00.018944 executorch:mps_executor_runner.mm:404] Loading bundled program... +I 00:00:00.018980 executorch:mps_executor_runner.mm:421] Inputs prepared. +I 00:00:00.118731 executorch:mps_executor_runner.mm:438] Model executed successfully. +I 00:00:00.122615 executorch:mps_executor_runner.mm:501] Model verified successfully. +``` + +### [Optional] Run the generated model directly using pybind +1. Make sure `pybind` MPS support was installed: +```bash +./install_executorch.sh --pybind mps +``` +2. Run the `mps_example` script to trace the model and run it directly from python: +```bash +cd executorch +# Check correctness between PyTorch eager forward pass and ExecuTorch MPS delegate forward pass +python3 -m examples.apple.mps.scripts.mps_example --model_name="mv3" --no-use_fp16 --check_correctness +# You should see following output: `Results between ExecuTorch forward pass with MPS backend and PyTorch forward pass for mv3_mps are matching!` + +# Check performance between PyTorch MPS forward pass and ExecuTorch MPS forward pass +python3 -m examples.apple.mps.scripts.mps_example --model_name="mv3" --no-use_fp16 --bench_pytorch +``` + +### Profiling: +1. [Optional] Generate an [ETRecord](./etrecord.rst) while you're exporting your model. +```bash +cd executorch +python3 -m examples.apple.mps.scripts.mps_example --model_name="mv3" --generate_etrecord -b +``` +2. Run your Program on the ExecuTorch runtime and generate an [ETDump](./etdump.md). 
+``` +./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_bundled_fp16.pte --bundled_program --dump-outputs +``` +3. Create an instance of the Inspector API by passing in the ETDump you have sourced from the runtime along with the optionally generated ETRecord from step 1. +```bash +python3 -m sdk.inspector.inspector_cli --etdump_path etdump.etdp --etrecord_path etrecord.bin +``` + +## Deploying and Running on Device + +***Step 1***. Create the ExecuTorch core and MPS delegate frameworks to link on iOS +```bash +cd executorch +./build/build_apple_frameworks.sh --mps +``` + +`mps_delegate.xcframework` will be in `cmake-out` folder, along with `executorch.xcframework` and `portable_delegate.xcframework`: +```bash +cd cmake-out && ls +``` + +***Step 2***. Link the frameworks into your XCode project: +Go to project Target’s `Build Phases` - `Link Binaries With Libraries`, click the **+** sign and add the frameworks: files located in `Release` folder. +- `executorch.xcframework` +- `portable_delegate.xcframework` +- `mps_delegate.xcframework` + +From the same page, include the needed libraries for the MPS delegate: +- `MetalPerformanceShaders.framework` +- `MetalPerformanceShadersGraph.framework` +- `Metal.framework` + +In this tutorial, you have learned how to lower a model to the MPS delegate, build the mps_executor_runner and run a lowered model through the MPS delegate, or directly on device using the MPS delegate static library. + + +## Frequently encountered errors and resolution. + +If you encountered any bugs or issues following this tutorial please file a bug/issue on the [ExecuTorch repository](https://github.com/pytorch/executorch/issues), with hashtag **#mps**. diff --git a/docs/source/backends-overview.md b/docs/source/backends-overview.md new file mode 100644 index 00000000000..dd3aa0354bc --- /dev/null +++ b/docs/source/backends-overview.md @@ -0,0 +1,20 @@ +# Backend Overview + +ExecuTorch backends provide hardware acceleration for a specific hardware target. In order to achieve maximum performance on target hardware, ExecuTorch optimizes the model for a specific backend during the export and lowering process. This means that the resulting .pte file is specialized for the specific hardware. In order to deploy to multiple backends, such as Core ML on iOS and Arm CPU on Android, it is common to generate a dedicated .pte file for each. + +The choice of hardware backend is informed by the hardware that the model is intended to be deployed on. Each backend has specific hardware requires and level of model support. See the documentation for each hardware backend for more details. + +As part of the .pte file creation process, ExecuTorch identifies portions of the model (partitions) that are supported for the given backend. These sections are processed by the backend ahead of time to support efficient execution. Portions of the model that are not supported on the delegate, if any, are executed using the portable fallback implementation on CPU. This allows for partial model acceleration when not all model operators are supported on the backend, but may have negative performance implications. In addition, multiple partitioners can be specified in order of priority. This allows for operators not supported on GPU to run on CPU via XNNPACK, for example. + +### Available Backends + +Commonly used hardware backends are listed below. For mobile, consider using XNNPACK for Android and XNNPACK or Core ML for iOS. 
To create a .pte file for a specific backend, pass the appropriate partitioner class to `to_edge_transform_and_lower`. See the appropriate backend documentation for more information. + +- [XNNPACK (Mobile CPU)](backends-xnnpack.md) +- [Core ML (iOS)](backends-coreml.md) +- [Metal Performance Shaders (iOS GPU)](backends-mps.md) +- [Vulkan (Android GPU)](backends-vulkan.md) +- [Qualcomm NPU](backends-qualcomm.md) +- [MediaTek NPU](backends-mediatek.md) +- [Arm Ethos-U NPU](backends-arm-ethos-u.md) +- [Cadence DSP](backends-cadence.md) diff --git a/docs/source/build-run-qualcomm-ai-engine-direct-backend.md b/docs/source/backends-qualcomm.md similarity index 97% rename from docs/source/build-run-qualcomm-ai-engine-direct-backend.md rename to docs/source/backends-qualcomm.md index 55634459eff..2d2b017aca1 100644 --- a/docs/source/build-run-qualcomm-ai-engine-direct-backend.md +++ b/docs/source/backends-qualcomm.md @@ -1,4 +1,4 @@ -# Building and Running ExecuTorch with Qualcomm AI Engine Direct Backend +# Qualcomm AI Engine Backend In this tutorial we will walk you through the process of getting started to build ExecuTorch for Qualcomm AI Engine Direct and running a model on it. @@ -14,9 +14,9 @@ Qualcomm AI Engine Direct is also referred to as QNN in the source and documenta ::: :::{grid-item-card} Tutorials we recommend you complete before this: :class-card: card-prerequisites -* [Introduction to ExecuTorch](intro-how-it-works.md) -* [Setting up ExecuTorch](getting-started-setup.md) -* [Building ExecuTorch with CMake](runtime-build-and-cross-compilation.md) +* [Introduction to ExecuTorch](./intro-how-it-works.md) +* [Getting Started](./getting-started.md) +* [Building ExecuTorch with CMake](./using-executorch-building-from-source.md) ::: :::: @@ -347,7 +347,7 @@ The model, inputs, and output location are passed to `qnn_executorch_runner` by ### Running a model via ExecuTorch's android demo-app An Android demo-app using Qualcomm AI Engine Direct Backend can be found in -`examples`. Please refer to android demo app [tutorial](https://pytorch.org/executorch/stable/demo-apps-android.html). +`examples`. Please refer to android demo app [tutorial](demo-apps-android.md). ## Supported model list diff --git a/docs/source/backends-vulkan.md b/docs/source/backends-vulkan.md new file mode 100644 index 00000000000..2cfff6a6eb6 --- /dev/null +++ b/docs/source/backends-vulkan.md @@ -0,0 +1,205 @@ +# Vulkan Backend + +The ExecuTorch Vulkan delegate is a native GPU delegate for ExecuTorch that is +built on top of the cross-platform Vulkan GPU API standard. It is primarily +designed to leverage the GPU to accelerate model inference on Android devices, +but can be used on any platform that supports an implementation of Vulkan: +laptops, servers, and edge devices. + +::::{note} +The Vulkan delegate is currently under active development, and its components +are subject to change. +:::: + +## What is Vulkan? + +Vulkan is a low-level GPU API specification developed as a successor to OpenGL. +It is designed to offer developers more explicit control over GPUs compared to +previous specifications in order to reduce overhead and maximize the +capabilities of the modern graphics hardware. + +Vulkan has been widely adopted among GPU vendors, and most modern GPUs (both +desktop and mobile) in the market support Vulkan. Vulkan is also included in +Android from Android 7.0 onwards. + +**Note that Vulkan is a GPU API, not a GPU Math Library**. 
That is to say it +provides a way to execute compute and graphics operations on a GPU, but does not +come with a built-in library of performant compute kernels. + +## The Vulkan Compute Library + +The ExecuTorch Vulkan Delegate is a wrapper around a standalone runtime known as +the **Vulkan Compute Library**. The aim of the Vulkan Compute Library is to +provide GPU implementations for PyTorch operators via GLSL compute shaders. + +The Vulkan Compute Library is a fork/iteration of the [PyTorch Vulkan Backend](https://pytorch.org/tutorials/prototype/vulkan_workflow.html). +The core components of the PyTorch Vulkan backend were forked into ExecuTorch +and adapted for an AOT graph-mode style of model inference (as opposed to +PyTorch which adopted an eager execution style of model inference). + +The components of the Vulkan Compute Library are contained in the +`executorch/backends/vulkan/runtime/` directory. The core components are listed +and described below: + +``` +runtime/ +├── api/ .................... Wrapper API around Vulkan to manage Vulkan objects +└── graph/ .................. ComputeGraph class which implements graph mode inference + └── ops/ ................ Base directory for operator implementations + ├── glsl/ ........... GLSL compute shaders + │ ├── *.glsl + │ └── conv2d.glsl + └── impl/ ........... C++ code to dispatch GPU compute shaders + ├── *.cpp + └── Conv2d.cpp +``` + +## Features + +The Vulkan delegate currently supports the following features: + +* **Memory Planning** + * Intermediate tensors whose lifetimes do not overlap will share memory allocations. This reduces the peak memory usage of model inference. +* **Capability Based Partitioning**: + * A graph can be partially lowered to the Vulkan delegate via a partitioner, which will identify nodes (i.e. operators) that are supported by the Vulkan delegate and lower only supported subgraphs +* **Support for upper-bound dynamic shapes**: + * Tensors can change shape between inferences as long as its current shape is smaller than the bounds specified during lowering + +In addition to increasing operator coverage, the following features are +currently in development: + +* **Quantization Support** + * We are currently working on support for 8-bit dynamic quantization, with plans to extend to other quantization schemes in the future. +* **Memory Layout Management** + * Memory layout is an important factor to optimizing performance. We plan to introduce graph passes to introduce memory layout transitions throughout a graph to optimize memory-layout sensitive operators such as Convolution and Matrix Multiplication. +* **Selective Build** + * We plan to make it possible to control build size by selecting which operators/shaders you want to build with + +## End to End Example + +To further understand the features of the Vulkan Delegate and how to use it, +consider the following end to end example with a simple single operator model. + +### Compile and lower a model to the Vulkan Delegate + +Assuming ExecuTorch has been set up and installed, the following script can be +used to produce a lowered MobileNet V2 model as `vulkan_mobilenetv2.pte`. + +Once ExecuTorch has been set up and installed, the following script can be used +to generate a simple model and lower it to the Vulkan delegate. + +``` +# Note: this script is the same as the script from the "Setting up ExecuTorch" +# page, with one minor addition to lower to the Vulkan backend. 
+import torch +from torch.export import export +from executorch.exir import to_edge + +from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner + +# Start with a PyTorch model that adds two input tensors (matrices) +class Add(torch.nn.Module): + def __init__(self): + super(Add, self).__init__() + + def forward(self, x: torch.Tensor, y: torch.Tensor): + return x + y + +# 1. torch.export: Defines the program with the ATen operator set. +aten_dialect = export(Add(), (torch.ones(1), torch.ones(1))) + +# 2. to_edge: Make optimizations for Edge devices +edge_program = to_edge(aten_dialect) +# 2.1 Lower to the Vulkan backend +edge_program = edge_program.to_backend(VulkanPartitioner()) + +# 3. to_executorch: Convert the graph to an ExecuTorch program +executorch_program = edge_program.to_executorch() + +# 4. Save the compiled .pte program +with open("vk_add.pte", "wb") as file: + file.write(executorch_program.buffer) +``` + +Like other ExecuTorch delegates, a model can be lowered to the Vulkan Delegate +using the `to_backend()` API. The Vulkan Delegate implements the +`VulkanPartitioner` class which identifies nodes (i.e. operators) in the graph +that are supported by the Vulkan delegate, and separates compatible sections of +the model to be executed on the GPU. + +This means the a model can be lowered to the Vulkan delegate even if it contains +some unsupported operators. This will just mean that only parts of the graph +will be executed on the GPU. + + +::::{note} +The [supported ops list](https://github.com/pytorch/executorch/blob/main/backends/vulkan/partitioner/supported_ops.py) +Vulkan partitioner code can be inspected to examine which ops are currently +implemented in the Vulkan delegate. +:::: + +### Build Vulkan Delegate libraries + +The easiest way to build and test the Vulkan Delegate is to build for Android +and test on a local Android device. Android devices have built in support for +Vulkan, and the Android NDK ships with a GLSL compiler which is needed to +compile the Vulkan Compute Library's GLSL compute shaders. + +The Vulkan Delegate libraries can be built by setting `-DEXECUTORCH_BUILD_VULKAN=ON` +when building with CMake. + +First, make sure that you have the Android NDK installed; any NDK version past +NDK r19c should work. Note that the examples in this doc have been validated with +NDK r27b. The Android SDK should also be installed so that you have access to `adb`. + +The instructions in this page assumes that the following environment variables +are set. + +```shell +export ANDROID_NDK= +# Select the appropriate Android ABI for your device +export ANDROID_ABI=arm64-v8a +# All subsequent commands should be performed from ExecuTorch repo root +cd +# Make sure adb works +adb --version +``` + +To build and install ExecuTorch libraries (for Android) with the Vulkan +Delegate: + +```shell +# From executorch root directory +(rm -rf cmake-android-out && \ + pp cmake . -DCMAKE_INSTALL_PREFIX=cmake-android-out \ + -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \ + -DANDROID_ABI=$ANDROID_ABI \ + -DEXECUTORCH_BUILD_VULKAN=ON \ + -DPYTHON_EXECUTABLE=python \ + -Bcmake-android-out && \ + cmake --build cmake-android-out -j16 --target install) +``` + +### Run the Vulkan model on device + +::::{note} +Since operator support is currently limited, only binary arithmetic operators +will run on the GPU. Expect inference to be slow as the majority of operators +are being executed via Portable operators. 
+:::: + +Now, the partially delegated model can be executed (partially) on your device's +GPU! + +```shell +# Build a model runner binary linked with the Vulkan delegate libs +cmake --build cmake-android-out --target vulkan_executor_runner -j32 + +# Push model to device +adb push vk_add.pte /data/local/tmp/vk_add.pte +# Push binary to device +adb push cmake-android-out/backends/vulkan/vulkan_executor_runner /data/local/tmp/runner_bin + +# Run the model +adb shell /data/local/tmp/runner_bin --model_path /data/local/tmp/vk_add.pte +``` diff --git a/docs/source/backends-xnnpack.md b/docs/source/backends-xnnpack.md new file mode 100644 index 00000000000..e41189d0089 --- /dev/null +++ b/docs/source/backends-xnnpack.md @@ -0,0 +1,124 @@ +# XNNPACK Backend + +The XNNPACK delegate is the ExecuTorch solution for CPU execution on mobile CPUs. XNNPACK is a library that provides optimized kernels for machine learning operators on Arm and x86 CPUs. + +## Features + +- Wide operator support on Arm and x86 CPUs, available on any modern mobile phone. +- Support for a wide variety of quantization schemes and quantized operators. + +## Target Requirements + +- ARM64 on Android, iOS, macOS, Linux, and Windows. +- ARMv7 (with NEON) on Android. +- ARMv6 (with VFPv2) on Linux. +- x86 and x86-64 (up to AVX512) on Windows, Linux, macOS, Android, and iOS simulator. + +## Development Requirements + +The XNNPACK delegate does not introduce any development system requirements beyond those required by the core ExecuTorch runtime. + +## Lowering a Model to XNNPACK + +To target the XNNPACK backend during the export and lowering process, pass an instance of the `XnnpackPartitioner` to `to_edge_transform_and_lower`. The example below demonstrates this process using the MobileNet V2 model from torchvision. + +```python +import torchvision.models as models +from torchvision.models.mobilenetv2 import MobileNet_V2_Weights +from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner +from executorch.exir import to_edge_transform_and_lower + +mobilenet_v2 = models.mobilenetv2.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).eval() +sample_inputs = (torch.randn(1, 3, 224, 224), ) + +et_program = to_edge_transform_and_lower( + torch.export.export(mobilenet_v2, sample_inputs), + partitioner=[XnnpackPartitioner()], +).to_executorch() + +with open("mv2_xnnpack.pte", "wb") as file: + et_program.write_to_file(file) +``` + +### Partitioner API + +The XNNPACK partitioner API allows for configuration of the model delegation to XNNPACK. Passing an `XnnpackPartitioner` instance with no additional parameters will run as much of the model as possible on the XNNPACK backend. This is the most common use-case. For advanced use cases, the partitioner exposes the following options via the [constructor](https://github.com/pytorch/executorch/blob/14ff52ff89a89c074fc6c14d3f01683677783dcd/backends/xnnpack/partition/xnnpack_partitioner.py#L31): + + - `configs`: Control which operators are delegated to XNNPACK. By default, all available operators all delegated. See [../config/\_\_init\_\_.py](https://github.com/pytorch/executorch/blob/14ff52ff89a89c074fc6c14d3f01683677783dcd/backends/xnnpack/partition/config/__init__.py#L66) for an exhaustive list of available operator configs. + - `config_precisions`: Filter operators by data type. By default, delegate all precisions. One or more of `ConfigPrecisionType.FP32`, `ConfigPrecisionType.STATIC_QUANT`, or `ConfigPrecisionType.DYNAMIC_QUANT`. 
See [ConfigPrecisionType](https://github.com/pytorch/executorch/blob/14ff52ff89a89c074fc6c14d3f01683677783dcd/backends/xnnpack/partition/config/xnnpack_config.py#L24).
+ - `per_op_mode`: If true, emit individual delegate calls for every operator. This is an advanced option intended to reduce memory overhead in some contexts at the cost of a small amount of runtime overhead. Defaults to false.
+ - `verbose`: If true, print additional information during lowering.
+
+### Quantization
+
+The XNNPACK delegate can also be used as a backend to execute symmetrically quantized models. To quantize a PyTorch model for the XNNPACK backend, use the `XNNPACKQuantizer`. `Quantizers` are backend specific, which means the `XNNPACKQuantizer` is configured to quantize models to leverage the quantized operators offered by the XNNPACK library.
+
+### Configuring the XNNPACKQuantizer
+
+```python
+from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
+    XNNPACKQuantizer,
+    get_symmetric_quantization_config,
+)
+quantizer = XNNPACKQuantizer()
+quantizer.set_global(get_symmetric_quantization_config())
+```
+Here, the `XNNPACKQuantizer` is configured for symmetric quantization, indicating that the quantized zero point is set to zero with `qmin = -127` and `qmax = 127`. `get_symmetric_quantization_config()` can be configured with the following arguments:
+* `is_per_channel`
+  * Weights are quantized across channels
+* `is_qat`
+  * Quantization-aware training
+* `is_dynamic`
+  * Dynamic quantization
+
+```python
+(quantizer.set_global(quantization_config)
+    .set_object_type(torch.nn.Conv2d, quantization_config) # can configure by module type
+    .set_object_type(torch.nn.functional.linear, quantization_config) # or by torch functional op type
+    .set_module_name("foo.bar", quantization_config)) # or by module fully qualified name
+```
+
+#### Quantizing a model with the XNNPACKQuantizer
+After configuring the quantizer, the model can be quantized via the `prepare_pt2e` and `convert_pt2e` APIs.
+```python
+from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
+from torch.export import export_for_training
+
+exported_model = export_for_training(model_to_quantize, example_inputs).module()
+prepared_model = prepare_pt2e(exported_model, quantizer)
+
+for cal_sample in cal_samples: # Replace with representative model inputs
+    prepared_model(cal_sample) # Calibrate
+
+quantized_model = convert_pt2e(prepared_model)
+```
+For static, post-training quantization (PTQ), the post-prepare\_pt2e model should be run with a representative set of samples, which are used to determine the quantization parameters.
+
+After `convert_pt2e`, the model can be exported and lowered using the normal ExecuTorch XNNPACK flow. For more information on PyTorch 2 quantization, see [here](https://pytorch.org/tutorials/prototype/pt2e_quant_ptq.html).
+
+### Testing the Model
+
+After generating the XNNPACK-delegated .pte, the model can be tested from Python using the ExecuTorch runtime python bindings. This can be used to sanity check the model and evaluate numerical accuracy. See [Testing the Model](using-executorch-export.md#testing-the-model) for more information.
+
+## Runtime Integration
+
+To run the model on-device, use the standard ExecuTorch runtime APIs. See [Running on Device](getting-started.md#running-on-device) for more information.
+
+The XNNPACK delegate is included by default in the published Android, iOS, and pip packages. When building from source, pass `-DEXECUTORCH_BUILD_XNNPACK=ON` when configuring the CMake build to compile the XNNPACK backend.
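+
+As a rough illustration, a from-source build with the backend enabled might be configured as follows (the build directory and job count are arbitrary choices, not requirements):
+
+```
+# Configure and build ExecuTorch with the XNNPACK backend enabled.
+cmake -DEXECUTORCH_BUILD_XNNPACK=ON -Bcmake-out .
+cmake --build cmake-out -j8 --target install
+```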
+ +To link against the backend, add the `xnnpack_backend` CMake target as a build dependency, or link directly against `libxnnpack_backend`. Due to the use of static registration, it may be necessary to link with whole-archive. This can typically be done by passing `"$"` to `target_link_libraries`. + +``` +# CMakeLists.txt +add_subdirectory("executorch") +... +target_link_libraries( + my_target + PRIVATE executorch + executorch_module_static + executorch_tensor + optimized_native_cpu_ops_lib + xnnpack_backend) +``` + +No additional steps are necessary to use the backend beyond linking the target. Any XNNPACK-delegated .pte file will automatically run on the registered backend. + diff --git a/docs/source/build-run-mps.md b/docs/source/build-run-mps.md deleted file mode 100644 index f9af4e9d3d5..00000000000 --- a/docs/source/build-run-mps.md +++ /dev/null @@ -1 +0,0 @@ -```{include} ../../backends/apple/mps/setup.md diff --git a/docs/source/build-run-vulkan.md b/docs/source/build-run-vulkan.md deleted file mode 100644 index 736859b86f6..00000000000 --- a/docs/source/build-run-vulkan.md +++ /dev/null @@ -1 +0,0 @@ -```{include} ../../backends/vulkan/docs/android_demo.md diff --git a/docs/source/compiler-delegate-and-partitioner.md b/docs/source/compiler-delegate-and-partitioner.md index 21a2f4dd392..c6808a11383 100644 --- a/docs/source/compiler-delegate-and-partitioner.md +++ b/docs/source/compiler-delegate-and-partitioner.md @@ -1,4 +1,4 @@ -# Backend and Delegate +# Backends and Delegates Audience: Vendors, Backend Delegate developers, who are interested in integrating their own compilers and hardware as part of ExecuTorch diff --git a/docs/source/concepts.md b/docs/source/concepts.md index 289ecda6d85..4cef25c606e 100644 --- a/docs/source/concepts.md +++ b/docs/source/concepts.md @@ -1,4 +1,4 @@ -# ExecuTorch Concepts +# Concepts This page provides an overview of key concepts and terms used throughout the ExecuTorch documentation. It is intended to help readers understand the terminology and concepts used in PyTorch Edge and ExecuTorch. ## Concepts Map diff --git a/docs/source/debug-backend-delegate.md b/docs/source/debug-backend-delegate.md index 68914aaed90..86dddd75868 100644 --- a/docs/source/debug-backend-delegate.md +++ b/docs/source/debug-backend-delegate.md @@ -1,4 +1,4 @@ -# Debug Backend Delegate +# Debugging Delegation We provide a list of util functions to give users insights on what happened to the graph modules during the `to_backend()` stage. diff --git a/docs/source/executorch-runtime-api-reference.rst b/docs/source/executorch-runtime-api-reference.rst index 5bec597987a..2b4239271c1 100644 --- a/docs/source/executorch-runtime-api-reference.rst +++ b/docs/source/executorch-runtime-api-reference.rst @@ -1,4 +1,4 @@ -ExecuTorch Runtime API Reference +Runtime API Reference ================================ The ExecuTorch C++ API provides an on-device execution framework for exported PyTorch models. diff --git a/docs/source/export-to-executorch-api-reference.rst b/docs/source/export-to-executorch-api-reference.rst index 1ae563d842d..e4aeae9cb6c 100644 --- a/docs/source/export-to-executorch-api-reference.rst +++ b/docs/source/export-to-executorch-api-reference.rst @@ -1,4 +1,4 @@ -Export to ExecuTorch API Reference +Export API Reference ---------------------------------- For detailed information on how APIs evolve and the deprecation process, please refer to the `ExecuTorch API Life Cycle and Deprecation Policy `__. 
diff --git a/docs/source/getting-started-architecture.md b/docs/source/getting-started-architecture.md index 937b5b389f5..2472b3547fe 100644 --- a/docs/source/getting-started-architecture.md +++ b/docs/source/getting-started-architecture.md @@ -1,4 +1,4 @@ -# High-level Architecture and Components of ExecuTorch +# Architecture and Components This page describes the technical architecture of ExecuTorch and its individual components. This document is targeted towards engineers who are deploying PyTorch model onto edge devices. diff --git a/docs/source/getting-started-setup.md b/docs/source/getting-started-setup.md index f4782312790..3a8ce26deff 100644 --- a/docs/source/getting-started-setup.md +++ b/docs/source/getting-started-setup.md @@ -138,7 +138,7 @@ to ExecuTorch. ### Export a Program ExecuTorch provides APIs to compile a PyTorch [`nn.Module`](https://pytorch.org/docs/stable/generated/torch.nn.Module.html) to a `.pte` binary consumed by the ExecuTorch runtime. 1. [`torch.export`](https://pytorch.org/docs/stable/export.html) -1. [`exir.to_edge`](https://pytorch.org/executorch/stable/export-to-executorch-api-reference.html#exir.to_edge) +1. [`exir.to_edge`](export-to-executorch-api-reference.md#exir.to_edge) 1. [`exir.to_executorch`](ir-exir.md) 1. Save the result as a [`.pte` binary](pte-file-format.md) to be consumed by the ExecuTorch runtime. diff --git a/docs/source/getting-started.md b/docs/source/getting-started.md new file mode 100644 index 00000000000..492334c81df --- /dev/null +++ b/docs/source/getting-started.md @@ -0,0 +1,208 @@ +# Getting Started with ExecuTorch +This section is intended to describe the necessary steps to take PyTorch model and run it using ExecuTorch. To use the framework, you will typically need to take the following steps: +- Install the ExecuTorch python package and runtime libraries. +- Export the PyTorch model for the target hardware configuration. +- Run the model using the ExecuTorch runtime APIs on your development platform. +- Deploy the model to the target platform using the ExecuTorch runtime. + +## Installation +To use ExecuTorch, you will need to install both the Python package and the appropriate platform-specific runtime libraries. + +Pip is the recommended way to install the ExecuTorch python package. This package includes the dependencies needed to export a PyTorch model, as well as Python runtime bindings for model testing and evaluation. It is common to install the package within a Python virtual environment, in order to meet the Python and dependency version requirements. + +``` +pip install executorch +``` + +To build the framework from source, see [Building From Source](using-executorch-building-from-source.md). + +Backend delegates may require additional dependencies. See the appropriate backend documentation for more information. + +#### System Requirements +The following are required to install the ExecuTorch host libraries, needed to export models and run from Python. Requirements for target end-user devices are backend dependent. See the appropriate backend documentation for more information. + +- Python 3.10 - 3.12 +- g++ version 7 or higher, clang++ version 5 or higher, or another C++17-compatible toolchain. +- Linux or MacOS operating system (Arm or x86). + - Windows is supported via WSL. + +
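+
+As a concrete illustration of the installation flow described above (the virtual environment name and tooling are a matter of preference):
+
+```
+python3 -m venv .venv
+source .venv/bin/activate
+pip install executorch
+```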
+ +## Preparing the Model +Exporting is the process of taking a PyTorch model and converting it to the .pte file format used by the ExecuTorch runtime. This is done using Python APIs. PTE files for common models, such as Llama 3.2, can be found on HuggingFace under [ExecuTorch Community](https://huggingface.co/executorch-community). These models have been exported and lowered for ExecuTorch, and can be directly deployed without needing to go through the lowering process. + +A complete example of exporting, lowering, and verifying MobileNet V2 is available as a [Colab notebook](https://colab.research.google.com/drive/1qpxrXC3YdJQzly3mRg-4ayYiOjC6rue3?usp=sharing). + +### Requirements +- A PyTorch model. +- Example model inputs, typically as PyTorch tensors. You should be able to successfully run the PyTorch model with these inputs. +- One or more target hardware backends. + +### Selecting a Backend +ExecuTorch provides hardware acceleration for a wide variety of hardware. The most commonly used backends are XNNPACK, for Arm and x86 CPU, Core ML (for iOS), Vulkan (for Android GPUs), and Qualcomm (for Qualcomm-powered Android phones). + +For mobile use cases, consider using XNNPACK for Android and Core ML or XNNPACK for iOS as a first step. See [Hardware Backends](backends-overview.md) for more information. + +### Exporting +Exporting is done using Python APIs. ExecuTorch provides a high degree of customization during the export process, but the typical flow is as follows: +```python +import executorch + +model = MyModel() # The PyTorch model to export +example_inputs = (torch.randn(1,3,64,64),) # A tuple of inputs + +et_program = executorch.exir.to_edge_transform_and_lower( + torch.export.export(model, example_inputs), + partitioner=[XnnpackPartitioner()] +).to_executorch() + +with open("model.pte", "wb") as f: + f.write(et_program.buffer) +``` + +If the model requires varying input sizes, you will need to specify the varying dimensions and bounds as part of the `export` call. See [Model Export and Lowering](using-executorch-export.md) for more information. + +The hardware backend to target is controlled by the partitioner parameter to to\_edge\_transform\_and\_lower. In this example, the XnnpackPartitioner is used to target mobile CPUs. See the delegate-specific documentation for a full description of the partitioner and available options. + +Quantization can also be done at this stage to reduce model size and runtime. Quantization is backend-specific. See the documentation for the target backend for a full description of supported quantization schemes. + +### Testing the Model + +After successfully generating a .pte file, it is common to use the Python runtime APIs to validate the model on the development platform. This can be used to evaluate model accuracy before running on-device. + +Inference can be run as follows: +```python +from executorch.runtime import Runtime + +runtime = Runtime.get() + +input_tensor = torch.randn(1,3,128,128) +program = runtime.load_program("/path/to/mode.pte") +method = program.load_method("forward") +outputs = method.execute([input_tensor]) +``` + + +
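+
+Since the same inputs can be run through the original PyTorch module, it is often useful to compare the two sets of outputs. A minimal sketch of such a check, reusing `model`, `input_tensor`, and `outputs` from the snippets above (the tolerances are illustrative):
+
+```python
+import torch
+
+eager_output = model(input_tensor)  # Reference output from the original nn.Module
+torch.testing.assert_close(outputs[0], eager_output, rtol=1e-3, atol=1e-3)
+```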
+ +## Running on Device +ExecuTorch provides runtime APIs in Java, Objective-C, and C++. + +Quick Links: +- [Android](#android) +- [iOS](#ios) +- [C++](#c) + +### Android + +#### Installation +ExecuTorch provides Java bindings for Android usage, which can be consumed from both Java and Kotlin. +To add the library to your app, download the AAR, and add it to the gradle build rule. + +``` +mkdir -p app/libs +curl https://ossci-android.s3.amazonaws.com/executorch/release/executorch-241002/executorch.aar -o app/libs/executorch.aar +``` +And in gradle, +``` +# app/build.gradle.kts +dependencies { + implementation(files("libs/executorch.aar")) +} +``` + +#### Runtime APIs +Models can be loaded and run using the `Module` class: +```java +import org.pytorch.executorch.EValue; +import org.pytorch.executorch.Module; +import org.pytorch.executorch.Tensor; + +// … + +Module model = Module.load("/path/to/model.pte"); + +Tensor input_tensor = Tensor.fromBlob(float_data, new long[] { 1, 3, height, width }); +EValue input_evalue = EValue.from(input_tensor); +EValue[] output = model.forward(input_evalue); +float[] scores = output[0].toTensor().getDataAsFloatArray(); +``` + +For a full example of running a model on Android, see the [ExecuTorch Android Demo App](https://github.com/pytorch/executorch/blob/main/examples/demo-apps/android/ExecuTorchDemo/app/src/main/java/com/example/executorchdemo/ClassificationActivity.java). For more information on Android development, including building from source, a full description of the Java APIs, and information on using ExecuTorch from Android native code, see [Using ExecuTorch on Android](using-executorch-android.md). + +### iOS + +#### Installation +ExecuTorch supports both iOS and MacOS via C++, as well as hardware backends for CoreML, MPS, and CPU. The iOS runtime library is provided as a collection of .xcframework targets and are made available as a Swift PM package. + +To get started with Xcode, go to File > Add Package Dependencies. Paste the URL of the ExecuTorch repo into the search bar and select it. Make sure to change the branch name to the desired ExecuTorch version in format “swiftpm-”, (e.g. “swiftpm-0.5.0”). The ExecuTorch dependency can also be added to the package file manually. See [Using ExecuTorch on iOS](using-executorch-ios.md) for more information. + +#### Runtime APIs +Models can be loaded and run from Objective-C using the C++ APIs. + +For more information on iOS integration, including an API reference, logging setup, and building from source, see [Using ExecuTorch on iOS](using-executorch-ios.md). + +### C++ +ExecuTorch provides C++ APIs, which can be used to target embedded or mobile devices. The C++ APIs provide a greater level of control compared to other language bindings, allowing for advanced memory management, data loading, and platform integration. + +#### Installation +CMake is the preferred build system for the ExecuTorch C++ runtime. To use with CMake, clone the ExecuTorch repository as a subdirectory of your project, and use CMake's `add_subdirectory("executorch")` to include the dependency. The `executorch` target, as well as kernel and backend targets will be made available to link against. The runtime can also be built standalone to support diverse toolchains. See [Using ExecuTorch with C++](using-executorch-cpp.md) for a detailed description of build integration, targets, and cross compilation. 
+
+```
+git clone -b release/0.5 https://github.com/pytorch/executorch.git
+```
+```cmake
+# CMakeLists.txt
+add_subdirectory("executorch")
+...
+target_link_libraries(
+    my_target
+    PRIVATE executorch
+    executorch_module_static
+    executorch_tensor
+    optimized_native_cpu_ops_lib
+    xnnpack_backend)
+```
+
+#### Runtime APIs
+Both high-level and low-level C++ APIs are provided. The low-level APIs are platform independent, do not dynamically allocate memory, and are most suitable for resource-constrained embedded systems. The high-level APIs are provided as a convenience wrapper around the lower-level APIs, and make use of dynamic memory allocation and standard library constructs to reduce verbosity.
+
+ExecuTorch uses CMake for native builds. Integration is typically done by cloning the ExecuTorch repository and using CMake `add_subdirectory` to add the dependency.
+
+Loading and running a model using the high-level API can be done as follows:
+```cpp
+#include <executorch/extension/module/module.h>
+#include <executorch/extension/tensor/tensor.h>
+
+using namespace ::executorch::extension;
+
+// Load the model.
+Module module("/path/to/model.pte");
+
+// Create an input tensor.
+float input[1 * 3 * 256 * 256];
+auto tensor = from_blob(input, {1, 3, 256, 256});
+
+// Perform an inference.
+const auto result = module.forward(tensor);
+
+if (result.ok()) {
+  // Retrieve the output data.
+  const auto output = result->at(0).toTensor().const_data_ptr<float>();
+}
+```
+
+For more information on the C++ APIs, see [Running an ExecuTorch Model Using the Module Extension in C++](extension-module.md) and [Managing Tensor Memory in C++](extension-tensor.md).
+
+ +## Next Steps +ExecuTorch provides a high-degree of customizability to support diverse hardware targets. Depending on your use cases, consider exploring one or more of the following pages: + +- [Export and Lowering](using-executorch-export.md) for advanced model conversion options. +- [Backend Overview](backends-overview.md) for available backends and configuration options. +- [Using ExecuTorch on Android](using-executorch-android.md) and [Using ExecuTorch on iOS](using-executorch-ios.md) for mobile runtime integration. +- [Using ExecuTorch with C++](using-executorch-cpp.md) for embedded and mobile native development. +- [Profiling and Debugging](using-executorch-troubleshooting.md) for developer tooling and debugging. +- [API Reference](export-to-executorch-api-reference.md) for a full description of available APIs. +- [Examples](https://github.com/pytorch/executorch/tree/main/examples) for demo apps and example code. diff --git a/docs/source/index.rst b/docs/source/index.rst index d0cff5fa570..128ecda638c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -56,7 +56,7 @@ Topics in this section will help you get started with ExecuTorch. .. grid-item-card:: :octicon:`file-code;1em` Getting started with ExecuTorch :img-top: _static/img/card-background.svg - :link: getting-started-setup.html + :link: getting-started.html :link-type: url A step-by-step tutorial on how to get started with @@ -84,83 +84,66 @@ Topics in this section will help you get started with ExecuTorch. .. toctree:: :glob: :maxdepth: 1 - :caption: Getting Started + :caption: Usage :hidden: - getting-started-setup - export-overview - runtime-build-and-cross-compilation - getting-started-faqs + getting-started + using-executorch-export + using-executorch-android + using-executorch-ios + using-executorch-cpp + using-executorch-runtime-integration + using-executorch-troubleshooting + using-executorch-building-from-source + using-executorch-faqs .. toctree:: :glob: :maxdepth: 1 - :caption: Tutorials - :hidden: - - tutorials/export-to-executorch-tutorial - running-a-model-cpp-tutorial - extension-module - extension-tensor - tutorials/devtools-integration-tutorial - apple-runtime - demo-apps-ios - demo-apps-android - examples-end-to-end-to-lower-model-to-delegate - tutorial-xnnpack-delegate-lowering - build-run-vulkan - .. - Alphabetical by backend name. Be sure to keep the same order in the - customcarditem entries below. - executorch-arm-delegate-tutorial - build-run-coreml - build-run-mediatek-backend - build-run-mps - build-run-qualcomm-ai-engine-direct-backend - build-run-xtensa - -.. toctree:: - :glob: - :maxdepth: 2 - :caption: Working with LLMs + :caption: Examples :hidden: - Llama - Llama on Android - Llama on iOS - Llama on Android via Qualcomm backend - Intro to LLMs in Executorch + demo-apps-android.md + demo-apps-ios.md .. toctree:: :glob: :maxdepth: 1 - :caption: API Reference + :caption: Backends :hidden: - export-to-executorch-api-reference - executorch-runtime-api-reference - runtime-python-api-reference - api-life-cycle + backends-overview + backends-xnnpack + backends-coreml + backends-mps + backends-vulkan + backends-arm-ethos-u + backends-qualcomm + backends-mediatek + backends-cadence .. toctree:: :glob: :maxdepth: 1 - :caption: IR Specification + :caption: Tutorials :hidden: - ir-exir - ir-ops-set-definition - .. 
toctree:: :glob: :maxdepth: 1 - :caption: Compiler Entry Points + :caption: Developer Tools :hidden: - compiler-delegate-and-partitioner - compiler-backend-dialect - compiler-custom-compiler-passes - compiler-memory-planning + devtools-overview + bundled-io + etrecord + etdump + runtime-profiling + model-debugging + model-inspector + memory-planning-inspection + delegate-debugging + devtools-tutorial .. toctree:: :glob: @@ -169,11 +152,25 @@ Topics in this section will help you get started with ExecuTorch. :hidden: runtime-overview + extension-module + extension-tensor + running-a-model-cpp-tutorial runtime-backend-delegate-implementation-and-linking runtime-platform-abstraction-layer portable-cpp-programming pte-file-format +.. toctree:: + :glob: + :maxdepth: 1 + :caption: API Reference + :hidden: + + export-to-executorch-api-reference + executorch-runtime-api-reference + runtime-python-api-reference + api-life-cycle + .. toctree:: :glob: :maxdepth: 1 @@ -192,34 +189,48 @@ Topics in this section will help you get started with ExecuTorch. kernel-library-custom-aten-kernel kernel-library-selective-build +.. toctree:: + :glob: + :maxdepth: 2 + :caption: Working with LLMs + :hidden: + + Llama + Llama on Android + Llama on iOS + Llama on Android via Qualcomm backend + Intro to LLMs in Executorch + .. toctree:: :glob: :maxdepth: 1 - :caption: Backend Delegates + :caption: Backend Development :hidden: - native-delegates-executorch-xnnpack-delegate - native-delegates-executorch-vulkan-delegate backend-delegates-integration + backend-delegates-xnnpack-reference backend-delegates-dependencies + compiler-delegate-and-partitioner debug-backend-delegate .. toctree:: :glob: :maxdepth: 1 - :caption: Developer Tools + :caption: IR Specification :hidden: - devtools-overview - bundled-io - etrecord - etdump - runtime-profiling - model-debugging - model-inspector - memory-planning-inspection - delegate-debugging - devtools-tutorial + ir-exir + ir-ops-set-definition + +.. toctree:: + :glob: + :maxdepth: 1 + :caption: Compiler Entry Points + :hidden: + + compiler-backend-dialect + compiler-custom-compiler-passes + compiler-memory-planning .. toctree:: :glob: @@ -314,7 +325,7 @@ ExecuTorch tutorials. :header: Building and Running ExecuTorch with Vulkan Backend :card_description: A tutorial that walks you through the process of building ExecuTorch with Vulkan Backend :image: _static/img/generic-pytorch-logo.png - :link: build-run-vulkan.html + :link: backends-vulkan.html :tags: Export,Backend,Delegation,Vulkan .. @@ -332,35 +343,35 @@ ExecuTorch tutorials. :header: Building and Running ExecuTorch with CoreML Backend :card_description: A tutorial that walks you through the process of building ExecuTorch with CoreML Backend :image: _static/img/generic-pytorch-logo.png - :link: build-run-coreml.html + :link: backends-coreml.html :tags: Export,Backend,Delegation,CoreML .. customcarditem:: :header: Building and Running ExecuTorch with MediaTek Backend :card_description: A tutorial that walks you through the process of building ExecuTorch with MediaTek Backend :image: _static/img/generic-pytorch-logo.png - :link: build-run-mediatek-backend.html + :link: backends-mediatek-backend.html :tags: Export,Backend,Delegation,MediaTek .. 
customcarditem:: :header: Building and Running ExecuTorch with MPS Backend :card_description: A tutorial that walks you through the process of building ExecuTorch with MPSGraph Backend :image: _static/img/generic-pytorch-logo.png - :link: build-run-mps.html + :link: backends-mps.html :tags: Export,Backend,Delegation,MPS,MPSGraph .. customcarditem:: :header: Building and Running ExecuTorch with Qualcomm AI Engine Direct Backend :card_description: A tutorial that walks you through the process of building ExecuTorch with Qualcomm AI Engine Direct Backend :image: _static/img/generic-pytorch-logo.png - :link: build-run-qualcomm-ai-engine-direct-backend.html + :link: backends-qualcomm.html :tags: Export,Backend,Delegation,QNN .. customcarditem:: :header: Building and Running ExecuTorch on Xtensa HiFi4 DSP :card_description: A tutorial that walks you through the process of building ExecuTorch for an Xtensa Hifi4 DSP using custom operators :image: _static/img/generic-pytorch-logo.png - :link: build-run-xtensa.html + :link: backends-cadence.html :tags: Export,Custom-Operators,DSP,Xtensa .. customcardend:: diff --git a/docs/source/llm/build-run-llama3-qualcomm-ai-engine-direct-backend.md b/docs/source/llm/build-run-llama3-qualcomm-ai-engine-direct-backend.md index 7ed768baf23..c02701a839c 100644 --- a/docs/source/llm/build-run-llama3-qualcomm-ai-engine-direct-backend.md +++ b/docs/source/llm/build-run-llama3-qualcomm-ai-engine-direct-backend.md @@ -5,7 +5,7 @@ This tutorial demonstrates how to export Llama 3 8B Instruct for Qualcomm AI Eng ## Prerequisites - Set up your ExecuTorch repo and environment if you haven’t done so by following [the Setting up ExecuTorch](../getting-started-setup.md) to set up the repo and dev environment. -- Read [the Building and Running ExecuTorch with Qualcomm AI Engine Direct Backend page](../build-run-qualcomm-ai-engine-direct-backend.md) to understand how to export and run a model with Qualcomm AI Engine Direct Backend on Qualcomm device. +- Read [the Building and Running ExecuTorch with Qualcomm AI Engine Direct Backend page](../backends-qualcomm.md) to understand how to export and run a model with Qualcomm AI Engine Direct Backend on Qualcomm device. - Follow [the README for executorch llama](https://github.com/pytorch/executorch/tree/main/examples/models/llama) to know how to run a llama model on mobile via ExecuTorch. - A Qualcomm device with 16GB RAM - We are continuing to optimize our memory usage to ensure compatibility with lower memory devices. diff --git a/docs/source/llm/getting-started.md b/docs/source/llm/getting-started.md index 8ea34269ff0..066bb3f3d1c 100644 --- a/docs/source/llm/getting-started.md +++ b/docs/source/llm/getting-started.md @@ -588,9 +588,9 @@ I'm not sure if you've heard of the "Curse of the Dragon" or not, but it's a ver The delegated model should be noticeably faster compared to the non-delegated model. -For more information regarding backend delegateion, see the ExecuTorch guides -for the [XNNPACK Backend](../tutorial-xnnpack-delegate-lowering.md), [Core ML -Backend](../build-run-coreml.md) and [Qualcomm AI Engine Direct Backend](build-run-llama3-qualcomm-ai-engine-direct-backend.md). +For more information regarding backend delegation, see the ExecuTorch guides +for the [XNNPACK Backend](../backends-xnnpack.md), [Core ML +Backend](../backends-coreml.md) and [Qualcomm AI Engine Direct Backend](build-run-llama3-qualcomm-ai-engine-direct-backend.md). 
## Quantization diff --git a/docs/source/runtime-overview.md b/docs/source/runtime-overview.md index 1a421fdcc0a..911d0c142e8 100644 --- a/docs/source/runtime-overview.md +++ b/docs/source/runtime-overview.md @@ -157,7 +157,7 @@ For more details about the ExecuTorch runtime, please see: * [Detailed Runtime APIs Tutorial](running-a-model-cpp-tutorial.md) * [Simplified Runtime APIs Tutorial](extension-module.md) -* [Runtime Build and Cross Compilation](runtime-build-and-cross-compilation.md) +* [Building from Source](using-executorch-building-from-source.md) * [Runtime Platform Abstraction Layer](runtime-platform-abstraction-layer.md) * [Runtime Profiling](runtime-profiling.md) * [Backends and Delegates](compiler-delegate-and-partitioner.md) diff --git a/docs/source/runtime-python-api-reference.rst b/docs/source/runtime-python-api-reference.rst index 64c135de8c8..270cab9b61e 100644 --- a/docs/source/runtime-python-api-reference.rst +++ b/docs/source/runtime-python-api-reference.rst @@ -1,4 +1,4 @@ -ExecuTorch Runtime Python API Reference +Runtime Python API Reference ---------------------------------- The Python ``executorch.runtime`` module wraps the C++ ExecuTorch runtime. It can load and execute serialized ``.pte`` program files: see the `Export to ExecuTorch Tutorial `__ for how to convert a PyTorch ``nn.Module`` to an ExecuTorch ``.pte`` program file. Execution accepts and returns ``torch.Tensor`` values, making it a quick way to validate the correctness of the program. diff --git a/docs/source/tutorial-xnnpack-delegate-lowering.md b/docs/source/tutorial-xnnpack-delegate-lowering.md index d1148511c5f..a469edebd54 100644 --- a/docs/source/tutorial-xnnpack-delegate-lowering.md +++ b/docs/source/tutorial-xnnpack-delegate-lowering.md @@ -12,7 +12,7 @@ In this tutorial, you will learn how to export an XNNPACK lowered Model and run :class-card: card-prerequisites * [Setting up ExecuTorch](./getting-started-setup.md) * [Model Lowering Tutorial](./tutorials/export-to-executorch-tutorial) -* [ExecuTorch XNNPACK Delegate](./native-delegates-executorch-xnnpack-delegate.md) +* [ExecuTorch XNNPACK Delegate](./backends-xnnpack.md) ::: :::: diff --git a/docs/source/tutorials_source/bundled_program.bp b/docs/source/tutorials_source/bundled_program.bp new file mode 100644 index 0000000000000000000000000000000000000000..2587278e47fdb6763022392f0439da91892d9958 GIT binary patch literal 261600 zcmbq)2{@L|*Z+fTmC|BKNS2f+MT_U2BZcgxm3D=wh!T|+OG5T7^pLI4rcDc;IZ`TZ z+E;B_Nm^B;#eW{^{XXya{r%tT`d|O)`k0wB=bSln=G-%L&;4Y`b&J2bvQ#HQOSLP{JKmLl zJkgaap6$v-OcD$|8RR-G5!uD#jTdWY)y^d1Q|~M(v+n+}UzJg{c)C1g)@I25-0#oh zy<~>pXSl-P$^=DbX)%b#^Vein1?%}y~ zaiGtd&e`!?=jk4}#&5N|_agt*%Y6BK&ozsJmU*saY&^y0|7nK?qj&Y;yLjCqe?LE; z(H@J|E?eWhXxU1SW$vBKlNt#!d!fTA%cN0C^B4O!g&n-45kcn4EGtYCR^S&o&Z|IkJLl4qF9Fpt5FVKKv^KZq8SvtVVB{9lOTYaxS3PK0A(ygsbV zOzGqm&9MuEs1FeyiSebevbZfGd!{qYU~pxa!yu9sq5u1T#&Ctn{k21si{foIgGf#U z3o+idf7vpXwTt=?!CQ<^iA|6q0#$d-F>gS6X-vL%;)7!}>nxF9uz+lcWk->_=nn5JbXPC@j`r#k{G&6Z| z{|{sBA{`>k7vr1R@z2^ckhO{W6xk(uR!1_7W-wtG%P@{Xq(_82F&(a~EN-hBYahg* z#h}A5gh8J{q+hh|h}MvLG5JhZ7PnQjPKtDhAosCrOkL}mDB`--HIe^Be4^)0ltl8q z7(~yQXf04;P-PgvAmS6@ZxZo};#-u9L(iqk;crZ9K=rYJLG&AQ`F&tosX7FKfX3%A*XTg!j5X#`gV8QT{O@2MY zS%y#s3kEHQo(%QO$=M8{3?2+x4E1bSRSX9hLKz5y76VhyYx~qW-;$zo&X=yZU`3wd z=YQxY#OOo2=+YwogN)8r0tqcv{<|N1Vft8?JPWpXGIKg7U6hMxBKLcD$;*o5XMgQ_ z7DVl${YsP;bY;9GvHsH-U!GV!r}ZB@MeJ0}rb&!fWRocF|L%E}j3(+|+@6`NF0!Xh zOm_1hSuIBYJD#(wT_nqU|3Akg(lNf}A3r_$(~pSVS&aTojPBh<@ATjM|InW)YyZFb zPt@O@F8NOXrGNX!f1)-~n$?x@u8Z}5oAK3))t|KecmMtDYU{KqobmqKf4>>+-~L<2 
z`so~hXAJNEyqyrPZ8_q?|5wf*k=1D zukYgP98VzRtO1^zmZ!u|NNzi)3bYwROft5R+k~ zZd&Zm-)BRlvvd4x(-irOCn4GCPZ3SDcCoFY6WL;Vv{~%^JLbLr>R~bRzv+1?rf2yd zJ>qft`cGW{CmnybPTo)!3(Ty}`MWCi99VSGyXODLKl5+Om_=z!SH|-ellNhK>%{6C zSv|U|-sy)pR?igE8!4vO_>aC(fAT6ZJxdwi-?+LjM&B$(|0t&SiI`raKl(=e$yK}N zt#iEUY<`D|)eTvFd{@15+(TIXZ`>RFkstmicN3G7VSIn>TPH@>6r(3~(K};DnbH3} z=krYo^N9Ox_`!@_a`^*;_LLw zUB>r!Jho!=_hNK6F}+iM{o|*>fAnep$)m;O;}~D2SUsE7_lW69|D#7FS|vt5D5hWZ zekn?tT^X-YOkV=y`}-UlvNB_lWJgMtWIEpq+3~IG%+N_==P?#1tk^jxle_r;+w=U9 z=@$7R)b4*i&mvvySlbCv-2c_{xl_-7KSPcG6Q>p{|93sd#Pslgcg2YK^L&EQME3mq zc^26s;uE2*OGoEC=KSGjWS%0MJC=3E!EZ+Y8`q+{3Q-IlSJ2^NK2e);e5;mZ2@-{S1=+{&?N zlV!R@<6=py50P$BAENZP51xz`tNq=Z=$}4#ssA&6M16~RMHt^1cU{kfac5aZLZm~~ zr)Uil@rsft7uCJR_}%`+FM9pzd>$FIC>Qb1WXKkiN&A*@ zK1BSYa~SW>Jc#D6?O!sY84>S8#BVIdKeV&_&$vbVws;>Re$jc$LriA=zhp%HiuWOk z*EBKyxPS4p*TBwL{Cj2)@r&&IduF&UCVSyuvLf5W`}!y1{)rXQ9E$E(|7lZXby50f zoFW|}yZ_qo*H3?K5Ji~i_CPd8wtsj<=f`=Y47E0BuUYT!sm<3~8R+9Wi0Dn(+MxM7|ljz;>_rK+}mjo{JS+i_qz=$=0{yv`C|4o;+NP8#g z-`f7G|Nme4{}=l=iH-Z8e%UqWntz!+Z^3^wnz=ya1W^nx^;^Am@oEoN`u|HGCCWf; z*H>EQUygk3Qhenz!Mr&4D>^K!LeKGP^)D{zW@NgGyZqO_Hkd99vl zr47A>T9+NMPH>x|8b(KY6kneVO4 zy}tOPQOx{D_gNR(9B#;bz)`w z>Vsfm^__m~u4C@4qE-3p@DDKAYKsdNBw>?{qj{*wT_TvE!#VjyLEg2o7;2scCLTrH z^q0fUC$#i6ANh4yWs;kfd0uilSfq|NFFC9L#l5c5iql{&81Py+?%9}18z-qscxF?% zrNpMvEqx4B`NdG);xF8$>Sl~O(}Evd_?0}og0)K*VzJ&^;F-P`y0kyW)QZF!YY z|4uR}#(f3+I=C_x22{p)Os;gVQ#W_nplCiWu?ZW7tbnSVX83x)Tct^KcNBz6kctrL z%Bm3oAb9?kQ!q`0cL`olJ)^(*t*ohd_+rRlh*c+D$`k=A>Em|$_2AV_Tuy2SHdVSaiyxH;i>ew1c{dI&! ze7Hs)r}d}f4&USsE!4%Dj3WH`IRfOQW+FYk9ZAob*yf^z`OlKz)|5?f@9|-}V0~XQ zR?P>pqJl|_Nd&IG(HqQE`hmfrTV!C{1)}pg5Jv{Qp|hn{VnSbC7=3m(X&O~8>`6Dk zz5XlFJkJAL&xPWpo4N3Dc{C)t6_7sfTw%?`J5<(71F{b~!^Lac$%B`6FtJxhg_Pz= z{+M`c%zc?fVXP_~_;|!DW<)ovu-GG9Q$8F`rx)V#Pusz0fjhjfk$?=XNStU8MqKJP zl9mJcaCg!x;lU#bAit)XI+~;skl%u*3=;8y<9Nuhw8r_V?o?+~Ae{9yhx=*UVba+^ z{ISOs<}|G*ns4XB-c<(hr0hE_-7phx%c;>5L!Q%N?HV||ej7L3cpeNr>5Hlv6;$oo zNUZU4xoXztxyW>!aggx_TT=DLqS{25h8Lr#viwqM20D zbj%WF^q7u$1IkI+*b%t=TRu*aivwqaAEdE04*fsqa7(wiL(dQU&3CRkT_jvwnudmR;y@{DVuF!fT1ZWpLBqQFTc~qIa;ugb^iX*hR&6%ITx9{dia zU~$!AP`|2#x-asusHKBQNX@3d)Nj!HgXX|icUSD8y951`LZRec67Dt2q0fb`7;5l< zIx3pNpd*f)hwE7yCx4T8W%Y+Q0Y$i3K>~hyXh6cMet72nK~gO_1XtWRKo6>gkzD96 zyq4Gx)ivTU-_#TPg&d>xy%iyNbOPKQolg?iC*aucQuvFl5y#fs!Q2tGBz9OZX_c&} z=(L*qYS2WZtH;95Hfh}aY$D$NI)PgmeSy|h?ch`&DG~?W0NmgmTCt{|3Z9coBqK^6 zki9ai$=6HTFh?O3_hzQR4cF16=L9b(x@G~*wP%GIaZ9kaw-y>UycX^+T!_Z%`$<7W zE~Ia8pktqmfS+XwP-JkPX7~l+vy~f!ig&^xz~-u;oOjvH%xfQ^3Lz zFUa93*I%QRQF*38-90C3nSw((onJbWIUy7(D`S>>C2rpIzXnN(!!4iN^j0 zOHuh^HqdV=WN(@r)T}r~RVM6!!|Kt@A1b@ zvKVN!BZFh*0k&kJ{F^D*ZPNm{8r2UQ>=NL+vo$EbNW{~MPl;@0E8QgLizhAzLu0Si z*y5NBmwpsL?8_h=P~{75vy$M$o-35UX%g&xwGsP1?+%$Kc9PLOxaQh})%# zAZ+S+NRF|fpVNf#O?lnqyD{lxkWQF7FwL9pXnrC#iJ|Deo7USWD z4LJGq0&uw`gEuPsL9g6}Bsa_-oZ4QHpVE5h=3)d=5m(9WBb(`v?P>Ig`*1W18H=tT z3-NLJF;Z$+MwTZ{#A{6_X>8O(dND!*MqV9*zB3+hD=I7bZkIavHzq5=G|K^?E)fS# zQlIF7-$}T;dLrk(u@IiDnunT;qp%-)HcE%sqk4FS@Ll*XGWXj6Xj^ul`dho;tUZqS zz4a`2Y3t64V=MLH<+dI0aA^hxjLQc1L3wcV*IblbzZ1H*2soepuj$X^-<*12EPYU% z%$*tI11mo9@zF6;I{eybI(1bxT%5ZJdY0)EAI$>t^|dm7>$?&ZcD(2AYRG`j!*qOc zW;rzXIYs2>meQ@o2Ze=?Khq~y1;W{SD){!$PC8)|5|6g6$VqehiDBX+#+L~J*yB7^@^uAtP^la zY!1}Bq+s{LAZSYIftRGJNrP-Y z1l7JL>7yV1_)VQ)?%2h`aEm0IGI2Ys{Td4nW@#v&8B6SK*V2;uUG#IvYkIo>JotSq zh?Y;ygWKgCC>s@l`v-GOGZ{|Q4MXAh;avo_nxf%}Y^?nzq)*Z!q5Q=<2xIB!i4)aQRA z(i5VEn~h>PE#5EkIsFZhNSThMMcGJX<8hVUetIZ73@Yosaw~0J$-@q9{2r@^t!Wcs z+kI9#QwzQMln;E-`rQfmw=vaMDWw+K080ZZwC+jFQ9F zxdLL~VUDiPMuBCLHVIgqPPrGOP{w6Et}e_2^8p#8K57>EJ%1ZSZgr;4cM{8Cl!pAAI@bw;J7~>F1r>T3O+_q#qb$4RL+Xe%i9jAr-BNvIVc0Kgl 
zB|uw?Gzjz5Lfx_%lu?8G))Kmm-d9H52k#Dy^`=vC<8`r;ldsm zg$3jKLQ3IIB9oIrYjlDzgy&6kP37RGw*=k4VioY}e$z?of0DY|aCE;q9?gD?Bx4RJ zfZfsn9QkQ1Bv=~KgI8Q&`;l+-{;2KHy<;TYuqEiPmWw~`E(Xc!TjVt@ghx+Y==SsX zxjA|1=&>Ua&T35ue+#z0y?D$0*c}ZLWn1`9-&#Ysa1X6h?1#s%Zil;x1=y!-F%FwH z3#BvsKyu7#c;6!v5*-sDV4Wr!4x0l7B|Gq9A1QD-x)K~m=i{8*$#6e3m1I?)qw5S* z$ehiRu<^hZV)mg3r61~nX15fW@1TfB!sp^V2Ldr_8CW#J6y5xtkasj1&5s*G`MuK> z@^4lPPd68Ua!wt67cvd5EKdR}H8(swWCqC@s0>N6I$*VSDoBOs(=mf5VryO+2wFCR z-?(p3M`jIS@;=FZ(op~hNc;KlY`n7Cd7 ztDaVqSJqZAYpOI9PIkaDjhQIOj)!}{=F)4+j6lif2l;-;0ZzEtqm9D=OzW17jdz|YB&&P`Z`bEc)k{n*!J^?+g{Ob)j zcT5W0lv0K3@4~Rhl^D2UX()Voek@q)xKJ6TK{)7f3eKJ;JO)$^IJ92Ag39e$iA_nAYt77nn3c!K7U?kDIokk6 z5P!VjyqmllpM|3bB!ib^mDxl?wtiiIMAki>hXI#e;nVP8VC2W+1mHsFlx`8Wmu=%3 zqU>Q?gr?ByeGD~!{(-d3l7SzEVQ@AtneLbof%{>gaE!}o`s@l1=s>fMa zyGIC-1DruN0qpFE;Rn!@pu zgD2_fd59L?3FXS2{iuIacbvWJJ^4O;8EhIQ2lHo-fachnB=*N>ERcI3tk&5^Jght+ z^6Ui?+3p0^KTUALRab268%eKD&jMTj!7%+>A=Z-q}!2B2-kMv%69!O5`o?OwqG8s--bOCB52g+=xtr+$bV zt5HEa8n=P@rNh+rY$~RjspGJ_k??c>9kTh(Gjc=C7rx(f#nwwB*uIqpRcS>S;wlfX z4o;&PXCKj>yAAQsi5zq?^M(Wc!YjU&5Im@{7&qNc!9ixGXr29=KYpef_G%$i>iSMN z@iPYQ-@Zt)J~VQA+aysnv9Fmy3B6E!qMVbMNBqOLYIn*VX62vZYX*O_@LIAx8*){N9clwIan=~MAj-M(yCi? z>HRnQ(EKn9mt~{~?d&?pTKa=5^nK5tZjb?`%MD9F8=$rHTs$7(F_d20a`Cfyv82J}?RdTeo0eEp|>jmP(6n4?rWgKr~%$4^y-D zlFw&$;>S#7yl3S=Hf=OvHmKoZ_49O1a~zqn%Nw5Db^$j-IgI%wg;VTu;QRJ$82f1% z>gE!-uyrcJp0{+p);DV7sfV{tEI@(RHf&z8N7(;xU)&47$*M(Om8+z_a$Gl{Jus+ut=kir5Mjo)hcgf#~ zMRzUmT$G5P(|?d;<17qt+zf?ZHlf_R^)Sd+hwI}R1+w!)!QT1^o$2(Mgva*5DIs^b zqSHH}yiiCrCMMvvGyS3Rg%>#Pep6BUV;i;+y4zAacrZ8oo6F z6^fFDb1&(U{0G(4)w3f zn?CfR?Z4u1Q;{U7OGe_npFyPLY6S`Jh{H!IdMMbxh^B^Rqiaw;8te_i=F5?w)7}G~ zzS;#|IdAFAnY}^7c{=x9B^JK?j-*QicYxXj1^m1)9d`-tkebTP_+$Z(a~vE@Z`~mk zTDRocp5g}2GQweX#SDBlcMTMNh=YkgBDnFH4{79vwYdKI6r8^GKIvIgPcCLH#B^5*>GW1ARd`gL(l$_ zCl`ij;?5?5Eol;DlksFC`$d!Jzw^hpM&THMuQ@#$g__oR)WJ*=nzGz+@qo96mX}75TsTDD!I?_@8W`N^I}=J$xF}n3 zV4lVz2-~(D8a%F20q24T^{x@iW6N>%j1rnSY9*ccY&ID`(UFYH+(u0GoFON{nqHq0 z2O33kIBL;#PU+`B%y3L4i(8d(JYas2PRC~&W%uyhi`ksF{pnYMhrHEEaS6e-3%=#Yc0a1FLvPdK0@-kdk(!E;*8F@ zd+B6NKKlQ@N9@mnT3pS&_K3n{iC}oQIvB3$ZDZd^ zoVk6sFY^x{+6fA2q4=q-Y6Ze&Rl`Ev%7)QAT!K+ zT26o4rb6550=h#{kA9Rhg|n)wz$G;Ta&xAG*M@u?^I|P*8Fq(^_+3Do_isSSq0*>a zyqn&z^+oG(DtN|s9lqWCl9JSwIOoh<*kn40woAMx7VqUS=z}djEt!lLrUbxHh(_Dv zaxmF{CzZXup53Qv!hix*)REFdn~?eN>Zv*BBPc-sV{th7Yd+4t5Dm8Xgv9$ehfN9R z$@pith~?#b!r}>A(f*1G_P3Pbrqv~Z^h~y1Ju1L?il#W@Yb&iB)eVM!>rD^MUL^eR zO%X{;8m3%UK&M__*t0SSTXD(G z&A5Yf=SDAF#JAF!jlI~O>zomSK6YaWy_ycvQMTmxg&5ksW2Z2&bPo7F+e%dr4<`Za z&T?h92ekif6IvcM3}ifi(Jvb&gWW+5?$_}Ka(i@NbX*Y*ch4CM-*>DemXl5rvsM8~ zZJr3f^2XDVQY~bazXDvkx*I&-@1h1*X3~g+0Yup+64r)S(>WK@D_++8rkNoj@Gzh^ zW{zQd?1mZm=3W|Jx}yLmd7~jn7=defMR-GlqYL@*;Muqs2I&pO38iWHU~e)kemEb^ zcs5X;rUc`oa^UD`_lkmD3Y=P$!iT6CVyNy&rAZkf1ix*}cKm`$IS zX%YjqV9*R11cf>~V4uJb{pTfM3VNaPLO0<}gE-;)96#Llt&Vi4oS>6_4Mon~jjR$* zgdS7WplY5h?D_SD&OJ7R9L;Q?!CTq8X~#Hfy=E9h-P#Om4PD_!z8QVpI2hdejHF)| z1VF*(S@`hdOe{T+3C8W4Fyxv6PD^@1%W5)-p79dy$J}w~o8*AJhd*d=T9I%zyU$U4 zR>ZH@m4-n_t!PYi7O9Poh5JSVY%b4-ol|;~_#?za^buJ^&Fnlaz6b^=szqD{y%Q*DPW@l5Q80Z_)HKV$Y5UnAeLjK=&K*J~K6ycB-#$&BF0CcY150RGk1Qa>1zdG2xefbv z!}gO^!U|pnY_L@V$79)OA3j_dv1K6KX0bDIxGn~j2Eq8H-e@Mw!x0i;@cyk6d|jJg z;i9RJLkzmX@vWzc#_1Ru?3ROh#~zWl&U%<57lxJ@S@@yX8nV-uhl=(?aM4#uxHdl> zcYn&n)NO8Hl(>^@DpobK40FWRP$SadEQL-%V?eLMpM;wz(cg`U5U->G(cYH$RWgs= z*-DXmy9vU(HiWMBC_vL${pit_T5=}r9ECxiu;tzioE3G3OmWI1_sTN~SV9C$!7-umJSBGuFjyJi&-v*QpSiT;paum6uG7<8$H0aecmKZTT z61xkI(J@a9an^l4fls}$Id3o3so93I1LxvC^pFEE16$@uR*`ivnGVid8p-G zgo_$?2(4n&(XC1z)~F2R1n=|MS=I+9%9xW`_FM3Pj4vFP3>U5onFk?IN_&krc`$pDnLc|rQjye$0bGzwPElmzLpPo(FT2omkE 
z8W$P|fbPC&`00xtNTL(^n+RCTjM!m`!XBe zvwO6nZ$5bM_+(JtpiG`!Hp8lcx#Z4eb$oxno_2q5j~G9U0EOL}Lc1PvFv4{M{m|qF zQ)2w^z!5$?YtEo!=M4ql%xG$BABFla)}#D_&GgsI6U1bZF+2oMTJD{JT!Jxv?l6Eo zsVXq`+gf~|)r)>gZWwgrD|(cVSb-ivg$di1P6bzKV-nikla4fB4dJT!B<86Pdq3-j3j229`c<(o zs#_LTesdw^nM!E#tB}+$-GCBSC&)GTI$CA&j(Q2~aOIm3FzrWr#mpBTDCL|_1`16< z6Ov(9=r_`HzZc!F9gYg8*2Bh$CFG;Tar#@Wm7dWEf{*KOlIIihaq07sxU0K6xp79B zTbHw(vvGS$1Fo}gHtV<25gIvgyWlu|n8Jsm)5D>>BLTKc{iG-E_kySsx9QI7+riG= z5Y{#{2~XJFr7|&_!J=oNa2hWP{6vFLD;ymmgk_fNl^UJFg z!th30im+Pg6e)>H6E@E>#hi!`fQ73dU`R8&4^sf8j0Cu7HWh{Qr^2`SZ^`m}C$fzv z1$9HW3$yR^CvA)7;3HK{IM=@$CikBU`7Kl6u~{I5=(cecHgBl+x-_9PjD&j+UK8)P zNjSuQ7MZ*u7wZ$U;L95~(lcx+G)P5~`YYS;_m6E*T=R@1nSLa{)aPR41q*t1^E;xr zs2AOGbR?05>4Ml`$ zPge`TAu3%M%f4?Zzg~`hr98T~=q6R#7zJE(I+=gWgB&mFMh>4Ffx#J?755UN@b~3V z680h-Jp3fe=T6*9#vhwXf*S4;HJdn`b$%rD`juI6z)K0{JWm817!Rqtchk+<8$k8J zb~0*XAPHzcFKoCN3#N_3=rkWUESsduO*o~Fi5=s?Z0rE6&$Nb-L&rhFiIGJ4cP@^7 zZi$op*h}QBbbMGa1opCLs%#UCr?5sW7Ze*@7jxMJO-ain42c(Eas#y1nmJu2M-I z`9-%llXvxWXh17XKQ)A&NRGpch5=xHVhgywb_MGf`^llkVK|Mm!SospNG{n^Q9CLU zul~q@0w?w@o1NRLcW36}kmQJKE+wGsS2-}hKLWEpWRi99qp|S)PSW$h7QA~?3KcVk zf$X^7!m8g3X>IcxzDfN8+}y1g3r+bLnVk%M4cPr}ac^3!dz+3=JV{h5I6ORZFuWbs zKowRcks-Q?XeHwZFIJqV8eiYi=sl^Zk^?YJH4J>0M&qNf5RiEziI41-p>lP*(C$k- z`Ovb3UKrJ%UY%S@jVzVn+M6Jp>fk2qtJqF+k8C9tlcO6=R(Yv8UplUU+fu>D!kOrhj_0xuBJ(k zn1q#bJd3{Lof}M{M14AKT-gtTgCa@lKv|-(CYQLaE+mhwwv)VwA;iS2k&99sg!)5D z=*Vgfyo9I8jCvzfd7e!zf=p<;7LV8{&mtREjDs4b>$JVkei~K3g&1{rpixz)NyB+x zBGvSr#DB~Y^0ZXSf=67SIvdZ^yeo5w)2<_&fsPWa2;^h!jAXu1wly_6DS;*V%|!8b z6!Bd;h-kQMBe#x@qOt(A!BGXd@ImOb)C$^rI>3bUgTU6M52kjrq=NN9OdiF*cDCk4C-n9%>~59;wSOvpP(<+-=#Wj=gGb1N3@}AuP`b34qc$4 zhrv7#693~gX|JfH%N>hIQ#TDPe)5HDeW{GU+;q@MVk}vkpbHJ5w!(39PSBQuPxRjV zFLbNDDc-v_5o-r(()RWvWam*yjM;LE)?Nywl5#^q0kUaJLSIZuQOAD0dc()Yqhza} zBQ8ieOB+J!Y1RP)bk!e$P5f;{%`-*V_ufRX|4~kYGcFU8)FtH|yLF*oel|I!-xn*s z4a0_O##q|t2s!z}22^s*K#Odp`Y~UqQ@th%+@Dm`reqWSLXHUXkJ8eg9GxdI4$6Ak zqT8lSTJ=bo*394&!3tU-2oMO1AD^)?5{gM#;MbYrb}8 z-+w~+w_g7ug51SiL&$z%sH{0xwn!UOKmMi-k2-{9>&Iffq5<>^QX}tt=aCs}bBW-_ zy9)M0?UQFh46wB_me%%9G}EuUNFHYNMV-Oyx1al3sJ(LnZQD5%D?AsI;070RLDK;q z|GGtPafb-hAECUSGSHNtLQFz;a4rX56RksyB;b||=||R*Eep$t$%8%onp8vNb@-6E zaFIM`-%fZN)%g|ggtVByi0q6X1BaxvfqyiJr2c+H-Evw<_us$Cucc?He(@*z@oF30 zt|x(q_g}Gx`qSR_woqbNMtAwBW5CM?TcExS$#`&p&X zEgkj5=zbL)PxYbUK{}mbeUxr6u*5|bmRLLPAo;x72)2%s2mSdQNT~B>;u+Tq)eNjr z%gY2?7xV_NUr&XB<1A6|pcff!(F3*?D}eO_3Gi2`XTMkNfm(<7SlmkV;sMu$ zMoaEee$PDOwPhGpcwNm^U%pH%=9Q6Ky*tRjh_QHUp&Sa5*qtq;nF&tU2{VE}(1Kb! z_RY!!f;T(ScSl_9ZE59RJ2LsfF-5d#lLeLxxl2CkzMugi-?-XOQoy_L zga7e|9;!{KqbHB$Q;=fMkM2k`k%;5FWh&8D>am!+^vFVbwYi23c}*a=Q19cY$!K# zbGL`uv``XY>_l(%aV3{%9d);Kz?i<5X~nj964+vhA=A}B?a^T3eBuXH+ab@@`X4Nx z!IQ!sTSr4)pT|VfMGlOTU8$Q>H^P&uu6QAtPVTSN1B=V@s8DyFx}5n+G29kx94g6m zWOpQw2jb0-lklYfHo736LoaT!u=Hb_FsX7U4bY6D7Uuo1M%|R?Xez*mCj(IF!2l=@ z{6YEiEoo!&I1t#FlU?1dupwTVRl+#vbt~Sy*v6~BdLpO0$CdDFa~sf-k}x!C=s|? zL0`6?v|2Zk!%r${zkEI|4fCdgRV%spW&+XO<)PhD4n91Tg@VvwkXq|T`(?^vOV&`T zI4P6}?yRP~^lhZ}ssdKO)W!>jd{7Or#i<{)&}9E)p!DYLuC&&QbB%%(4tue=S8ccTKXbk zS@uA1f7?Mz_x=>B>6{ThKK6^8`znJP@#kpZ7+LW8T_(KlOv%ViYS@@G0PR=bAV;Ge zu%%T3RW6iJcXn4_v*s-|e)WM&;FK`1NDWJQ-^jl5pVa%cBnZAsRg_*lONG<5@kI#- zRlmQ|e&gTK>&jUqprn;^I_DvraoiBAq9ial>>ZWO-9=}}S<~9OTeN+4f0~+IMV&If z(O*ecz^~_1qaZ1m%qPObE5FdS-K9XvUqBCSSHzmjzQoOu61Bn|)TEmZ zdV3pze*Jc;wW)&4lDCKWp|5EBH)X1$Tu$q65cqYLoqOjxU}=L9XVTWrU$T(H<&7Pr z-!VJ5HEjf4UbvfvR^O-L-}S&G`xh5*yprIwee_OqKa79XOso}00`G(a2;47`hC_q! 
zF!z*ZmFr_td?LBF*b*Vy5M?iPr(1d-qNzc;v{*}nDC9Hx_eMCVjTU|SlH%L5sA_~B+Sd)meQkTF{g;zOV0?%e-|Yq1;~Kr>lSFxs zrjVN_G~v{EdlcO0M?-%Y120ILDD>6`{oW2>T=9(f`sdK0-$!6x9a}f%B-8i|LTaAO zCKhe|P^YDVWZfHsza--5^sDNaSuhe}=C+fJZeMA;YZQs^m<(&#H(TB)GcI(K1<_xX zN-aizqD`8aq_*q^cm47~T5^0K<=KS^)q0QS>Yhl^U`1bQWImiu;z^^%D07I9%pyjl zgakMtvAF+{TmHiU$_`sXaDom!B60c0w}JOaZdCnWH`}p>yp9 z(9-AQxEI~!z_-1DM2)rsTr~vw5%w@{?Kx7~R~yfm%Ara}8ZpUG#-#LvMB7XUw2b@E z)ciYCXFzXQeqlJ=8XZe`DU0~6Y>nGA_7vr*pDu4{*MqiK4p_VS0j*1(MY)3pASlS@ z-YMLpF^6O!H8X+EJ^oy1`BXranhc<3)p+uvB!wEh{Y~qd%pmW25w&RF%~dQ?N5S$L zByZ*;;x_vvdAI)qH5p1P6d!COmgQ^@O9~@8y$_NXwLfWv@)}}&-vM+6t7FamN~+bW zg*Dov$i8`}N$B1EH0AXW*ivVL4K~v_El@xODGkWmq6Xs{R3Tbc5A?El*sxNXrq1CL zZ#BTEQz~%OMjlQ%N+WMzZiUp_Qc^fs6$I&i#Osn8@D!hOI^m8OIEr9^{s->ln^xjA z`7_skpn|%*K2C0Zm`~rGh$nReo9L}ivq@EtYPxfCG--(0%pG2&3l@(9sK&6}By?wq z(7n%3>TaWl`mbnvXiRyIP)awyde7TFN8FM(!^)9J>TP}RtU;#YW8$@x*BjTDo z1bEqXwELQ8B$ew9!7J3DR6d_P9`K3i2SspEHp-+vLKl5QzmrY5Nz`WAK4MZaP3W>T znY7OdpiWXJNp0{zzTkwEFjQg`=VaN7-1_>J);z1HZW~nKy&*2v#M$B zo;I`VKYtUR$gM;FCkwIo=xWK)vSV z^QR7)2=#il)MRgykazlkQ0s;bs;QqPsd@I~-k5gcqQ(Ai!}SN;DK!l^dG8?g>JSp6 zHIHfCp1xo_ToRXDvjwO24MMlGLrHy}HC`$yqwlI$k>VXbbgRG-8oUQ`*M{5RkCcg+ z)FYLKjyy|@uJptiMv^4hU>DuFWD@8l+M+@?dE{N2&gEq&f|}|WP#9&5GxokBVETmC zUT+l&zD8G+%+H`Zd-Q{V(z9HY?kC#fR>vi!o8XwkYH-TW4ywkf5gaxi>YuJ6p|1m} zyYYC)b6HDg^t7e=I#JX&#g8Tlq;Q?GHV787^V+i6#36SOZhDhWoLk-yUcvJUeYdkz zXQUZ;J^9Ejcy*uDUZ@uO-(+jtk~w5+gEs1@3`SLU22k<3Pp|81V}s)s&Yych;~!on zb)hoo)c+;TbG}GpR_Ng*q6`YHI#?GKO|Fkgp_AU2V6|~GQG2?Gc=fncQ9RzB)D9iQ zS#~qUnqg1rCH*?0(D8;GJud?aycLue=T6<%ou`)~24QtdGyQcojv5=vLg{f^TKB?| zN`1Uf>vZ1>O$-ik^|QVcfn_FN(dZ*F`MiZw$R&91Og)WXT0u?*m(XItSnSU2qDC*> zXsA)U(CK*|<$M38#`aZIuxJG_3g1amg=@)8`H?ufRTh_z>4`BvV#$ob18@TLg<9K< z!dW(h;M`6Nn7i7HINevl>)Q9pr2UiF?@UKS@JwlF?!%Wzy!T0|o&j!0V`hC`7RN zzj}gHDct0Qkv3>krB3)~U(nhuqshD5hVa!#33ylPD{A+TBPserP;#;X%$Vy*mJBk4 zmXXs)$&WGQ(zS8ur0cC>e zHb+x0t-Td;*)S9~^04}I3ki7iji|U7;aZJ@RAJ5#jInN|7KQpy%Fc74@1!tZ z%MN<$cEg60U)1Ymdc~kwhR|eNO!xwI>h{=_ok`4K#%^=^^_vNJb+aXTi}~p0u0ePQ zGAg{&`eB`4E@}FComxKrNv$_XLVXO6o?4~M-kWBV0P8zks_-7U^+p{mOJ5Rx)k31P z>N*|D(?|UbV~C&M%DuaAm1ta($LP7b7|j0P*PW}z;FW$!7;LtmPFOq?-M9qm#_s2B zZV$#8N7fTg+8$=~8x3>$gp^)E|K48eDED%fI)w_{{^du>zq(L?3{wSrQ%xbm7c#4)G!dGlIT0@ejA(9v( zLPW%%%(*{Ah!|3cp%r1MZAHjN{64>{|6EtDu9N(hO#)WE+-$yGpuVp?ua_J~-K%YGt`F zG1!@u25~4EZ9|M4m8fiZ$1hnphzK7);Zpo=K}Y0GOh3Mj&F~GNirdS%1qf*RxGoT= z^^%s84VX0d5|q{tBdM-?L4WTYR~Ymf<(JJNpzR*&Vy<(N=LRIX-+P=ov=qiD`7|^ikT8C`1TJMl+%df^1Ygb_iVqN`A(C%bq``UI$bVQox0(w(6-}hSuHu&*awPAywj^#(5Xn5M!p(yn$#P?BqOjVJkH?KBsWaa4 z@>QuA(Xb6u;a@J}PB6=n-GG_D`x3>36iz;l?Y0ZlxV--WqIf-TYiSMyt)H%?DibJFDShWikTquQoh6TWaHdHoS`v9z3~!+*`C3`*M?Mfe${l@1`cf#F02GFMeywGx0C3Zf% z0&(+=h+0{IieXM%Sn)g5#_dC|nme5C{wA(8?>oHRX+-D5h0??YU(sxk1Ehxo5gUw< z^pBCyrcIM^Q=i_n?UM}#mPm-mt^`ujl0e;m7I)Skh|$RBAhnr?VXy9U8M!WW-fCN# zUwQ@`y1szu#(3`Z5hvOZ#&o;fO8&%eA|f1BqXsAJ8%5J7@U@QPt-#AMi4kDi=QhmEfgBcpw>r78F9_l}ONw z?6_jHJU-nd61zX0gml?^u(ACIt2P^u^e#096u8muIuF_$_!O_3n$d>CHsoPf5KZm* z0evkQxAb-l<}9nlW`!M1+M&d?_Y6q;-+{F0TM`Ue+@Hp*_apX8|Az8+J*WciL*dnx z7$5Hm`pCYdGP4a8|MTLiA|B#vM|*O6tQ$Lnx1#pnP{?(9SyNR?W{B3goU5JFfnB?zu_nrxR-J8w*Vl&-Tb(5w z+ipXYZuTVlpXV4Rvc)482GUR(MUIU1CeaH9(S`&4h{VAOeP{Kh%0EJhk4Yowm)+yF z1AgTy`<{l56WP3M`0x0xSy7yO*-PXp5|$f1H>BrW4GYM<3tWXDyr zXXW1^@sC!4O5O?!I0GuXI}J?YMo{^l^SFG2BaM@D#49)-z1~t3Z!qCSC;HL|Z6j*8 zwt}yd7p;uThS_JrY0B|lBz0r}#<-0pRdzM-b&4-pyWWY2!!#0cM!Y8H<1Q>ZvLEB_ zuS5BL9Uo@!7F}~sUguHZR4jm4kV{12q?`y zg0DTzsoTfVa3VCCqzv~!q02E8q||F_rEMU$>8A-gSc2KhtFgh)i`M-VP(^b;$TJL} z0$4B6zWWO5pnp;JOEOeN--mh1(Ii>2nfdB_P?jV^nNuB?Vo-p(88<=U+>bK}8c399 
zEkXa+2)^x)U^chbgoI_e&x7rRx}lVT_>sTw+ynce?@`Z4Qy!;wq$-l)p4C<^C1r+Vfn>vbG=FD+gsQ& zI*b&!t-*;~{b>8e^QfP%LV4DGPIP7`r`+!W+G&R&?8!Aw7%YV%mbdh@GNRf4c0%5d zp42VF3&hz!C1F;2F4`iTcVke9ZDPW58#-&{b*~V7tuMn z(zeY(C^rb8dV3w0I{F8;=N&}po+mt=6+lFD#GF7?rBQZ11$BNQ%HDqBlkRq)dSN2p zkd_NUu}@I^HjIe!%}D9vnfQ6aNUELLBv~-gk}A~mK>dMzW|xBT=XUlP{&EZ5`sg4d zeHTPmx*#9LcuB|Jv}?m{tjrS9A-e@cT6vjo*mwu>e|S*ICvVa_pY1BomSKM28EE=5 zjEX;Jg8cg#P1AH25UiJQHkV_WhW(Tm?>qxRY5P%XW&z2vVMNr%_PQ-)XkQvbTP!*u z^<*;&{^BKWzw{-A@9*LX16NXNEWlc48=@zV`Qj+bji2H|kNEa#!LswsddV(0Vfq^W$T_eB)k{RRELPCyT3 zJtRk-$EKUCTV#{2HVlLTt|npq)nslR|%YTv2XF ziwh2Nx((-{amE;Wo@EfU$$#^=%f=GJt^G-2_fd2^?ac1Kfke9C0Z5K=RD5A9Un&{{ zl1W2I<=G&pjk^MMGn{FA;sB~SFCcr@xsx#B398%!n#6Kn;%v%xb@Cf8+wg>WxDviR z?QM9v45t;#Gaf;)6n(TKApL-?kp3({IDniSDHB z?`SNZHXfypD>=92y=eC!#;aTlC05%Es8a7hORI7)&FUkxF|KHXktq#tGp2_&x{^Xo z5Rm*pn!mz|s?X=L&o-4;{63V}_8N(uCmcz)ojoZ~I?~jOaS|H7fPl!Hyv_JNGo;P^pe>JiQ6;CDs5<+3smJ!pW)?=I9*3AA0~ zKq5j4Ks=x?l!-EJ9DS7GsKT!H~1YbBX4|Fq4 z>G;2lNd2Z{R89FN$z?OKD#4W`o8AIVLlIOvk?G^R=MdJy?r5c$+B&E?6%sjm4Q)Zbl$ZjAYhzJ;Ln$>fwH8$dI+9TWmr zdd!+JQT7+WWBgUrDGu>cgAJ&R{SQhneutd?l}L8(f%3YqSpEDH2v%L-rC&FKLy9f& zT6++h@9)9ahNEe%t`)<7^<^1;9uzCvG4!=N(N4OC$pg*+-|kL~R>e>=X+2m^HKMuB zCM0X-XkylJ7*g}yP_|GGqPq$xYWfcOiymV0n}?u35e^=Io<`lKFi?LrA=!s4sJvE8 zq+Yck*zJOi6TV^CKzl4b^aWG3t{~oG&Zj?Z!>_UtRF~`tf&*tb)vqs6Ut7-8KZ-%x z>petFh$7RUJcQ{dBZ$|i+i?9^Kho9FlZbn*2lfAkfocRhAG$5c#{wVH)H@Gtmst_< zcy}&L_m%m_M`8M3AK`hm5ADb@0>uC;F5Y1r$d7qr`x6DGYUYo|Tcp1KU0NJp~SYcwsCW}(Wk9CB>@NQG$- zRcuY=>HLd$=#d9acUgmBo0U);kN_PEs-Y~5c{9Vk$@%r6WZsmKbo1bmBuhDhJa%!X zO}7?+V0NS=>*YWyyO+c5RmIZ6+&6H-J(xIKYEg17lxA92f(Z^Fi6S!+<|3pq`wfYF z-B9!!98UG)MewnA0P*BGTG;C|>U@u&v{xWWeg6wqXH!VhtO2v}mr=E6mgdA&Lpt5` zB6ghfVm{3v683o;M-#V$Hm00YeDuTGzY8#FVgT(*4<^wUoUv;1JruQ`@8P{1n)u17{#`>j`5}_sKXg;?uaj-p$kv|d>NiLVp!=@DOIrw~x#t}j@*=RIG^H1s0Z zebBbj4Xhr=(sEfhbR?#r=ieOd7_$L0rm%h?<5>K7)rh{oFq$?s0qqcq>CbPDL|$
`OD# z#t<#6lLQ7>(31bQKykr9s{MJAFJ*l>rLv(kD28#nV|DAD}I3JR|+l3?syF>hncF6FLpqsAl0lDXY8u^GlA%gFj zA>Cv&Y5CQVHmoA_eW@MWJFCFsNhqoRX^I7mbrKxg&FRMt!^WF6VAJ7Alr^9DvSvS0 zB6J{9&VmbEBc{4lTC6r9kfiq{RUT#}df`P(sQCc;GrnAZdrQ*b+Y0)f<9HD#0qt?t z(Pg{oGZ8vt`Vz6_Ax`DL z9o)bPL{I#A`ARC$?yJyLt!M+Uh>=*YW=w=nUlLKU2OlOy(YMp0iE~5^YR!*@ki*YW zRNjLe8PJz@l(|y-vsQHP@j#aGFd{1+v+r2(K$2SUj?=!8XhiODyudp!#5PQimt%koECCou^d*@spM&B^o=olwPoV(<1R0<8^Q zQsYg+{(j2mPIQHWUIG$#^9|InT!MFq3C-JQK^00H&SS_cNDFnLb_*TIu^f9M-8g}J zI&2`FdOe1Ql?CIH@9dhlFs6yakDx~5N^Dv0hFSAqNNl_esT+U6uv`_VzhqARf>I%K zcr4Q)t9Z@mSSl8dkd)NbpwEr}uK*X;jXlKG>^wNYuwzCT4p_LNP zz6K--USZh|C8z`Yz}f+J)MjiXDZZ!Y^S3gub*?LE+wKX0e_K*##W8fgFpP-nKWI{- z2QrOr2(bz`Cv#?)k%B#@^w>vlQoZL_koOx!vqlI>(d8rHHqelkMC`@7W6q>t^P8mkF7$es%{@AbKk+qTfOO$wJszf{}gE68k6B4hY_V! z7Pd@&4dwG+fTqs~>SN_hXkr1xYkE@OFTvD)PA%Rm7(!#_>mkN!B+>Qz0rO^g(#k(n zTuah+Y#PS&#)tvXDf|kmldr&Q(0{0vw1D4FC(1jp=S}H$$o}R{hrqI-9)m1%7~khA5u~pM)$$RwnuEf$#wIyk` z`vkT>refO!6;u{n!c4|`=%!ksFZ;iAVXhb-@&b!XhN0qv1)niRM8pn^`=}a6MGLpW zhSOun@WQ@S`+F2`pYn=n3}&<|p#<8l{Q!GY#uH4ri`sZAy5Q<45@vA*gQO!od7 zO$FpW9~LQxfNf0EFgBr`U_|Y%x|6Sw9JwlEEOOC$jO%b9g>*1+-v1IeS22yryA;!U z?1Z`r1Bv)(G9Prg40HSTz__n_u!{tf9J{x$I>(1dLhnO5i6)T;y-8Xt4~b*m<1t|{ zkw2OY-hBvJoj!s#4|)dGik(n7E|Q2(I}z>Se2H`GZA_oyLQ*Z;Ap3h1e7!9pY4=rN zHKY$sI=dFF4vwO2mraQNrz^HzmJs1&OJcS)8av*7fE7zPq8xY^?3l+T?cT+U8>dMG zl40m=Z%Osl5m+;c3z(+#mcX2o=%&@m%7}=u99SwS&RW~dlAZ#kMJE`e_-xeJ(AGx z;K13_4z>&En~!ng&&zpg_5{^SUSP=*6Os`19tuB?AbRaj&h zXwO$p8co|;wu1D@1g^dKJH)>?B@GKlu)c}DQ1)*W)xF)0DxF$#+kZ5fx5bA(SGm#m zcY0E?aX%J=|e2{MN_XS9q=^3 zilnM;V%r=yygX|hNj>$Pk8p6KBBLN8W9&roqHCC&v=gT@^l--Z?Re?>LR(l|Gzqq&Mv{K8*T13S4?HkgEFU@+PW&wButmaT?%8 zN(VK=(|i5NZZ`+2=r<5-b9z(N=FR-<784ru-hq^kcc(EP<@nSJNL{WW9b+?`3apiy z+5dB)G1Uzak-rs-mx`g_a!;yhc7z?Zo@9f8gm(Nq1$K5ORP|?H#tCOa-1;DzaI*mI z&-WpPODw5;?!F39$`YtPeF_?_tZ0yjAJO*d!8ff40BK4kbl!EPO&w=&L9LkX5ZX|O zDo;9n{TSM{WfN3?VCRx18)I$FNyG6sSkjk*^z&2fvK&go#>h~#W*!$Ya|fiFm!ZJ@ z5$LMwciyG}?I(4p{l2k6vV8@H-QU4ajq36L^8_lK zkHLhmY(|wH1o19g;?OmW?RBTQvf;-t>3k!~KglF~S^%-#_X_h{%djxW5Uc;Y1L^^L z_+zVFNbN25&Fm9cbsDLAiiuvZ+bxJMH4$XblWc=TAxBxXO_c?$Pl7-=m`_@t%zdMeV(%Y z$SI9 zhP@Rmi&4~wqW3K*t3AS-1)PDl7&&Y!WL(#c|0G_s7~|LEfF-M5gI|yaw9gAU)$zYH ziphg{aio+}yCSFm)szU#hik4fovr?Vv9x);hzf7*w1O@Y8FrLK{1O3#OdH+ynio&N~t`AEtxrn!!ZZ5k(ub z5rU#hQE;w1L_6=Drmep(NTtT4-8}~-2R&%oEis9-2&2P`dy|x*_C%-K#06RzQtADn zSerGNJWd-*gvXV9!bC9*D|7};YyxUc%((+1#^;2GNz(0oalBI`t-Q4djMnXA`Gw^Y z(H?eBO`QXZ?~A}K?<|HHo{XK~2x(CploKEbxb&F3OcMPhV4`5FE5W3=I5Yd`` zlc@K&aJt)*(7x&dR!kDo*ttREVl%4<&Ss?9}My|3zVl@VnbL7e7iY>G)K8m-%QqVeE~sLC+9`I+92ZP zcT7erqMbjMXfO4q@xe^De6U*c`r;T8dBlKv#TA0u%oGHT9*|fbLMl7P(CK~yXvZH# zFy~@lDhqeQ%&}_5e!k`8@4~s#b|oZK9tF2?hoIgd1I=1ZpmO1CG_zsM&2ekaiuH4} z<-EYmb%=FV22>a-K%?nyv}(=)^gU`#lt=HQWML6J1YfEuErrGP0kk-4A1_k)lJUhr zPHgq0&JXsZeyj}^PU=a;qb5ttTA1(qmjne{4LQ|f=0AiQP{H2mkYwdUq9v_Z)UzLb znAVSo%H}{)r1 z`IXBF{)*b?cX_$0A84073~_38r6y}ah`?wECmaw&tsKpW?&)D@D>bEJKTF_t`jGm+ z#u2ZBgTRe>fohP@D(nE?{~}0SLJ*C!`UG@GD>^?}3$FW_u9L%N;_vT+RWoPt+Jl!t z+RzJ2gP%ds)qU_SVE{4fse|~JV<54@0mP5)OPcnNMXmfJW-c2=g#XP1!JnSooZ$j0 z?-wXhp5MbomY9)w^NdNwJZoz7a5T+XvKjKULrInPFlZWIp{!;-esmv3b%#E1=8ycS zXygD+T6C3roiUPSd9G`OgM)}s_$jO`?hnPYN6@k+4X;xdM0_4lV<$uBswmMFZg!jakcGVG38Ij3iWrUA0IoA z^a~#$rZpP`1_!upp9a&KQR}f%aT7gRA8F;Zkzz;b_;(3jDZlD3xtJ%*D;>MaYpO zC_Laz#jO)ADwIzl^`aUpo|{seMCSXu{321TTENe_P8*ZWh~T6x zCpb2XX=1A+%8!@egl7!x3TL^!b8aNX{a;*pel%4M^CDcjGfkNc7_)2~mFC|>q2wqp zsw)LS@XwHzt6vyvY(!MQ`JwE-73Pj2s47?^X$n{b;@BD#4Ud2pr#5WbEW+%me_-lZ zPm)1*xZDjJpploAU(fFRcUJns#1wxwj_ogE6r^$(YUX36TCN z8HCl0&n+5-UiaKV{_2^g%gi2BPg}Y4ivdKlM~25(b}n(+Ly#}|RdQQl&AO52vux*f 
zDB0J5#UZY6NY3V{BR0^Lz;t0(SEBt9%(tyP3C)UuBvL4(8K+CI*&>KUXT(q<%EsC6 z9Eh$b%Vq>-@tG%riM0JKzr3P9?V4AI(;G^l`o(sTPc#M5;uI`0W8B8SZse>P>wSIQ zgT&t?G`P%@y1Dw01od9r{TZnGkJ~sW-jF2t97eTYPY^!7#7S*xxc8NoG%T$Rv+CHK zSn!-z^=2&H#viWd}cxt6iVc}z?3d9TrYU4~9U!-%A^7xCpSN#&{Q=+S#K zx@GNyBd#taBj^sO41##oZ@*yI%BiTB-+|HlEBcHJ`Pt`3R!o0 zfhNeF(2Pd5FZjMj^-h1NFS?DnE05#18;ncb@f+XtTNfz5j>l5om!|BP3g%BDh}qC% znEG@Gjrhfao-OT1!gg%u5^LnBPIH9%9YItNru>qlAF;fq<^S_dT+2ow^&A*Y?)ezg zgpgcN+{nT4@7avHsyF6+4xm+iwxXNPk9iZU%XTf}1)^FpZh#W9>)8G&8bu4v1d_1b zg}nC3X3q9-9}sM41}gjv(&`lC_IQ!uuSb$^*Gwtz&ouEomepiE=4$3Fc`V)r$v>(f z+qB6!uO-)rWLVb|~ZI{cht@ zMIaUQS+2=Nrjh-)0mV-*^NOvF&~@hoTs`SQ?Kg5X-&IK4nJ?&bm}wrvnI5xw2q!Xq z#_41KVk~?bCz%p}hCR$^h17!7AIQf|p^vbttPxUrgiysk)(tewk~Vw_roD^BB-{o{^ShvF%mvWR|AsMl4`PE-1J51Zh+@$O+U&d{d)k+1bso(8IP;2*xdNxHJbVMB5wVM5yARb zn!-@#Pw(wbRGXT(;w3M+M!QE)1lh=^j3ENqP$&z1397wn(0BOrX7457H<|5Nq=_-t z`*_=tUs3YG7xMqO2S$%K!k8_-WMZf<@yZXT+D$XjByS{bc02<$c~>xHkpPOj8I!$# zH&-?6AJm>2%Way}31+_upseyCr1{xUx#LN04&&&LG2N%6)`Z#~V~ogNZ#=QamK6Dp zqT&f`HgF23?2f=}0n4|hpMl`;ffFE#Zq;82lv%jLUfCs@Z+^+BwZGV8D928Rre6m>%y>vw4h?7 z4KJ8BKvKVbBve}+L-EEeiHP3gZ2vVNcEb!w{_^vX+^`-r(M6Cx=Q)1v=|VdB7Z9Of zy#~hXaQC*}R9n#^(Z*SG;x{bEb?*||ZSFyoinV;L-3gFd1!Cvyp;XiA1*-RUyr^Ff zzIxhdQg=*5bq5TH)e|;P*sj#b!`OK=XS~L5kpZb$aTzSHyn*b0PvT7>N4~DSckdmB1DH0?!V63=1aJ9CmhFO)h=v?G-sF5n;wpqo6SY4F@= z;`4hiqD>8=E4B`&swv4DRj)l4Gv;TZHuS6{y{iGrA9>Q%7noKXU`Xu0TG8d}`w_w6 zD;Kp!Z=gE#AXFKD$GUZqBxQ3qdmoXQc+~!fsYn0js>>p2ZLA~7ePKYPt2MlQO^YTi zZXcL6uS50oLM$~NNZJm~Wc-0A3jfH!;!EY&@JlbM`nebCrE@^lumY|>=tHHqHgoN} zDj`!FP2K)_hpEG&Nc{cHDBsqP@6GZSe)|EG$6K(rC)0wieFFV=5w*Adf$Mf>?6(nz z^CM{(*B_FKYOr;~!I)b%W+`{@DbD`Nj7mAL415vnyq|R|i=hTfby|)u7o_hcrotrTs4AJkM6Ion0 znq~)G0vpq(s9XJxyS=$Dt-KP?^5=WG!mR61daVjeqdcf$PaEHeyRh(z85Zw7#I@yC zK<0Ut`FbRvxyvp=L-{2j?wQD$$I!=KaDv z8=T4O8GfYks0EGR7tUtCg&>#R;q_7*{&xOY(%VEt^oNYOQva2pa=jxldU+7Ex*3|U z$}qy6CRCs2$mxXFz&7%Ku<0*0L##-Lc6mJ}uy^*jQ8v?C*sw0SNYZe|mzG@YPqYJa zB&wPJaAiIpQM+ZYBx2oORJpbADLb1W)$$Y<f6TN-7@P9 zWbXNdAJxNACdGwkJZ2W6S@d!GpJGjw~(Z8 zi$Oay69fE%sj6@an*HqptQ4Hi0^kG1f;OV!RijD))yWs^o`gRp>@F z)9x~kWFr6ktO*Uh!7`)A<577wmS)a40N++y6Kn5aqWXT5(~1{zezqMD6vwh)yJAsx z*9p@znNECW7N5Ja0Y85lL(;qbv9Y2D(GN4=TYBBX;+A0Av@sKAHn3+ppTf)C$MAHK z56e#(-9x(a1;hJtEYp+>K3=jz|fK)5uQ zcRO+oYd>v-jzAJ7(4ugv9Ptax&a`8EK??JJ97%r? 
zLE3`vV(P0?Tz>I6bn8fkjsrEk^0%IZn7_b8FGCb9bAja(!l>x@Mf{lSL-kT)PIu`& z=CA8VFR$*ydM9=OTcb(LIQC91M@O2tyb5I>w(uQZUNnJaf#=;}JUzS5GMEB$V4oSC zzPA$!_q_!1DJ$CX@-}w&W=!kyT^O*^kA^3CupM*&jh{1`mfVddsm^_1!YzLywgpZ4 z-A`CiXT`GT{lUoP5af6=jke}5iPWEUzLal+ialYpvVxGr$!)lIdJqwu=tC=00%*EZ zPokV_LF@CHz%C_}YENE(43CTO^VA5snC%d$yXv^~`JSXD@-><}^d^PbQYbs4!Nf6r zX!B+#qI99WS^f}`VzLVTUV1>>Z(8ig{}r1%t*OY`hSVos0m1KD6m)*$>gOHC#q6CE zUhI0Yoy6E2m>!b*C>)wI#?da;wYIb|lw|hZ3BuJ6INv~Tx-^BcPrqgJ5x<`UWz9EC zJ!AykJ=y!~mWKizx*JvWs^s&8NFuDh1Le{)7{7ioMDHv{+vC|N_-}_q@n;|O>blBV zGXL${E)Ocq-pA`}rg0rAHCN7_vCeZc!Rs##_xaD!)W`k|W@mY@y{3bUiP^(;$sGRY zLDor>^a%w=_Mr5yB`n8dikheoSg*N@mi}I}>N4}YhcjP2doU@ro(Gqwo3Q=TN;3Vn z87-RcNGh+a1!?do6#lcDZ+~T{f6Mvds>_`G4t(YgfP$Dba z$Z1)oF@BXjjJ-D1jpFz{bX6V_UBt=gzf$|R>m!x5x zK|hCaiheUW+1lNlpzAD%Fa3@gzASG&dmhX7u)WjpHlDxbO+^Pjq3VdeMkQVcb2^yE z7~F}v=j*WDO9R!TexlQ_{pgZWK{VsjDB^pH(2i|~_?lPeF#k*q=DuV-3kye*ycecq zZBZZ<*X+}ffjd!U`%Gf=q7lps*xBme4uZqWuv)nk1OE|Hu{Yzo!=|9r{w>=xgs4dM zBDO!b!`^iURGxg2&o_U7GWItWKCQww6HoLww-rS35?(sDkbj@RxW}Gfc}>z#A{J7O z{4WiNy!L>Z-5rzNlG}4?&T%nmgEqwMAT{{Ml#V1D6v}rjg zPg}_~4ELi=R`)>bQH${#Se{zn7p2#3@Vc%%Q2p}=v-{phvnTc7WlL~JeJ@(rV<;5= zya2vQ_S8)=8x&8yAa_9;=x6YJ(-AhW-i+rz59?1Nldb3`M|ZmT7cq@qHxGRLU$EI2iv(i>oA&e$&%`Xde}7M4OD6` zgIVhoH0g|>*Kat}BFBx;a=Hc*r6Eu(V{F6nsa*W!IiMXkJV-qKHaFeYn%dh`f~SR; z#{5**<{Up2`QSLnfqt%@xds575TuP_HM8G0de3l>;l#ZFn8Vl@vm? zHWZUBC3Nj~2f87pAC+fUO3K!|k#2q+*lfLxnQ399_E0&P9}XZ(zUxss%NKR=HrTwQ z3;XwE^Q!S8#|$)=Jx7w4JC?NhtwNidr@^jI5Xr5cfa|25!Z~}4@ z`{VNM9PJ&-SkC7MxcIjhFki_sQLNJ=Nz7R1j;UaC$eScspMf2Hdk`T8{A^`;w8%&jzaSRFHY8!w?VjKh7EOew{YdA!P0ZWM;HHE%3(Ye@bm0k(dF)HI6W3_kuCbiuto_^}5=!iET!w}QH<}Z@7Q^BQ&UtS{>7Y+2 z{_UD1)nX`=x3ax>=&#UR?nC(T9z^S;;$<@jL8>Y15Ehy8_NRW}Qz6SceH%dAuRMZ` z$*mAQDgmR1y5oig-sI|t5wvQ)KRJ{XN(3J6tn+9v%4VqeAWtFn-W@=jkF%`bAPtv8 zHb8dNK$3gC0>;P3Q0u}W)U(rqSa~y!+kGii%7Q_)=%FOy6ziNj8%`d78%Bc+80RAG zg|%VkG@!+d+P)8^31Kaeqs;@&ik_hHG$bA2)>IxUDNnjK2N?xbi5n(T9#wuCR)EO0^%ojo8>3m!e6G+6vSbx)Y*7?(8 z3Cg%)2z%HE#WGVacmDz`f)Y?4Jc9Lo?V-tO929Sh$BYc-KTHosotq;qK5hW2^+Hg+ zyIRqB?iCmg4klybSXV;#6%<}2;J2TBp7-`Z#IE32f2GRXg_5b{n)QRExuKb+Ijo2x%n#loom>k+r?K+%mV$SN+?;e z4b>-)fb97QsD5Wjg~}0_xX29L@?%LyV>GqZFs|Ur2Hwurg?QCnfmSPLs+{o{PuDn8 zMYI_oJQc|5XTe1IX%&or$7a|;PUI=eX4SZgi4o)I(tie_Po)ht>19d%hJ~Ua>ol+b zJ)iH`+Ra-vFl}qYOwQ(j62(4-ylO)WCyt&hvDs}(?~P#BqUIg=C>!zEKLf~;OfCGJ z>rJh8kD|`whmnOVa!FJUuyr5>Yq48YryMRfQ$CzeYUkhYn_(d(BVeCf1f5VzZ! 
zrm!w;KekWEw%M}2?jBVBWgBP%wrku%PJ)wgG?Cre&&_TaNT>hV!S10wkf`>cS%FS8 z{7N_>DH~Bb@EGX*9z%N7b4;+Pf~RH!Y2q_GJao*1=$C)sgqca0e5D$-=d*crkt65z z$6kmt4JK_&f2z1DCR$$`PVlJ**FGSS8jU`WIezRqYwE*y_c{ihx&MHwd;lskJ!yXN zXDEtzhSGJed{DnY(l%)Uiem*Jd)khdtsRNsOAlHv%LbW66pgW9b718z&Gr5Ev^*jg z)Bh9G)IN84FZaGQUBSBN4xR(Oc_w;E%s{!x5^9?oAgDLvr8Zh|q2ESPfnQ&~`@Vp5 z9J$LF)CnPKOBd);Pk_n`h|y!DARAGM4I%|9zFV?v^mnd!{Yem%81i0$DMWxum z-p^&T_b?Wlf8*kQD6nmuJ2s>r#&5$-h{ud-ux<~eUQvcr@b%Y_;)$C;w(2yOXvTV- zUpWx34LwNVCv)nxX*=JRz7oA&D^R|EwWRyzYpi`>N_@w$98uS7)K>B#zJdM8?4|xx zcB&^B9caRcd+x+C;XjD~lkIXJ6e#wb!O41^;ER_u@@A`7GVOP%WWnLlWM)kWslLi) zn<2m9(!>4Pxq2NdH^o9%+A^jm2x(>T7XDj2&{hjK+EpuH9^gx^W6S|8wy5Ibqsr04 zz@OyK-h$cdhSQSEuW<2aC&nujU_eDPY)XEG`gfQ3PWY6y{ zhd~rq%8R4BxuVbp(5@NAPd;Eliw-jXa_xIGS{g>vhO*vWZUnuaI*`goT|`^!P}(xX zi)5^9L^op#XnER)l!uBbeR2h?y?tmpjEBxWC*Xu6m`48*0?HjdG3WUfsP1Hb$&gJL zWd8}`3)G-VW?6!OfmD#W17zmS5GaWt#Rj&hWS({!>&#Ue29Xxe)0psDjkS{3s2`XB z1#LZO_d_K%l?9R@%Vtb@@(1eiEyTu|Qf;D%=5$RzTKQVZXU98|vjay^@vGrPoMO(4 ztJYNH+Wj9zXCD@0`o8f|61|X;GKrLwUKE2e&vn<3L}|718WEP+jaDl}^p)3a86uP+ zB1DN8#5~unQHU5KL_~zhM#M%f{jT4?9UVsm>BA=H(46xkCG9MF&ZPoCuP1(dr_i4MLr zUn$5G4et@#Iqi{{cXcpJnYIrDMjNp>@1J7*f)X*W|51!xz7^{q)0x^kgtzZCW>$Ae z``RrNV@t^kwCyI%Vx|er$U1$;YBJdAC@xc3bej)XOi*6S*qy?V;9Mp z)vbB>^)%_G_7Q97;0ZxGdAgw6w*jqge}ZqDh?6#p{!M#a?n02@XX_?FazZYhjCFew!?I*DI z5b5)pH-n2}0$xuDo!JCo6PuVK&nWATPv$DcM=E%*LL}mcG{} zz9QX0(|#Oscp!7?;m)*)8^OxSojsopOgH~e^q%OzI`<}4V)_-Zy>7^*dEQXAwH>1> zy&zg;zzn_5!_?cS@T*UEuBg8xUMI%Wqt`AhM;F3_HJeZ~rvu;7vnvaqK<_|ho)BZr zFpu<%N{2JTdg@`mnT}(M_qRmFz-}y^d(y3FGA@9)FYOrg(1yi+$px3F&!S7q zeTduRk1qMtvwmMBw4AfyZl(LtCu;;(6*mdyYt6CMvlTTK19{dn;+4kvvme(|Ab#&f z)DKuJZhPd+LjUZ6_H%r>qMr^5q!{z5- zUf()Uh+h5`%ry!u`gj@Ac7MmDKAVM`OBmCQrSEU{X8gVLcrM#N9VPG=r0oUr7}#=I zL4w@k6JqG_)uJTR{-P{;xKRCN9hUXz17V70bU5w@aXb6)q^l2w&RT1hmD&tvY>c_; zf0?kp_esS|?#j=N_pEx&9NEmyKI2h5|1~*S0`sfo_DMye7cL;k~?$7pKqFU_w1dtRI z39N>Eua;4y3%BLU4gb%&Hes3yM{M>zLW~=Crd#z4>loFv$I{_mK50weO}nT!3&gk$ zZ$y2~7j($iV)M%}Fz@C`DEP$y(_buuj1^UQ^Cj(LmOX=zDhs}B0Qrq_W{WZHKhUcD z3I4vyk9iMuAdUPjD6^2T2}#n7r&W?)^u~lz(p! zLc2AH20paBj56T*eG*XaJfLlQ>dBpAN>Ta20y0`ukTizg^DPdb@}^p?L4c{N3PBNE zCpeg$z`TVO&ugB&clJ95*9G>G&tn!#h^PLpp$okfc^g5B>gT* z*SL#iDKAhq@u@cAga?LfwP8tftaqdmf!W%De1_bIx0Hw67z2bx1n(Eu)W%>uW9DhLUXv3WLzOsU+bt(Qy{f~#o0rK;4X|7QoXk80FSeFOR- zDls8$24;^jX4V4&S(7u(n|1enB6nwFdP)H5NoS!5m;+6ACom~8L}+&Ygu$VPJmz5! 
zUK@7?ozEMyqSNEJG>K}TSvK16dJk~kYR01j52DM8Ekb$z7qk*hxO&=t9FZgdYYXb7hqbLf;V|{Q57%0K2bVA31?8BF7ZlxwLj|L|+9U;&-FHIV z6f<16LBZyW)~vmrW|BeA=!2}1Mcc=sVZ0*Hr3SL2D)dq+g(j=DtJ)D@VbO-!t$i0^~ z;xN+zOxxap^B2z`DI#2~G208)XFD+IokK!$+GI@Lza1j~tHbbjZ_#Lj8_)ihfHUGp zu&Go_R}7`|E4Vk;b}GVb+7$$yx(%PNjOWTlXSIsGCBpM(Lz!FCNG26}FfH-U^NIDB zq$)terH0JvHL)L;bzpVFH(}lrCsrV-z|RLrvvT;ls9W7GPF=ShHm>c>ei#m7S64`Q zb;1WISQUxNp({j9S{^9p{whyC6wF=@cV-_;1pdTqD35EC@X*g!g}mgqnBD6Sl>Ial z!b_)srf@PS-;BEu@4p?4pDsjC@;zxZ8K`k<#Lbt7u`|a>Z+@jrtY1z%63>(J+I3Q< z%4!zm7p`JV>K~|-4+7wuwtTZM$w?U?>tLOKK&d46jE ztKC+P%72S578s4<*{(ye`mHZ7vK-6WdigMm2m5jE_klcIwE#Rzw_w=2F+5$6@TQ@D ztb&-eipyC-hWQ>eKl>lFUhaZvUd14p_Z13$59Ezgw_{LyS7!9so3EdH8MKqyVeh;E zCJX6wvE`4)pu4mN7hkpHt;yt9-R{XOzMMr#PFEOJDdWu}DJ#L0>e3-wp#JV8VqNag zy1uqz&Hll>>D5-K9%I2$y7c2Jlg>=}c%Pu|Rb7;Gb`&oN?*UWCoABh4WX%0pErgG- zL&bYFejMq|YaK5`jNdApR@0ML4*M6Cy1p#*up>4--;BwwRO4m#W=$p5Jk0ef);g!6 zr2Ug%{M%?g`EMKUa$+^c&i7@xXYYtV2Asw8C{I)fQ?Pt=5vU|v1?6AAiY?2|p!D)e zAx-*?X866iz0i+2_wLVg9t>j%FSlal_#s>~XStBD#u6*`bmxmd)Azr6ykPuxC)T)) z;+oZM0z1)}m96Q<^|d;`Okx_u%-lGxM(_=HGR1wr!anp~ebT|3X{29(kBs8Z*_?3F$|B-4inZXd#& z1BUX5!+rVJeg5qFNC#H!_#Dl@uYgLgeo)(K5K4#p?2NSiQeDNXI=u*)Bn@ z-YNmv^5v+XvIVPNe}fjEd`%ibKy7(TgrVm7uOf9s(0A@-3p)$mXDOL7hvzBrtUiaY7 zf12@bA1#<#Ce!Mt6+vQ}3bN1kVa*W8WB#&Z5w;OvQA(aPeW|d1)E&y`wdHzbkiXxU z<}*%oADW@|3-R`{AH&p9xnS)>KeJnft>Kd}`CVU#nPi9IPeZsawHrS&vlnT?--~*` zqs0HRWi<~2dEIXE4M%j~=jM&&4Rb5dQbp{vJ(M+do$@kDjo1UXVD6=~;@XBX+J$WJ znP*Xh*-pD5cUlFs6mCMPi{f(w@Wo=x<>DLB#5V7g5n9 z!R9H+sO{s$7xxe5db3n8-eG|2RrH)F<@^U6fZXJ2K{9GGSow~EHOnnXXFU>?qh)OK z`oUZ+bHqy0Y2^!JxQSGQmA>A*IlVh??M}UB^iFKPxe5Z-kL4vLy?CMfbMmBJ6SVX5 zaFuERdqAx2m}|4JarI-=tuZQfuv)#M4)7 z&6TiHbnbF0Hq;xUS)o|AG1o55fc)`f)~&jN!AVg`h)5KU+gt)8+^8|V8g4omF9!pS41 zui6F)m8Y@#_l=^(u@}(XoGC=~Ix5H_VlZs8A$RvQU@>=kfN@0u%6=`@%CFA{&Fg6( zGYlRPmEIjx@#n?JjC52p54r1fKk7+|)#zr=s*2xX`rGF?TQ-p8ecA--@B>1DM;l~Y zpT>xZ2gu9uR@5EM6sniq5vR>{VF7b9Fs^SnHq4?sX-Ek7N&E-R13WRoVJ!DwJd%g5 zQP8`Z2|3DX;Za*waiti8zDOb0*$Z{)CYbd*?Smd4fQT*Igy<(`#1R?5LKjUE zGVQ&2=$2{XxziHnL>%s*{7+!9`x7LrbVB`@M2vm-4<@-+p;0Fb7P)r_ujoh5#MxkM z-Y8&hVWa5u?F^{nvbC}xVz;NaKupP6(EVC3=t9G=?!E>khEK)LiNvpOY{4wj!ffkr zL0lAn2yY;cc5bJD1rKbk%Bb*uHaBy2$lzi zGIn+srY;)F>ucyP`7uEZ!%i%Cb!UDm(wikN8_m5*pD@|TiHpgOTrW38M{5tRX?Y-e zngqkVp<3{s)SbuD{i&+niC??X{CVzwV#41=P(J%3c?vqo&83rw;iyCD0(;I?PArD} z>oTvt?D}Xko)BIG$-@p1LD}FYOk8vl z)e&=raLRluyJLV>pLc+U?)pOgNj&4|$W2-vV${aIP-yDR8)wymZZPp?SQsWa+q2+B zJ-GdiM;Q5%j<9#bDKDlnv{T*6rO`I^%%-ua#H#8KtPOGV!Es#vi8wme<$`3O zsTk#VAI-5!bRFKCB~AMcLI-r=jc4OQ;;o}>hFPdInkt6Or}}ikbs^VuEK}~X7gdAK zgEYU7m=X5|tfr3U?D`3u5i*eJy&enEUrR~T5+RqaiG*{%7&3*#1q*9-Kuo{UsI`$} z{Qd2qjoXOf2Q9$VAei{`ck$Gdf!xsgE;@{8g>ILIGUqyfrrqz(tB(nwp7c$dZ!?Y; zQC*=e?~N%|H_+uUaS3}gimUeZWceneSm*~I(Pqsh^7=YJ+5b*rtEE5X75VY{qvOS? 
z&+nk}`edvO%csBT3eN4W>?P&e>9@Ly`db~esblDD>}O2;1`1zh zIb;}lBJ@z`Sb(*ICS&W7e9VZqK>%?GG;mpTc&Lu!gaNOqs-tq_`GOG zpVfCE{C*G|$QaLKBFsVgG+le(iWRH+pD#DGr~>c08gNyOp%20lPc-z~W2gD*F>H^x@SGMbeeGx?JrkkDeo4}6v~^&L;Svg(c8e9D+s+^*kPp^KO%Ns^?oQxo*eqr1oLz7ycJh zCh6enL&l`PuExf7QBe7df;H9-XW2c>EO;_KCq?yQl;KH?U*W?Xa`SL1v9>isUI?YHuYn$y2*H%ybRlI3 zQ$^esJcr%~<*pW?VbXnA664Hesfif*FDwUMGr--w3zu(C1A=Yvn|6l0 z;CmWI{$k3SuZ`zcI#*tMb^xD0*NsK(9|xs5DUkT)G8k{9jFe-Kz`k2=rfm5WHQ$Y4 zolg*xEov9-*90>A+r)g|g3z=x6}5jX#lG8IdHQNIRQ|nG2%Aj|`cc<0u4jo*y>=8< z{`?I;E|W81T_8(Z7zK@Iuc7L%ASk`M6C7GCndDLjx$^8{QEIeS%)5Dzc&gh#&R(O` z@H9qQoy9B~tVX`|WK}=*Vc_YZ+|8TV{4>nOg+YT^BeIM$CNC zDQxLLXXus{g5+h9yxgn?H#P;Zpvw)g=I>x$ACfJUb$tonUUcEI`@|Ld#SX3Cb>>%F z4fy)<(LD6{PO&*vCAhiN61%byU)zmg#_4gm*i*)2x1t1xi2kr^s5@7UKOy9G?8KGZ zxK3rPo6BGlS^h0F`?EG_UZ z295S&+6Q`+o_QdYJ-1;|y%=%te#fM-Q_%cxS5)jkXyKm^kS_?gR!^C?K6+8{k{p`+7j`U$SryKByf)Utk zrxSyoc4R5nESPt%OW1TU1w%4`8LxeW(z_}wr2aH-+aXAB82}YSzTl6ljcxLTE)&pV@<#Mr_Y^IAc4PAYox{qL+u&v<;@qh;BO3nFCb!?3@%$=r_a=w<^Nc4SFg?);9D@LS@> zxpMB1d==C?57O_+;cX{@r43Di(Bm&as<09C5wiqo46$Mohq0=`5*}W<3^TT;VzMuA zn@?L&zx=p(V3QHg5v+LH6k9H25--x>9u^Jj$>aYdrVB+YIc*^w-)`bzDUHBEG71&1 z*FnRhyQuxgnl14l7SweEu`JjhG}qS%n!sO$WdYXAcD4u0+7Zm6I^P7F#dV;HvVr8o zwD+jEhYgmG!8=loRs&{3Ld;%_^W7#UZy1i+>l#q^(SkCqf>>FT#45keEaA`-G}(C< z$GVXwbJ2fdYnMdS3?42>-87=h#s`AK$^NwW^x)aAlhGl-8`Rc6<#BHZfX&h>aD8FH zHoM7r&{sHj?mrS?a`f^4aZ z{(pde>n=fEX3lx{H?)iP;*HO)L-w0oVi1hSSet#QPQEY5uh9NmlY-F~8gcAbDJvW5 ziZfmjlcjU2sNAt#F4^!ztgy6YP2|JVRIS7H*Y;EY&;hE>or1ES-T4ujg2`Txhf;P2 z+*Z0VQ_9qsukXb*8+r*TZ^-AVyP&lnY0N|3TeFHi1KImEGOqsT330w23;x^3a?QsD zLhc5==)GV#t5&}jR}rN)@!MX^o!bG&W?HdbBVBm>xK7Nc<6Tg>lP>9YU(!mOuu!93 z&{Et8W(&c)HkH%3x=UZ7*}}*B>x)GgkZqGe+Q4TZQF~$A(4jo&i3-k5lhVv;2ln;r$GkUE-It~kEa`u;YRN#i7bh|2 z@B46vGR9_)8^fhb4VZ3tnUMHKRmZ^yU#0?_ln?!(vOJ3-j$t?_OC4jGjG&)$a-y>*c?R(ZpMin(oTfPc+*4e`3VS z@7}~*nW3F9W)zS9@DA&Dw_)NaKlXcuBM-XjkGo3yv7`s2<+?`qYFPCQolrVNSB?&DhV-X4d)B6?EGb{kk#V+g*3xiT`Q)Kt$^}DDpcnl z6{OQ@#mMG*D7tIMv-i0(^~`5-H`5@dX{;9wv4Cot8K~G^AykJxz{UxkQ1tK5^M$^Y zAy1iBQy7GG&|t9ZSK^|LX%FUmJsLRBQc(4FmdKohP+z6o}p&r|8Nt;vn7Nw-guqrTR zn$q7zNk#7w1+q5GJF*WH)oDUJz4uD<)70}44}m5+|?GKxA+K~Fl<^=B4qleavE z^mpYraT~C)lj!WZUII$lR?3i0mup>UueSEMShd`R)%DDT+-@6$`BtQrC0j+@Kg8Lq z=_|zlcNXiSN3)u+K$hS?8FRdhxKut)P$jqH^P{FLD6A{@yk85AE;FIgY%%BzH-ogY zALZkG7Vc?rS3+| z@Zf0-{$Bu(38ahy^D`jzo+(UCpy$c{I7W?Xzl1Z-i%m_ z!IMdAyx*C7f9u0TcN`XEUUqVO^8c;>g?y+{cZBLVV!6CFz=&S7D@dX&r<{vIWz=w{ z?ziD$173I$y%#8xEZRNXDjBJ-8^0wF|CT#7;;w|YMNaC1&w=d4M8qS-Qzk|!)X6VhYqGq`bYo{6T z+>U{~x$>(ResUf9tnp>P*E_MquK|49uf%13J6QgE4G)*e7tCYb zw!q#P;(u?uFNF1OMcI*7F=>-1I>=3-e1a9X8qk5)pISkAQd@96-b6VXL+G5*Af3@9 zsGl3lL+70rW$SNKpFr81%^n~e9m(CCkAuSfx#+g92J+XANYzXp zT3rb0PjkjvZ)7uXgXV6Spiq^Ak2%dMm(fnt-ydtfZGeV8blzS4fy!gOuX&H7=*g|R&BbT;ICAII059)u=oE!6+AMW{VG5p(05Aa~J4QFba(8y`TK z3)g31P*O7lZT$;ARt@B-J4W%3Kh1fl)gVE?+WPK2*^{2WFf|&&JlLIp zW;U)F*N26k>nr39U5DZ0MzW-auTcNTZ=(J$sxzl|Wchxi5%*}Oym6h_dg~!1oODCu zKeu4>*C&GN;WLn@>_*SeJz4prVSGlhGuviG=Y^_Vu5pGd_`nKC9edIYl^?E#!`Wx^BP9>eub9auGK zN~)L4hOAGm*iz7q8T;IYdDbS(6(|Ge?Rbdj8H?c$cEj^A_B{RlGl>252}HT9z;e5% z5P!BmkKeKlLR0Ptl0M~v%Y(^qio67cv;(x7t;OWrW_(~OWmdm-#puNaD9t+qk8b-j z&(qscd1>LrA}R4TwpoMs41pW|NuHlxj7{|+rpL6kD9IkD)w@j)^yyURt~`pB*%#rE z7qRr#)4B2Kj67+d9|XP1MO73r&zH6c>Enkm^KQ$baMEimFla!DeOItv54$;6-!!CZUzIpUQeSp!^B+ zUqDRMKTE{wQZtkev={ZC1`D;jJ5BbVBJG&R&csAJ2HXzPv6EtB4rn>x=TLYFPp_2zq=T<;yljlNuKY2Jek9Z zVvuH-VuQ09s(m{`ZL*t{C(L-(9Hrn0Z zXchu@NLY>E0n|^NFO->d!Lp9sK>5HQ7kc++>e_q4niWG>%*855`tnVP>Z`$glM%dN z3u*oK6FWs524xE`qBP?aln0zenTHYz`b>t2>qc_*3ol`DgCW;gEf!q9EQWc1c=Md3 z|IgX#%C5VUE_=AIyx{JA+{i3>*5M0atmRl|-<7K-n_^krb1XO?g1L{LqL&|O_OD)p 
zgf+>K*X0!G-)=^W>JhB%V<4BxjJf2ut>U47Nf){QH^|y_71ic0+G*cLG3}qzvGND? zeO*Qgvb|0tBuh9ne7*)07>!O@jaYs7van>ID=%~@!RWHXU`-CLw!f$`MI*Be|}k z3;O)>9F%w&5^46J{Ef8DCWWGXQW7e9bmiv5HzHfN8_nw`p`kPh^o|43At4E>tSRp* zvsyGBY|f0`=vgHubL5im*m#Vx1}+>zr%_p0>v5ju3=g5!aVbcQ9R$hM>CoVK0ir4{ zL%}mYUf>%I>Y5p%i_trw?-<}S0y~mE>@1l7m<_54ZQ{J#Uff7)!aUPVm@RR+wYMfv z)&;|c6Qh_e_b;po_2b`On(=}`PB<~CFIV~&VU33l*e`Vd>~X{BwoIHT9m$$kKLK;4 zD3*F^p{bw*%0yefu+@z%sT{{$`s=Vdc`0zSK&Ep}6(SB7i;)Y-qrLbePG||@1z^T} zvX6i?@(!AJwB`!QN^tk;%L6K}Q{Q360ykK($v>_5nWHkcS3-W_g7au+XhxY53?%bC zh1l63;1-kJc|z5{lvUw;KGBqV@fkK;UEUKUKQ-F$O+KK0`Wb`H9)|$K z;gl2sKg{m}b-uZvt+~vCyB;`=X;hk#Hl{DK+&N5c+{tsBRUxJuDWWSjs zW!GI5EaEivo#Hic-bHoO%JtZp{CVoyCc*Q~CG@RzBt64Qv}|7kn6}OgrWXWLthZT=f+`;VU4xU2egQ1tF2(=BpP?yoR>w{@nA!C|*0tgDHs3m3urLVjFXjRSsi~L&sqF zW8&^~wPJd!k<2j2iaY&z4^?k&2$mlGm{i}FB{Z19xSv|+%8Nn=O;D}o;CO(R8QS4w!E%E?Y^T}XT*_9K9MKAH`6ce zh>{VNa?QXUsGD_82&enUrB)|IxDDjp8cZ3V=fRedHn{xoaZI>j$f6g8aR16)JoI^` zpnf9NN<<&FYOEiNzqucD0%gwip_=VTC-^bo6NY*iW5h4E?4ZjK{XFKr1W{yP# zKWQi5k6C?r1@($3F#nbOEvlX@UbPJtejz_XiG+(ay;!v*LsVZW!3yNs z`fV{~+L0zclXUx%o~VyiiIHEP!>X5^x$=Ip{J?5!E;})v_`lxrfT=gIfY>VEX7}SZ z2Ex>jS<)l`es_B7*tb`HZ87eK~*v_Tg$Aa}Bq$C)Fl0+xcq zyFQQ{-VyX;d(t`L2C|%CEb54im%sTB!~Zu7!cN&Tr{&%G*z;YO>p|cVD_)|Ga*HCj z2k{Uc>7xuhS;UDK&@$=>m^%-{i0KEhTCae*Zr*&ZOv>t{KD_m=8kK-O^eSF4@8MReGX&zkIm$tUj?q$P1)3mgIHV%vBrjL zP;-yiaFzxTl|VX$Xb-M@oTPo<)}8Au8ZWk-DaNvGRggT}5|Y*~5b87cixKNKVVdRx z$^w%FwE=)nTpJiak>P{R#!T}w6C^`-2o_^+W6^3A>i?C9lB6$`4f~SxQWNFTrYEp| zs+Z{bb`8Y&ZGyMKUAQ*XlEtQ!pnBIEF{N39dJo-&ut)jO{JjB+94uKq?SS>RHFCpC zy}0*>3ab5WDa%(!Y}a1Q>N}kWQN6k1VG-EVe)We4dsg^nBfN`djH@~|#m#|GBi!Q|IiHA|Kfw8j2F%UeReDv{$ z(1FYi*9K2r{5ZR=y|e=cK! z-&trh9LTP^g>Z>|jG!Nw1xaCd#bZ2}Y2stBC~^SPXbVtY_lEcjqab(jNl5a!1Xg=* zW6fGmCTVr!ArrbY-NVPQ!qJphTi9`R>19E-;&0kVuY<;}^f_LCh12#8pzN9hsO$1n zO#I(^Y)p)V>XYNKiDn9#mIEMpzZ-7W598ro$8i49j;$!O=ZPCg&wMk8<=PK~yfHF% zp@wqEdcMJ!0`fptD4^QanjP3Wgh{SiqDHkFWOZp5jUL)E^C3r2WjFz(3-3X=Y!W27 zMG2p~3oJ!YRGNESjJ@Op8u*S)KFs;KDlr?V9He4LWs-w;crY5#5 zepd*q-jFL;sa8UyjCckCy)jA56s!V|oA z-H~Taeh>QS=|a`iOJMSK9P{<;MEQ;5L3X7^tk|B9Eq@GSWmT!r=0~}R0}3JJ)kv-_ zwPCeJZ6NNm;GqV3SW?!LdaqV2yLS}lr-ty#%V}tC?v4SGq*sk3J>B*m{Jo1YkBbvQ zT|5l6(>%~>JaHloH=wR$ov{AwbJWbgB$h{*^U@d_CaGygy;TrTn(5DseFyT=3xDGg z^+j|!!8|)B4jXF)Pz2sUxyxSEzfMQ{iVMVC?S(EE1~RqlCDfD{vXlo~@TJ92 zZa8Z&i|S&Eo`wz}x$n&kf^50=_GhSHK)vmXN*LljnmtJl;nJc%F!aj+%Kva>jXhmi z;;*kEJ>`Fped8SQPLknc%K+BAq$3l}{aL{S8TpuRp!Ci=Olox!73S^2L{B&360Je? z_LDHx^$V(s2JtigHe7l(4!QCPh8IskmvuJW^dV&%TBM>(xI)Zfy01)0i~aP45HYbU z5<7FAV0;tn1(EXYqd`)up*_VtY_5An{&wQiZjf{P7avJ? 
z7b$OU^TV(=_o2*)v?#k5LHLtgaLYJ^+PnQRI^aA?JNOAgHO;QCFA$Aq*mCvKvsk*e z7$aBO^O}Dz!Kn*wEVp1T7$2U7@-7N!`|iZ^1QodS%o4w4P`xr>1h17!xJF0%A-yrW zxU2-tj)lUmzk0Cd;?b|^&N$#9SWB0zK${kA3^i752)AA z5~?;*e$5=>=bH4So$)}HTUSqetLNBM(t$rJb7aQP_t1{I6RW=%2Gb}TxN)d}o;ybH zy`-&d>AVYk9f$>bJ_n_zbHD}e3qG!|aAD(cu1)REi?;jn24c}je;fhZ?Gy%kq#sMj zGJ_oI`J8Q?SnG{bxNs`%65iB-(~1{Z_@^rmn5_kUbE!O$dhhD%T3ByX2|-xtc55g~Wkd<#9#f5OR)LELx*M~kHYFiM_@VJFKWYd{aS zL)n96eUK7E@i$@?oB_?}{o;>vWvCmT4cRRP82;}z)DJQvKBOKEC+`9w%$U^?D^m54 z_5-gqpgO9=T=P}v+|7mO?z162(hJbtegXFe+OS=HyqRIcXAC2as}Jb|U1BLesbUu> zf944Fb;OX<-A9{{9?Uvn46}b}%;sHr3Yy<(&lCS#REPVs(z6ZV(Dn!9zFdRzb>q0T z56zac(nOyf7F_z@53#}j97xyx3HnxdZO*$|(&$_8m!z?&A7{stP!RGDy0h$=a(J+I zC@=rL0VKT&qf?o}ty8cTkF-(7cB;^Leuc_*yMw=#OCW$3e_|wJ0W8 z{tEg(mk9+%iy-HxKxgS&F=_NZNZVfq_URHXpYH&z9j1YURRk)sny_@hK-!n*2w(H< zSx~>F@SHg96@I}yi~S3wt51>E;F%aXfOx=v4inT%YlZdEq+4!30J3EXqN5r0WzP<3 zmDXkCX|rUJ6~?TzxFRZvKB-RqS_}AblHg48pJ`}?@zF( ztrIiZc@1rvf?54wwdfhu6Vvy%LF=gATvgFXTq)We85!`;>aHxQbE8;qJxsJBo=3EO^-o6WHtP z#|j=#gYXNTK=UROJl6*E&DO-o%W)7bx_2H31(VpE5zOxuLgJpwbpP%Wj`fmppBy7z7IFryLP{XH zo$~x{IdaLwJz(;8JvziHP=4Jd!1LV4~n_`oW}YFVd1Z7*4E}i;bZO2Y?f3yFR)&AlsX#+wNy`8%-5%ZX(L>abq6ON=3|!`t)Y z`Rp89rfcc|c~z&$Upx#YcK69^ZpN!D_k&POT9v89PqG}yq9*RZbWfTcg`E|e=iC&e zjW>m5ZCyFVdw{uRo1pe@#|KSz{5|ce6?I?5z1!u?VW%yO`rLtgj{OEYM?*S~{zn?k zvCQe*1uQ*%5tXVN^5Sq}jn|V$WGiKo9k9Y`F$|(kSU}ClAa3&KUT{81@AtA`XtkOL z1#w2~UT>=9hf6SNX(pWc%aN%M{4F2Wo9^+$U0L0-uH0k>vGOy!vABy*#mf2l`0f5! zUip$^qPiE8Yz<%q-yVW$+hII(hQ7<`*4*6oso>xk3w8m6xw7*dq3LcB*8g@{ys8<^ zL)N+Qlq)|mbh-@P#?U=ke~MW0H$;n7#1pn3iP3|yAnwTDVqEGiF;+!8`5_Nb-zo^Q z@ROqRNoPLeV@F~qu0-{p|(jlgp>br?v_;o$@iM<90FEZ!;!^5XUQ`GwK!=W63dME!!G7VqhW=s+nwrUR?Xf z3Cax~%j;iVhvbWIv25K}s2X}0r>z~v)PFohgQ<3;ZFLgr4|HJhlPl1Coe8#V@6X?N z@~0l?9R|Iwffak{{(54=3?GN^X15NIz0v{fYhOcs>kN>-wh%+RErNu!*BE=|2AJL( z%3~voF=Fvum=;Q`g7qd$cU zo0LeS(|qnnDd{94w%u%7w0107zGB&E4Iv^T#1Ii8a>}9Rb3f!5*&(Hku@NC6B1DPb z{rhA8a9w-3$b9DWJoo*6zh1NNLqop}m>l~|l*m>Sb9xLj)ZW1c`M)T!x}$9q2e5eB zeoBkhV`9ci9FS_v>g!EdVW}&ZbW4LQnGKJgKN=e^OcT?8355Xvkti)%i5b+jIxy6X zNB$cO4FhFtK+mx(rgAK^i(L$=#$C$HSYe|h)Gdf7_JA)>9sHQSQ}1H;stTaVZ|my#73>?E8x$cW&Xve1i zyJ)tXa`XN(v0|tbmpx0xPG{-|62nbm{0-!dVR*Tb_y*p4@Ei|i(nbBCXtN>vyfK0; zTI9tVx5uC=-$RJIm;tQ?ABD^|>YeoVXPUSE!W0`@9_$;+o`sEKPYw>}S-G$Ard=3Q z?o?xIvpE$0OJ}V3WK_GfK~6vrFEEV*^^{ja`MQ}%q*< zaVR?8gvMvjV}TWQ@Unr;8cErakzDK~=6FtrBU_fBD>N%WTX?T{-rnM z9a#(YTil^x_+6|o3Wp!V`|`|y@i1eREzh3&0h=q&VEoK(Tt>UdO#F%!`RF>DdP9Fsg`mrHw(AJt<;C+#pic6l z`;fNSmA9@@J^@`EbXn1tfjt4w^froNTIF~nLMtC4Ucc=sHMpi-jc3-ae?Yda^md-lw ziS0G|u{QDs<>>BFE>AHK^8Ntw%S?n9*M0fsohDpJGs)2dB< z#q!dDsE_T6)!!)xKf6Q>j2ysp&+lN{%AJrk%9b~j?16?F#G>2l;rkyWSe4Y0MW6l` zRDb4++EOE4xZ*Ba?D>q%$sy2^cN4SMUnggPGiHri1Nxp?!TOHCtN#~A-r+{jrR8eR zIRr7SkthGSB80h524**=H!Jul2s*Enp;WNOtxA9H*oSgS#K~*y^+`;S{e$y&_vLA} zz{^*hf(hN+SS2}v^7}nRU4RSE`?wnoPxWAndQzU@+*wf{k|#9HqYfSIR2suqh~eWx zSjxL&Sh?m3CRm%JDzAk+17)x#$dDy>c`hEJj#T(h%`l02a60*)qVA!9bvtaCTEAPo zd}k<=r!s66Ys9kEhlss7l*znLimI6{O2-q9+~lx?TWRPV7B`lCG*+;L2_;zhdlqJT zW(Xv+W7)?hKzNt$SeRqO%hx(!N2C$YB@;xsiufI$96JB-9pe$C|0`3eN)P{p`YdzZRdySe~y~XP@XvSo)M47c;!NqVnTlA*}91kzY z?RzOFEFh|4PAH2VJ_^^XXlI@k$aPh<+8_@l@BV~(A8pm>m?h_pK5>Gx`2g0qq*0K+ zzJrb>*3^I03Bx-)S@lR_v|p*EF0v1AG?rq~pkq+AbO4vUxuwiIHJGb6|1H+TYe-># zV`rC#5ZN_HF#NY1XhRSo17Q9YU5ND1lszRb$uB`Bs267BkbgOuyel$9oa*LzR2_#%KbuZqs5 zg`&jkx>hwhPpMkwBPa^K`=xeifUG;mQ2Fm{Y@62xsUGCb{pl8j58nqy#p8H!`+H&G zAsMe&QVi`&!pX@y51p?m_$5Do{^&E!$U?4Q@=p*zphOrkuz0D_2l5Iup-r zv}e&(l->IF0OOU!!22|oNuM7OVupN0v*-TIcY-@Bo)E!wv&IY7o9*}|`{68o$`5$^ zH$Of<%#PRebmZlEY1o?Oz{5{&g3oFLX5qFU5AAnn5ubZ8*P&a$-FpmI4ek^o+ufP+ 
(GIT binary patch literal data omitted — base85-encoded payload, not human-readable)
zy8u$J9fjO~K4bXCY;x3I2IG_0V(_L*#1{Pu+dp(>#_=*%tLVb@E#yDEPjf&QTV7WD z5yQm|VuaIksBoTvHdoB}38e>rmh8@VmN+w_FthqsTd-qNTejPU82(N3U=j6Bb8l7& z<^}y&)`?W8Sa1SU(?U>HX;0X-gkVIT07q!9@Zq}1Rd|HX1P<_|i><@Egmb4zg zLN5oSq@hK*WtJUJ82lYmhKdkMc_Z~|@^|bA!PMtHVD#1zy#DSI%#xo3Nk(`;M#e#K znSKWjrFLVnJ-hS9C(F@kQ7U@fPNo@FBz2{_@cK6q7Sywh% zPyL{e_d)*81yQ|8%HkD6P?I-V>2>Kk8fTkBRw8Af$JdEb!wu;4umRW8j52naBdAaL zvGo>1Sb9f+1r6!W$AtE0wy*l~xLyZfbxRN{%zOofzaSRMk6`==6Vz}86y&mM zh)YH(xyCf_1Rg&nS8N(UJ$LhY(9rrKZg+c#i3_RMHopoM-0aHw-)DRfHPpOXE>iC& zmAb6`x#U|{VfyZl%yy;&58YRQC6jl8;&DZw&(k7k8qk_0*!JM_>9gYBuO1iNAI>8v z`irX)=K3)~-=1q8IuN_-fmqFoAbQhO^0<8> zCT@3};5nSR6j<`KfvZ8~l@5go%b;eeHy<&o6H{Dj#jc#SMp89@sU4o)aHw|;lV7_xi^+P@jL40!~)&z|7Axd1J9 z1GD-?#uV;hLe8qAXnG(WV-{}!|LwN?#=2n24Bir(OUHvMd=jW9+!Q2se+vHUr@${) zL&?JZAnx{LeNVLIk?n#}Hqt>nb|#D^?RkUJ)cu ze-o?S4nXGZNbq^E9&&%RVA@sN&>}_73V+ajwQL`5Za_MQ6E7UcSM>8=WldaV<>bY_az z7euc)DyDeQh`T8#SLV{2Nvq6xf6CQGcIt+cBDP|qrz1uWABM5hO)=Q-B-(}CfaX5b z)pa{2D59Pai+MSe-qL_AamKBzRzvu?R($j!XKrpyJGoZNA=s`jKh(#UONJViVsrVYFB?|M()2Q!v9LjonTUBo`L=JncmFr>|Z z>TUg5N?$*IY_bzyJ=2_(MEY{e+rV^<=fLApFwgyS7N~oWm-lQNE^D8Mnz_G-y5c?h zopV&Ye;03Foqr46NA;t*fj4Wk?Z-5Wmk9y?bGv)g!I-zil1MC}?x!3VbfI}jO);kR zqipGT2W)8G1>rg8P}a{J_-JymIFpOuNEyuDY0J!)bt3-UP##{s2@+k)V0&d6dTp;q z+1vg?!|KNnJ)8Wzm*0cU`ZFk-yIp9q+JmN6xge}_XBEAAqU?2FR^Ho_=~zzS`#X;8 z(9m3v7~Tu{J1z;S+cvLK-ZLsg$W%Yz`~N-4y{CW zDi?KW--X8HJCuWcE6jZ7%}aX zXF(cqwk+66XkO6)OAiF_=ze|K_EyVK5j-_8^3Y?-%;=%;Z7!+@k0gd>8EU%{x3uh$ znA70ET4oWW{Y)UvjVh%_~T;y8`M`8+Kz@8(uxS26W3BMLI@d>BL|R?wte; z8$W@qM?2pCmMPQd#)wTGF3hNP=U&1UFdZu5%+783uU&?5>4g^JW=;_5OZ>3HFoeYx zNtvGmxqpf4xTVidl=bZ<#%-m1mE)32l3v#__EQDkeCWfo-kWfFlQm1AvwCJ=H3nI> zWi{^RJih%cDBPfic$b-Y(efJQe_Mo-{j4 zEWEEw-St37y|_wf`bP8Oo5uxX#7oiKsufH09S56jotaK{5L{=|p26=E=ARvmgIf3E z`ZLsVpJBszWlv`M#+TO-Z^@NB8nTo&_)-X)c3}8@@@Ng{ z%p?~}P`{QO21i7()@a7#J_hoGUS_PcZxlwpS_(EmIjD-=7=`61HC4fcw!@gV$wd78 zmo;k?ek0ED8}g!`7X3b@LfrndXt?uKpFKK>*wr+HFWw`_u4D^w#Awr&zX5H}tq{@W zJjAmtf{kq-%Dl^=YAnb0ABVD!L#RXKNqP8Un!^oli|XSE=vuxLT|aea7O|huu%ZKt zK6o3(bfWLfqfM8%a1U8&F_@+h%cqz+`wBTob1aywM-5z*?u1NYahLtKihAjOV$iyV+~r(Bu2>AatNMCte3aviPv6F8EG?S9MoM%`cjHjb9#f}B+czR+(1hWu_(s> z1EKwvqtW{-Hk8tPCfk}fJbVWJQG>Zf4++NS?Z?Eqe}eY#Lb0%@9|qs(PChLgW?tvR za>+3*U-456du`8S{m(&t;6AMQZ56Jzq`qa_1;Xjnp~Ogy#4Q2Wz~8+MOKtZVlm0wQ zz7QoJF|iHPO<#`{r#=X`uLp3o^=op5hu}};AkLP42iN=MuyY7G^AQn8HyB>3;FgDpYyL73DxS5b9knSdj09Y|%8EFIYs8sm5|+vAu_EEP zSo@_^BZgoQQyGb-B@#K;AGFEMV2Xm<}Zoi-v zJO8CM*A~SJt{*47Y$ z2IJx@u(rJoQ`_Fb@R{Ajl$9cC_&tnSTaGzThV!bX-Yj=n5boaZ$y>(KUD&S(Qrm14 z{e5Yskf;{OF=PG4$dyb*U5Q>8Cz1Z;dC)JkLk#Ds%*?| z8ihKyDnVWnz?I4UpmFbNVf$YKq{d6I`Wdk!rckf;a}rAa4H8P}*{Tw>kp8(7k2jwP zir)ug*pYv*WWzD6ifWDO_uoa$CRdE_nkK4BUW&Dj2s@^e^UI(OJr@tr*MAog#c36TsAC%~8@} zw5Uid)q96_Wo6PJrdr&FRXJF)+_(0E!p$5f-S5aFcU>pmhYpHm*38BHCP?JNU~O9~ zHi>r2#-Kam35zhE*)dsE#O=j5mjZ-iYtb3(qfr6V%V0 zl0t4JuMVP^NzA>yS0Le4Yo@Q?1BVu`!)BO)(vMN7_B^^p=kUuTv)00Wj9xmQ~2Y4I8;citeVfznEFxhJ0@kjo9wu%=7;E# z5RJ-=X{fnfqf{LDq0id72Xz}%Vx*LIlgkG(*R}naddObEb#Z@QS!&N>#@B-G!E0gI zZ>}u66XnK!dZFy&s=`d^09ff7!0>eylh|9bD;VtT-0Jix0YT?Z9irTkPaegjQ`a$Uy-3e{7iA^aqIY-d zu!k4wFB%+J{X`?m(T2xq^D)nKFmG09VGa)B5&s+lS9uFe>M)o~-qBn|_y|!uHiJ%~ z;Dy^7Kz4JikTgz@x;by*e113LkDn4%HuTOseF}mK+fn{fOMa1uO4$-WmbKy}4*HF< z{*`U9EO#i^+N~87kcUOqw8wGk&z3AO=ZTWPaCv+?9#3a1ml9t{hRf8&w%2c_drn#D zB^aT%VUO>5@X86LAz|4=Ivc7e zSFlOQ7)#vM10Jlj@K22QmV?ur<2b<-cvV|F)GRWC_{fpKew4$YNH3;qbpT4vtS5iL zK$hFk30&?jg7`zjdG5t`;61P(_mBchJ!r;?XN}9`zN-q?yw@AC?KoOfguo3{#9XFBu7yXyr>tLcJ|>E9@Mra<=PC{(yc;PN#& zSk!O>8=HuV+_`AX(`xd>C%`|yIU+aUD90rYnpz?Mh9!U-#YZ%i;} 
z>KzPYH(ZAao;{g%=qcR2l6LfFp1kR_9XE}kd!FL1Xix_$r_U3lh;C(XUB!EfkV%$*e{*gd_is%-#xjUe~2rl`+-lOV(K+)GSR_l%GQ)RI&s$s8B6XLk3OmM@YhAuhcoQO{aQJ1GCc?3 zzb%BI&2%sSD;}G*pFzGq7&4p&gKC$6f{QQD-I;-LL9bE2#*xc^zlOQvEV22(E=Y`- z1Y^edun|gE9zWzgdYquy>_Zc-I$?{p3GUow?RGe}DS(G}?#qLEnX;vNS5|#+B%5h6 zf~A|@L*XBknrnmD7xP$M2Mx1eP4HV9`_RvBA~} z9rF{>%w!0tJ~<1S{%2v%QR>(79FRqf(9s){mHn42sxMlgX6{rW*lQ(}{G0(T{#$Xr zCWv4EiZO>kTdt~HrHfv48qz|$pf#u$e_Aq}WffJT<*F8pxBCTxyF!_|eGsJnazf00 z`Uo}Y%}_U;u{8HT(KwZ!Z-Y$5!()c?kOO`^b-tXNrT&Ba8@yP@O?xiw_zN6m--_V5wj@Tzw?}lUkd5;3b#fLyrw?1?w+5`2xo{6}Ti&?q zBzE*_!WBintZ-sGH1#?TA;hTD^oWud_DI0QV*_Dw`8%jLrulP+0!xUI+Sp&>SD^ae5W07ic9-UQB;myDwg#!5kHYzTbIxWD)zU4q5P^4J$V94vAP7x zy3tH_E|kgc-6G%K|DbNsTTDFe$<^;eq3zLi)P9dbO5G_~B9f2hnk_f?ld_QG4!kVS zAaJ^XBZXM?#6Oop8~80DCtH^vMteSj*7}j$m-4I51GW-Nhh{4VIrmdt0(OCd zE>&A_Z=*RoeBP0TWcK9sHRnM+(Ffj<-@WUHvFP~QVsud3v5Ku{1?_JN>^ia-7CD)+ z#sd0xs!f90^jncdP)>O1pP=3+2w^5hEMfOHFnn7srtkZmm`R^d+ja~09(U)`NBgkA zKwOZa=Hx!BM%nKXLdR|eT7_1L&XPxvD1Ql0%0no#^#)o-?kCsiIIhp1Cvs&G)SA+c z|H(7FB@LpE0L?{acyPs~rP!89T(r3}F}ultSK6V9~Bp_tyDhTX%iLB~Ebo*q&MGN&a%g}()FdDD*- z{ImsT=3B8{rIzf}z5#6AWSU)E{{b|$BYE6a@=f>JDnu1$g1UZ)Xh{yE9F+-n{a}l( zp=G!%oc1(Z5+=y#poxOhV;$ci7*{h?l0WhS}qzdAG9_^+MWl zvLqAPT3cSecr}*zy+g?k23@D*DCwUn&zaN{tS;HJm;J86eC z(h<}dcX5Bt5VppDD4#vkn3X);iSqmJvF`FP=(g}@4RJTbD<>RSXA5=YgFXlib-#k& z%3wCmmVBs3dZJqW0JYCPh|&!|g2w+hD7i2KwgiN8rQ!#nEvXpdo{eIv33OKX9}F20 zGQ`>Lfl2^lUh*nU{Mw8e9yX7$Wz8K4*4sL{-r;5PrWjJLR$S0tavAkQWAb8qX zzIVV_w&=rENLl001HLoj4L2+Ci}P5v!4Y`gwKEVkWfRo>NEytiaa=ye15^IvAW8TR zZmIh6PQjct_1OWnFq)5hIF^S5l7IKS1$TCP3vCV~_~P#anQl9EQ*Xb8;BEhae#8Py z+eyx`52wMA_Kq&QjQQld<9MOr6IxL>HEMnU9G)fcin>;i*1QvzmJDI`x<*(vcr??- z55pTp#H<(G!0cXMmXj5RDgAxXkQ+|#&d<73)`^Yok>Fb~kXu^P8Nbq=@1^ddQnC^x zd-ln9P&c@$QY7zgA1*ChDQYrH;Yw3qz9W&bDCYpkTDT8V_jQ1N=WU(qz*zL5bL{ob z7}osi27XZVXI(|ds2{ZpwpEz2aZ8A6Ml4myhybB0WFhE(X_Xh<*#wWWpM&Egawt!6 zz-M)%c+@5t4{I~!ZSAxNvRxwDem9=o_;mtH4tk1)XUSO7)eSH1FkahUgpJFlQU#?(+=$V+Fnea{|xFv&*J0>E!ei92hTl9-?Il^7(Tl% zQ@9_6x)w{OXk7&H{t}d0+<}&LwV;C#u!{Q@QeMfi;%z6qi<7XD7X{$FTVRe0$-`;( zL?~A_L#+8U9R7<5o0Jg1{g?~y_^nVV`7Iw~`$n>RF+Qw)>}Z`5DI1~M%+j`g!|T5bDzMYcuN+{8U{~5nW>TJLp`zr|F?K? znICuDO`Uw30`wWsk8;PKAn!scG@UDh>ARe`=5;8OR^An}{b=7iYy()S1@?tF2eV6M z%+z!wt8K1={Xa&MFZhcf^?D;pdfpOu459Pt31418-rwM*CotC6gijXCc)w=KVV>+K zBs&$8%V#q>PannFUcN=me-b`^voBX3`0)D}! z^Jh`1NEPCz8-&a^19+lkGIqsnL)pRK#k~_lnW5iLXuivgkFPXme3Tor-s-_J`yYhn z%NxPUb2`e-N<`bigPHQ~bS#*~*p^eiEc<`N{`}SV;8OhtqH(+qHC+~PJ#kBKQ+-ZY1&&Vl3h2f(Zv4K1_!MmZufzFk4 z?+RYUL%B^_B`VJJ{(twksO}oW)Ap{$_-T{Db;|!h`xj*`!+MCx)Z1-wuY;TY5JM)J zF+0~NW+i=vG&TXLsVinYrCH0thp6}1EVjo|UpQwz)}P!7U9ShCPhdZ8ua)i|FxkhT*aVAP)x+}311#swZi{o)b2cDlnj4kkDAhAh;N4upmm z?I2WBhESgdX~pdj-Orfi^JpfJoBI?olR7R9$Cm!@p~_DUF8OZUe1e2&etE7Ne#3_~ z`O*w&!AVp#j>CkBa(*vmFn`)(BrDk}fs}g*aOJ8g{rtz`>;x~qxO6aUb)$a8k2^q? 
zz8MHXw;?S`^)%DtC4jT8+T(R|gnCcIJ~$?7dO zVwRUJi~E!KDtp%m(iN`6;q`-%T_ad$fgP_sOnHHg-po*81g!sMIJj&u6U_!Pw{&7d zj+%rgjtyn=j#%(Hfup&sdkZ?gs22;beE|K`HgUt(67oOn#1^SLOtz^Dn#MDCF zuM*-h>CsB~6Bphc&5}==^Sqk77{B@k#ut{0`_0W+YXHp;`tPu3QB8?Vf<Hh8 zK%O}Rz5i=n;rn<@nnX^#u!&3?Faqo?juWr?TXC5eanXY|=q9_qBPRcT5bFckIbr~7 zqJ9wD{jNIp?Tz~7|8UJ%jfmJSLJ3G7s3acX*GXqO+3GR;h@5qCqB84hJVAb z==5t;)_lM(GAGt}HUh3xn=;GCZY=JrU6`0>gQ+>>GBm_+FY5t3CtWKxz6jtIvuA;0 zk4ACQ`hmRB<~){Z;>Co_+#}9@EFzV1#$VS$_^Pq|#fUy!S$Ir%)Naq3 zp4~#n$0p*Q1D@Fy*h$#VlW%txZ{v9Yxnf{lw|#ySHD0ylW-+rOt!N;y&Pv$WdJ9v=R;Sc+zxSBruzptWZ^jEr~TE@B- zT?LOo4=#E7Ozu587b>PcLuaonsQyw)J%h_oIZw(R{pO>}t5e5(&H^V-hV12gUBndP zUw?EJ6D|rYKa%lYBZ8Uz@Mn<2GqE_rpQ(3S2ui~UE`RV5WH-ix&*}+idSC=oKdBQE z$v36B>#)G86>?S{7ZVpLLE@Kse%*D- zTzoYORIeN5nI`3!`-0{mK_)QyfgNvdAO^fXOP;# z$KoiympG&)UlU*C%6R7LzZ_G)b;GXX-$LV$rl?roMV#y&Olj%EGuDmc%2x|eJ7FHw z9{L*{U(s%Vz#DWO(}W(Y{kXK>8fdl)WG5mmS@O)Sm=fc~)aL!sta3W!{P_dg?y$n# ztIbeWkc`E>@1x?>UTFEdH+J8+3O-&+>{xtTkj>vCs!h1)JV?R5tQgH}_5|~!gL&XH zk9;y`TJX{QozO7yjF@!JpMUt)merlDgVCI z?Zovw*dW|?s5!z!`$up`$uEyYH&wV;SOD$2g86^9S?XLl3= zcOFCx@IUUST+%n~=)FqG@T8Y--%*Hj3X5C9guFm5`1FKhyy=CSN5_Ioo8&vl+qEvH{XPW@%?#Y zr6*-IeR)yNF-TiM&^L_*H~3R_iQcnqt*20N$sV!}c{6L-FfM!YFXmM25OaQu#Oa~L z3^Z;)L*Gr{^2V20X6*yb;%bO#H)F|D55S`>#JGC;mi``$dJ5N2`ML-DbS0GAylw=Y zJQ|&8W~=dw;aB3ExZ?>?Fr1nwsyw@O>FZ~~{-1$a1qM-X$CZ^uW#Y_<{kUY#bg|x) za%l5@hKjqT!m4^>*0q}$+5ZmUS-H~@|4$_I+qwqpE8b#Ka}gT8+9W({9>ENE7K@Jxj5yQWLCd#okVee%!m2I^ zIp@dXTOUGYz(^MKj|I=)E3ilF-FVKI5qxPVFe7fwyRK(~e$07Y`woBBwKIsxO_DIn z-;w#9*n}PXS3_B^u|oIxt+?SOaTF?E>2&0IY_TGz$a2aE?-(ZdY`H?bEMgNAhe{Rl zy{2}RF0qGsMb!DuF(=k^)!!S{Vw^KR;o|8!X%k+K_;>djgDn;E#|raP0Y zj)x})y}8wcRS-}^^9Pqk2(Bo_uujV2O>`3_9dkwfH!f(95Kp?U0jrIJ(2w|%@fp<^ zJDWN;!%pgC_lBTy(o)!A@6VzG_knWOH(*P1D%WZksF?i&c206;2|KG%(;XrzPp%SF zmG9(*BSNTK6)3uvP+w$4A0G0=hRvni>$WcH>U!8S>GQD=ZWzlezFv%ux6?p!wMIPA zLHvfFP5AXebcddi0`bFcfFkD*3VlbijH6@OmhE<|F1rDp`>EmZui-4QuL-yP1DJQ5 z8+qDhL(1pgEKUCc3Vi}l`CcZL{Y1NjS-o)X%f4*R!T!9IKSWnL({vi$2JQ0$n7+u3 zGGr6LIwq29OKOD^-4RT7eSloM<&v1TQ)f~ybejE*ll4t- zCB>gTXc@xl4<3SKYaMoseI_JL+kwx9n6r?NGOk)4!|z<8JM?d3+0<_YW_hj_%Z`uc zxo5UQK|v6&`2GXL@6N_1t43t)RcKmo!Se>1!_wn}sZ(Ty?ibuy$IEC~^j#D9&B=x> zlyz2?6C=xb9D8T%&C2hL;&T2O7j~MnjyTHu{$4Jo6&W$tlV+H*)D}}`zeB6?!_YA> zoBG)af?~%|&~FS0Egr(uV?o?X2;z#<+r&K2E{HPf zhN(2$aGWk-@h^aj9l_kTVmL2+K~AR~CM@V`Kb~B&3sf5?V^z+6EbMa=wfCGrR`ZuG zed-{nJFpe4wsIj+5zg{HeuWi|DOm7=GSKPE(a|LfQu}_7*^4Njs9TQ4!_T8KIa!Dh zsoU}XV2H%>u`Wfu31hdufW)0qtT}c&bqTGRstm#ETr4PKK!`nRK@2i~Ub^ZO8dJA2 z>f$$;+Wc5lG><~%gB3#I(Cwg^ZA|QcZ`S0@|-fr1`NK z8=QE03NcZC--qL8#qd)`eYs)8C9%Bt5-!ac&8!YlF8pH< zJb^Wg_GDT62C=e^Uj?(1ZIDgw@{8cf;ck$=Y+gT5hy5-}jcq_XQ!QqV zx=l`Ue^&d$P@Z;pIG=YliZ$*u0!{Er)IB~8Qm0o!y3PNfVE+KNO{UDK{+^(lTI|SOTKlq& zu}h&Q)r%R;RdD&#yXc;j54DAFaR+rUCG&H2rmt;S`Ohmc-NcnCTO-Ix?I-A`dFqxv z0-mw@2^i%w=GNJdow(`B_5O3vaA*vs-XDZD{=IqqwtP(5H;M-gr2ZgvFs=UAE}Fgi z0#>j7659f3AG?pT)w(O76y&^%hwFR8 zlIZuAakdL_nR8FJR1?bdZ8HViXAjYN@kUH-y&%DdMt6zBZq#>;2_N%@U%rZHIR;#bqTIk`%dPqO10!zY0^ZzfL$byjT#vZ&9W zv2@%`s9ru6rH23`V$Z{5|D)99o+V_}yWr$t;Hr@4V(^!hXdX^px}vuzZFI);f3Bir zmWfXGQ@5bKQwnkay#oD-4N$i0XG|SiCw9Kt4Ygm<9K+pHXwF!ViU15H;j=;-iW!N)_=+89>nLwou5 z^*Zg%b|J``n2DknF-}&Zubh~|Yz!|wx|woxKY`)mNwKu74ZQ+= zdG+q$ta!Z#?LDly{Y_$C+k6C@(A(&mJ|2y0oVeA|XuNTl-UF_e#OwnXAn#NmcEumY zqOKcw@x(xu_02+z);QC>J6`y-Y$8idb_Gp7d6!HU=_I#ii8l`&!c&=)GuYrDltq*X zlLpe+%5gkPZ}ErtB@HMqw}*zP!L;vwFD9iRW=wrd&%2jG;sQ0XZ5WeymI{TLN_2I* z244=#dDtCy?*4cHSHzg2)yUpV+JoMaW23pu`hxDBIFS3?vIU>%W-$C(Fpq7f*#-Mq zP@kck=$!q~ZFCoNo^KM3-geM=p1X!^L)+FXBY8-evKY&oM6q;3!EV5BmJID zPXpC}5HbHW@QzX$XfHe!>fTr}!{0rH^dF|-)Zg6r@a5rL%iTc}r2{s~hd2cfF)8jL 
z%}(6Jgez;%NovZCesJK@2{W+vWEEx3$VV003{6?bQPyC_6cG+wJ^T&cOQL?aV{h&# zEd-_e7O^al&M)P}hIpt1>*XWZ-shIY#M+5Pe*a)$0l5k$Web%;0M9<@!A!$Ec|;ri zS$B+3_jD0x)t;a}Z6O4|JAf78_fem00#Ex4=E~>P3w>+ehfS|f;lb4o zT(=N$c?YC8sHt{&5Kg_~9mmAQ*2I88#W_&X_xfbu!||Awvo2k}L=gL&-5G5nnUL~dyK6SmN2YjjCUIkeO0JgNn)4twzGKW(6nI>wtb z!dc0ILC`_m3CF=RFm^~Ym@PR1+A+(qx^f_De0_A2w?D(9PuzIC-vX?E=D=Ncy7P*> zz2NxVmom?Vkh+BW`tAq8?6?JQKkvjc3Rgocy@wmu*TX$dKd0~ZKxM_h5FMjF)X5$qxeghj$oZ zeIotyHzA_>-3>wg)o###p^WoTD^^mv4cZ@uv1N$^S?Yrds0u%Y_5H44%dzoX^6!t> z@dr8W_7l$_@q%D@I8E>o>HYr$!xcZq@Tu#mA2zrhHGNFksu=1l?#xE{(V%U;U%&JSchpG3$hYeKVzf1t2uE;O`ufa8Fd zqM_d$L8d<@WH|#i>=2=0&MwjK9c7Hh_JOMRbjBJO2I_V0 zoKl=wS=L~J_|~DuxB$JvOn5H!>1~!#cjWz0EUdmnOw@d_)yR|1i{w2!PjipNPOXIRpJJRG$~}PEN+WUglhq=qr>wb_3;KeIPdUdvs1L z$EOW++v&OThle~qPZTTeexUj62lAUqh0YK)`O6WsHx3fV z&w(kM`ZC}9_AI;eZ`^FG~1OVZN017B!fS=G-B`m`gJjp{!}~dE@CdkTipG4R1L#zL(>;93S3d z_y!~XybAV~cFcIpDE{S)KX3VV4jMZQ<`TD0)FT`Xw)^H{<8(R7f^K0{(jJ_1Q(#Gx z$>nEq6(4;$2aUfqKr;DQq?(I@*_19QyF_PN!$!0U@5{CO79QH_Auc?MA&k7~%`s1sm7@(DC_5VTG9)vsvB>S$F-g#bXAr0okZb(+V#( z)67pFCOUe>isv>t^ENLpR@^HODvab>Xza{yUPTwswi#i4QP!#YR!G=5M*r_{Y$^g6s=4(*6FZ8LW-{?q2O`NhgF?E z4*J%C0`Xi~`nW1kssyf9dN5a4ntcbkvLwojI$8H;esd&TULo)XyJ$9daS(4@a|BgJ zk>ZZ>5iG4|3#R=l<9a*Bs{f$=%)^7|)l8oG=A~$O6Ajw_anQBvE9@!|c+>Trm=pFB zG3ESO9=k^Gt;rCw6L==_m*ReTL53tKr{WH zZsQD>u`DN@a?Wd2p!MA-Pp8NULB|2&La*a9J&iT~yC3w&8F=gRA+ z#HioOy@k9Lb(sDj1r!s1f+rCpctXi8 zbnflKx>oCO*&${lYyoVYQVKeq}arL%;%TkZ%QD zbutvMx&=+M===I;f{^;*x}g6f>fEk~Vbu3H2im;91*^%AvEJe~l<#YV9II;LmNpW% z`6Q%l{{;DctyyaCQ-ZB=CB`3jM*E*WBU1xcEWQMfUeOGrXQ0s8{}>z`Fp-TP6vDDs zAHg!}*L4_8pzH=>{Ql2SxaAs@L=I>AZDlA;*eA-0rpX(tRzZCFEiqzjKGwXG@vK{i zz^djRY9pOl{pdDetE%A2M;Cr`L2s7uS8v{MK8z=I*TWWO$EvS{psQ{pb)28#%-;Q2 z{GXe^ywjC+e{&x^sK=1=^e`IuKZ44~55~E9GxP9Jwx$QLZMTRoxzAthiWj*G-3i4g`QWZAo8!Yb%yecQ<5Tfw7KhHAufSCS zm~sD4jQZa(D4O{eTYH2v={&{~t7wS$^%ZiP+?J<&QKH{OVn$P=(8bq}%Vt~zcbYL( z9lwatmnmQW!`hIpWfr)@Obwe|qM2*zMl@_3CZ;9^(+vKOm{~=v2Ji%{-#GZGz5ZE${*6EnRV z#u{HLpl+l;uXeY9mJ!22Z@opho@v4IzWfYPT@O%c2YBaQ5AHX54XU<(lIQ%SVDo2D zAH(H5*wpxPd#_>SHtfk8y4-n(MWd*YnPI}^?U>c}84O#0gqD>*qk8IGQF>!L+zjf+ zG(qH64vA)J+Sl;|XTUx37|hri!lHIPCO!mpqpviBwJ-lsnNRpkikwOkYHLc_SGJ-BsbpubAo>RBD7IBvc%e8 zX3OS+@lUy!clR2k=Kn3WE{tS4=j)jH#}TZO?#6f5ow?7qdq96TbJ_k1WNUN zSbn(?H%+qS`T+7PkEWe|=v+vhcVEb!aSO6%Y(Q7jKk@%(zlBv}nD)SRkZte9nm^j` z9rKL&4v!#iT;a_uBMgwwiJRbkT32`GHk=4^;a}ca@|^XTplqQvD+v{NW|x93+H23s zy$A7b+dL@WYs%-nH{ylVLDUHJwe=1{`yqo_Q~C*bRUOHL@_m@sK2Kh&Mtt6?twK+Na@BRp9;_{_#{|mIzZvK=`gp;hgntFAU7{Sm2Ir(<`Bs*E~MYt zokrX&o$?dfzTE81YLFi8&$^H9LTP9dZur%M#Z2hW;tMtj*Lw`$MZJM_pRr=mld{3p zJP5u_3SbTEPK&ZTPjs5DOSo%LZ@N$Uik+YGk^OQC9Vb)9Wv3~7^p7122I?DMSfMMQ z{s(&JjAVwg6!hshNVAQ7qV84zzj><;yUv?q$UEu^nYxRTP4wqXFwhL?XSwPH-Qx`8 zM^apX9c7v5F~W#Vo~Qi1Yg-kHN4%kJdf~`Sjq`{eto47R>dp$@Ji}?uFePj z>vy9ow*CQDm6YfC@rIaXW5OO>4CS{p{dlHH3+6g}q0DTH?$oz|EcN#=uDwcb@{_eN z&&z@7*X)$y+LQv7vp)yV^bWyoER-v zt>D}^SFY`%{&#C0c2Bv3>Nq#S?WqgDk=~Q*?)AaRHQk^u_QhmkN|>qEL8D6yx=XzH z(b0Wbij?M+iuJnUGj_~2BZz0=cC7Ik#1+?-5U-3t?WO<7iD|`>hYsNx(+WVliFkCn z>ALiY9%vX9%0GBSF~jyBN~s%Ho}dgX2i0xOA>3Z}_(oCjDZ@t5^Jo za-*?KGgKpHbXjtO?#WcQKVp63E70sbFshPvKAI!V=Ot|@xB9CJqC0P5_$MH)ORJf_9ECvv=vw9oAW}=~aE0WAG@DuiAmv&yHZy zxV4lGm<*4cOR?0v8Pz{N7bQR36y@$#80|}+dG%N<+EhWEwC};I%841f1v1ND^bkD% zA~s*9Jm1a)OucZ3$+r^IqnC%l^R79)+WuyYN4S7QKNBV#4I0D-i_2shL zJH-&PlTdpH{dsSL1$^<4G449oHawR1kYL-<223SAoX$KQVcT7Rprf z>0T3n-YQQnTi+(wkIMtem+hDxOdc|)Dxu|7Fk5zz`T~^^5arT?H{{BA!;MN*H5bZj z`b4rPTWF7*wT}9Fv!UAgD#|*81%1F|xq9eiAN<3# zR^f|@G2FT=iigz%v4zd{+-jQ{<^*(tE-suq4wxcj=eqFpL?>eB-lN{~Ik~ZCJLviq z;g09$K=VA58|Mn#b)hpho8&^1Q4#86c8Yq*PY~-}isO>Q`7X*+b^cOaVF$}i-J 
zTlWYu*F483zeV!0m?TJ9+zy(!G*PkR1YFrXfp;zbhq6yEMY}tL*urivW}Rosqphw$ z<1SC8xV~MGc<+$M|7Z}5SJ1xiY=2g8VGGW0Bd+j7SMKX;&h@Pdv32nn+CAopg&x_E z(^*YEsykvKJ@<9eUaatMQ@+iCG4CJcOlAK;tY}yV1@&v7;rI)&z=ggB_w#u4SOh;h zE{4ZH+XN0~-mF2i23hEB@B?D}o@o?fxA%iNVaB}hk33v@(uivp>=R3abIJ4eocL}> z(D=}Jo~4yBM~@V7`VAkh_%WD86woXqkFrRqUm&{SDCX}o=UwMyJnG*RFzm|}t8MJj zs&x~zAENu&ge>&6>%nvupYibJfm{-C4IAttSX%xdjpt8xvi-F$!rf`znyI-T9j53Jr`JmN21-|}Itc>&Ih1Ujw z{!E8X^P&rCJWW}P+dp_^fiu@z6zd#6elOVOk!y2}6qkowC5M- zIJgHUZSTPxQ&kx8ia0h;thmjgi7aaIO-y_>3sNP-d%YV9aVL&o!fA399xuV#DK=a_ z!v|F5eNZy-h~ek= z3A3`=K~e0E75?EY^*D9o4?f0%a_Ur-S)p1%PK^1t#fl!UKxrfwv*xto^mUZs_3;(6 z26kXp6?HfCh=mr!r)u{SO)mwo{lt-qwJsu7j0!XZe;e(1&XL=G+k{pxz5~ULXl(g# z7E1STLff*LSp4fSUN!a!mTHcpWd2N1T4BVRy4u0)^EA|*HDyKp-b4MeBT#>{0c2U( zLWA)k(AfP4T~j?N&qA5@$D2^Tgt{X+Sy1?-1v5*pVnnhJE2t~LisL##c;1H@KIe%6 zp|(8lWg)2S$1#tM#J0VD4ZNiXF!RzU)GU|`sZ+ke8HS;p&KDsiYZ0P$jpLH39oVqy zSJHX3@TDkBDMBjLgZH;Si*B&SKj=S-~A&FQ%!kaHTTM3%7Q@WB>hakn*1=ISE z;L;QWrdh-=wOy*HdEzPWzU#)G5tlFH+BT@5{7l?7OD65!fhyW%E5f!5+f8utas_*k6vhfBnQ`USVNAEe8q2)Rg=E`v_^!&Am2LGv$5CTM&C`6y%07ZV z%ZTwga5&v-?dWc=VCtpQC=2f<%t@!6;r?^rW=Z_d1&&a$-Utdb#FBK+m-nlQ;T>JX zKl%JK#`XAu&tzk{P1PCnQF>zM)kt30tVe^93!SHlGaeW~i~_n>txFZ2eAkn^7E3{5 zkLVR-!7bB{Lrq8!U;4n9)vk+Ts?k=W%;nR0Wy5G8dIjwj-`O&s){8LXij+$)kjuGu zFsQ6IP*-Uzy72M5ygUz5KLucs^b_7Z=E7XJO(bvBS)pOmGzflT#u`(5a|a74*K8L= z#cDsM*|vs!utiYO^RA87RKLz$C&lzy4OWYxzo-8&N99@EcRgEcpG zC4Ni9EIjBz?6{5Nn9_L+IcYZtIh03L(;YwevxrlDdb9Y9b9nP1WmBtDA!aA>a1XA~ zy-2ZU!6l=4^{SIlj3r?Hy%*E=>cZ=D#`5{^e0bTPmxMC^5L!%^qkgcP&fxvMnAqZi zX3iS49C8|N_36vpTPRa^s$7snEr8^+#L~#Uj2YCWZ@Bx1=vrWoxhJ&{eWVTIb3K{s zMm<=Kras~@V)ozsOJ3%x6w)U7GDUAA(8?}j!MjMFvn~r|MLp#OzSS6`rQMI-jinE{ z3R^Ds<0ZTP0a@Z|vBY}^bjeP=Qdq5sa3?_oi!$Jsu7r~5E-(h^xY1CYMBcu+` z6ShfRcv-_<%=qOcG>v})y20~6mVZnqn>%0bXHkd;7xdx0cPXmReJv&wZbZlMF;M^F z9$ZN>;;)88a-UW7Gy3#4I5AI_Ugy9oyefsXKaXSMFFDXuh_sK6gX>SdnR?m~o|icW z|KF3>@v%RbES1Bu-ze|?zbB&ff2T#&gdM2&Eh1Js@%X;V6&|H&ahZb`D~dAWC2fz; zpx#Jl-F%2&o&wW9jAUv17?&cAH(;SDPK z`(CEGfXZ0Ro!bi6x7sn~t9-FvXAkb)a{w3T&>W~F8#hxw;OQ*N%0&EzFXR@iGscyb zcHV{RJsi_xLs2@P8hfRgv+>HD_H|~$(v7atO zLwzE&kEES&_W+@8wHs=)p9)K#^0{9;ttKNZ@Pi&f4*Q?Gm$C2zJn)dU;L=y z0yf|4Ky`{*n3qfEj7268yw-xxEDGWJig%d!(3f`j?=i*Qkq?g!=QRd$4E##m+HLLBC$QW+4&yB0pV+5tYwct4NSCl2p!{jn!_M(;ex(n!Mzv-<|+^Y;8%pA|m z#=nHZ?LByl**uJx@F!Hc>A-L&478bFP#I~?N>&r2%31|Ntr0Ru5i2dc5z4}U6Vi0V+&id;?u>eLz14|4?;K=)p}gLwb|Lda7>`S! z-riAsteSFNO>_r7bQw~D|A3{`yUqDuPq2S(&mZ0Y8(Su?r87&tJn0Vk9E$hKRc{`h z=i^qP{KV#zhbnbB5X z-eN@lol&7I&hrW`a~i?)k9+DADMtjy$>X8mNDJEka2uYM1@hu$`OrcPr{s$(i3b@% zzM*h-N->Dlww_04&tAOf#zT;Hz5(sfba+=zPSS^~@z!E;T%7%cj`B&^7I*|I)=dLL zQ?eL!{5nWtfjwm-*_^JCtm4CEG4<;e_%2w%(}*{u)vARG?HWv9M=XJZLs-ryInTYe z3uI*{M^E}3#%;Y_c+rEgydjTr0Eejal9w@B7aa?qs(P~>c2~*G8p&e^{tl}5M!LE! 
zBT?pm6U;`;hxp4mg1y&y5a|x0S#BiiPb?E&J)vCEzxCt*NP?214%{|0hcYY=u!i<2 zvVo)Ilg=u*_W3GtgGDc5LzuH1dKPEyy@zp|+aSZDFONIr&4O$9qhZh=khIU5y~rny z$|uT zJKTl}OkMQ{JUHpaB!0ev!fYYhm!Cz&ILfTdAITHHaOj$T1LR9ASvtK#ZL^C|JIe|y z_D3@9(oTq7smA2IXzueDhetu%;OSOi#y78Em-l~AR8ft#ZUI>L%#PjpKZ?#iF2?lx z<3ku?6eBT}rYDn#7)o=WOX)#Gv|6&+h%la_uh7c2B1CP9JnhgHEwvF5V(xPhA!>(+ zjc7$Eky0K@{Lb%Rul4fUX}a(0I_L9wzfV#}L|TA}A;j=4Xo2{TJy>8=8Ok0eWAql{ zX$(3ElC=xOj%*!f_ne5Hs+1{t258Oy-RGx) zVL^$$bZ;r7*!VGKB4q~#67%ESN4&k)ork(!1UGwgUgNP1t!&6$`~Cw&j2w*GtV}$- zU&(E~$i`pujcz z_d+^51=8%hkQK8Vq(iPi*6-xv{mp{8O{>S(@un<#_F$e^=fXOSESbx5dL|H?zqM&U z6gaFyPniXCDCo-Dd_y4Ikz;3K%SW6gzV;g>%ey`j>H@iF_}^9iFFow|(tf+3vza-o zUf!2x?Pg;Y8^{&*9z@AobN0fS{`|=>?)kqZ@XW3&ThrH`=}w=5svqWK%KFhfY}h1d z3#50!m9F}T;)#%uk^rrJuZ#Nj7a*|=(m%=75VxE@v$>v3N?Zli_>EAyegq4iUXFeT zy|K|Xix^Zt(r$ML&ZZu_qVhU6nmdxKo;X`h)uQLJIWX49lm(_=gBL$qu*!{3@L012 z582v>$r}~CrprWBc($TMRU>F0?8Y$4q6SdD#?b$u#`)t0kbaRBw*tSN(# zHY+g9xU4C^GLRErh!;fegZ#)pV5vl8#ntKTiqds8L3k{PDA*RQj#e!Dx6!gSrE%KLPazZvJyv!gq zOv9?q?G&xlt&qC+2)1>MfK=LzYGz!5f`5CB3IUm)x`tjs?DOP3w ziFtK&Hpwv~%#$NOZ_%5T&V7I#SyEn=o{j~Fc0zW}c4*uD4)n1!3yZLWn$x{dvCx}` zDJwBo0S}Mq_dSNs# z0(tN&RR6V86PIxXQ|9+!HI8F2L!dog^fsZ$`%mD{u0hqQ4nf(~3vwS8LgrWMk0>fH zT%+uL>Bwi;nY;3a{`aBzz!C7?XUEG%6PNAIEDSjA#?lT>fcw+~n(>|PeYchpKXU@J zx*rd*p){`)N3zeieaJbROnC$aJ6$k}#p_41(AZR{7^s8;m&|x=^(XLHL+81_ow&2x zPU`vZ7SkGRQMz>*I;gGrHt7^rAff!u;$<44QDEhJ)8Jap03MKEBbLOe!E;xV5S23x zRW~RL9qPhkcYMd1A8fI-eGF@OY|GLbELf(5oKLR@@!T!9u=St?N}fI!EC1dF!EwV_ zazq@|9LtB#!75&KVk;KCv|`f2fhfB*2SN%Th;FkUK;wRM7CPY}RD@b{K3>iZ0Yil1 zxep=WhtZU0{ujc&?ZpRm#I^dIL;NOhxP5mLD|WNxF*YhlnM@9{$%9x<@F1q%JW!1J zmIxjH%fWK%V_4NX8?;ND1()nHa%&#Of*T+{Ina%3Wp^O8V>oZ>I)UGxZ^_!`KE{C1 zg8imiT#anbfXdw7Wu@U%B;{aQ!v zncH}Kr!7l-M!!#&hOo?j4UoTK5A{ta0I_?RY{7n^!>krtm3`50uoYXs{)_4n_L|iA zCSptt;H7VEnVTtn?hS*`s=f~|-_@TLB^?L9|B;8Lb{FLSRs#zM(0hQmm3tb8@XVo~ zp{~q|W&d+cOn$xrl|=)>Aoc| z0RDzNeJ95-{b@bzu#7Q_dd=$xEBUHQV>Y+kiZySsBtA|TUYynly9_2QE4ddB_;^$( zobSo_(=2Fh`3X~AP2{C#_Jbr>s`1;^jqQuNi=QNZbf4?NpM0XeO@$H4&by)U?=LXu z(|)X376vP?jA63o)6fxl57i&N*o-q~ETZr!CUo@&c0Ut3zXjg5btD+sr+};^7>qCU zR4XjbyL79@CP^9wZi92#RRct>sEa2O1tySwb0Xp z_D0+4(UA5MtG}8tne$TA>gZ>1dK_0|wTQA!r-k^Xb!hr30jhk;pwPTOtMb|>Xm3~u zC7V6CCU7%UEus62&YHS7^%~z1?tI}+dat{6h%x0x@TkO;og6)yE58iEikfsx9eN+U zZ=MI2L&X@fUW7@W6tR_6O}XCI>8Z&tZ5?S?16`-SF^Ue(yz+8~s#$ik9OQlrc7zJrI9N2169RvGuPHqW2I}rp^w-kb6x+ zz)j-Kr4qm5Vhes!XQF1ioEiUE0~s49vEbBU%y>{bMBTY87{-C7xla>>jC%_EKMmt) zQz+k_^p_C1?l18EOiiPK-T9{LC69esVElD*TF((Y-c010&10b! 
zMbLif!QA!^;OeoW7-83&O?=vmONq#u3H?xWU=&u3E)lbD(H-HiJCg@5$2Fl;tKFD# zq3CEF23-Ckn4BBSWVzii z<};|r+u4p~@8}CvsfR(4xep>XnX;_5Ll}69=3hO6Fy`a}c=BZ+H$O`A!N^`6y%RSKtJbSSTQr${IxzE%DUUfto~gtAp!)q6 zyt~kWMJ@Os1bKSE`BlVCS>nzrPtfx=txRJA|-qy}+-)J!wQyth{&Ey;?<9qrAWhWIdNav?SkaOZV%^PnjF8;0DO1Lp@(|BH|0 zk&zkHQ@jO{*1p7NNEJuT@6HcA_Tod|Iq7Lw=BLl?dm430KD zdG5~`{es?QhaQ6>0fZSF7uO@K2+3n~zF$5~U*+D+dry~E{hD$#u zqU`4nV!~Vth@Hl`LoD@k6s^LvE|XZwjxPMua}&OJPamdwR0ak9`S`(N5X*dN1r^tP{fbgP4?}3* zRx&{j(*1|gfF)zcQunTI{X39`j^R}k8bx*EI(_)BS25&;5etyM61Hu3WwqD$!|6mn zUi0(<7*twlRec9TokFbohd5#VG@7Q7<9O6VVvDTv63x%{=F0oTm`rLF5?A!+i>oPr zpS@JjZXYIU+f`_o@K~?wycn*KPdfN7Vk=CGLe&Ff46qIt(;QMkIh#D9GTOzh>#d3U zPfFe0&w~2D8jZAaBoE49F+X4*^>ADH>YDZlI@2ja&jxR%xHUou{7VGsovYATYtD@=2eXNtFK=97(x?U{<0{HQO_KNF80a(c!O9>#n(IIvM$JbA~t%~KCmXS(vdwO=tiFaTl?N?3(*8dkm; z$E2@*hx9`tM!6mlXY3otB77ghu{G4)@EORO_uWRr@15KO9#(?7GEX0_I*d7Y>1X*_ z4RJ#zic#mixMach3$bH&!LiFzc=qbIqO3q*SMGLZ%C*#=lWoKB-y1>e5lili-!W{m z9hjcT#=?+VSnqDbBB&z}Gjj%L>P%p|sRLJbGU7oq9zlGa39nkK$NGA2R+{x0d4UIu zqH|w;guwSyn6jz`BUp4x3uGisWV-Rkg^!6v{&tyz!^8Frao6aX!My^VKH!Jrpt0mNHK@shEA_Uzo0; zeR7N^D`-z9I9XR7-Mbu8#`omvfU%lodnPdL{%(Ti>sVg9qBC=O@D`<4O~BNp86?e8 zP2spl#PdqRf<662RrWQZ@@f+Kw10+*?A}b`wFjyKKZ#@LSzw2?*{mqb9_!**ZO~vpnZ|Ny|9qJa+F5f9fC^%LI5tCk0cVdmG z;EP4`H^f?XvKAy;tog2;-MQz9B&hg}Sag51V3=PB+U}Uh`b7Vz#Be zJpVJIyi$*mNS+f<;%c?zLNfKQZ70&Z#;X9dHa~$fGXuTR5jUbR{c(mi*f@Sh7Pds;8Z7oNuIA?GmbMJ_hU zyYi|bov>ibdlRP+^dWhBy8!s^=FmV%QR#nD4+^&-R0I--T&pZKU~;QmMH?_n?T+)E!*+4cDv&mLjT{?dXxbs8_kL@Edsr zymyLPzd@M)@Em0er(%V(Ir#lKk`&c>QH8}ZZFaAB*g2_)1PtdI^9{byuX`lPCYolsA^g-A@JS z$sU+~HxYw(c0kyVE1`aaJvXNuo1`!Z>u9bL(yCx_BfAN4;WDO6&lB4z+ZOWAHz6T= z5Zk}UiDyRr4=Q)LbHfkwU_{+$o-lAZl&Z7PYUOygZ`pJ7{HYLH=Sd;Ceh`icwq|IcR$zolX5(@azrIH5M$l1buL=}UtrF~>1uc%-liYhJv@ zbj(9T^ZztY_PF!urmo~O`i#178r;-vD9ik(5;w&6WL2{+p=8oJOq;|koA=fLu(l4s|HC3P;IU{T;%aVhne6b;)zCF?`sf$b3Y`6r?17(j($4oZHR zcVXzz39PXo5K9g%0_|VUTr&CtsuxuWRh}C~{meg5U0~|2uA);@&I&f@ygYURPrd zc4vmO)>V51#rykcXxJ|*aw^f`ZVwh!_*7KXQ&(p=iqX-3K&kI#;?V$9J%^#JZPtTY0PgofO)o(cT7Rl>Idt!TSh_v zn<)hBC%&wmLQLx=;|0XL&dG4$b33{)=h-4Wnd!?%Z5zk}b}u4#>qhGD{tx};E{2Xn zgIQg;70v!^*!1HXcKfY0PyX5Ot@`6Q8P@G->MKUv%uusAaKd1!TZl=8I)=jZYreJ9Yda%&{(V2Be4=l*`5)2F8 zi-zUxW97&DanD}|K>4FyEPY-da@zU|YU9^Jvb;jbJWub{547VulZv|Zt{TU40G~Gc+tVV>GXg&xP}T&fM_W0#BC{ zvq-RJ32i+`!#p`*V_h1Ig zM39_yLEC4xydZn4P^BppQfQCA;ZSGp2UhTCwI55!r(Vd_6WDMi9+vLx#1bhZ6484d zI=@x12wNo#4W$hF%8NqBJ?bbt^u_AVJIV9fi$}~PuHFVWB=0cI9|xdvQg1%M*pC%` znguQc>{xE;d9Z4?=Z(3y&=B4k*IcsV{zb>(1-XoC2VI5qNCgkRS%RA1e#X|9Myyd7 z4l9q_FvHOn47f7{wd7gJntKF#5^F~LSrkml$FQ2qw5M|JgR!Q5T+y!_e5cEJ{J4Ly zvh*e-EjM9`3)akky9ZOwNx^_4!^Dsg_o47%F_a`t0QoNi*nm%Ntlan#R1MsOMgKB> zdu3NHmv_NS*C#W-edMY1r(AAvBKFnDSWU|jRK-3J)usQ6rs|JSZhZyiOKI*116f?g zPZ*Zr%Bq4g(DULOP@e)ZB5Dv1&8b3*Yu^0Hv7Ws4Z4l`8DAEhIs#)!O-)yIp6fKHw!y+0;Lo4K+XTBu?0`rUMk}$cHV9e`1!98g3I=$+ z6dSkBfnBQyu)tq0WBH5;Z1D{%#<~yX1?}Wizgh%V2WfAxlsX?h%t5PIqJG%4aLC-PaST8WK9!cz1CCsSl#Mg_HxOC!A5L(rl z+@f#vQ6-l!_|qnw9O6eV7bjNz@Ej)lZw1K>Q^7o8Ag@alP*&{;cN^rq@5qsyhhKuW z#tc~X)PvZmQi*`NAy=O;#>pFbLl#(cnpC_lXNlM!dJ zZrmmJ2GmU7g~KUlVX1@frk%J|kO4y`48o$=;fL&*Pz zLfzzY9I@Ssjh#h|>2B?!JbWm&m0NN3`W^1ht~MA!&VckHSDxJGr69lX4kNY^!!G67 zg=xn$)E7j$Ly_NSp+a>7W2DQW zwQjSZw)WH1Zj%s8`4wtz%|m^A7v2{37W3zoW6!D~+@q5_n{Vw*o(q5BS|2BF?QYD| z66j77-Gr@9mqp3CfAN~D4bwXtVS)W)AtE#b<1;4ns(}w_pF(%T-amjrW-sWhM#4f* zV`j+7xKKh4x<`-3@w9iu-pV_Qt%q7LbFn$d^a*0PIE2N#UjUkA&fK`mS@`657}~PG zK--W_us)9Z)ZxRS3T?TY<}gT zWO08ut9562_uVjaz&gwa(J^VlX@W6&YGw;sS8AItIi4vg6wRV^AE^o25}lP2!VJcjs79)+t9we7hS|LBt6>{9c%~ zLe8V-3#>Ca&nr@VS-Ct1BR=KEwm7~eY_b;^N7Q@o1yU;G^Jr=%pV2anrMCl`UW;`Vm+U`(J 
z;Nv*GbEgk*x3fQU+i;M&E-x+|>pg(UhKZtQMY$Md*a-{&N3PE5@#y`k1HLB?;!jfg zbIrr^5UA+ROUbXG@-*Y+vF%v3!50i&J+bbSoJ|uZFy#d599phLFDDPm#JMu(0AH}9 z{Du19Kvp`M{D3#lK%HeRwm&jv(&a*vc8`@ zC{JJpr4LWS?e2qla`iqjwmAdC)MKz=a3`jD^#)D<_y)yG-h-FQiaT15-grY8FPq$px0&B6lz=IlmM`GNyp>Z)H$poClKax%fT7mDRQAFzw`M@RpkK z&h(w=n6ViPr@esi6)D)5%`vZL1l=31i+P`G@Y%xwJTKud^ry}p`NR3L=a$So=@n?F zJl1!-d4sbY&3Ljy9{4HdK-&G`*g9+^gwLcoxo09?>oS3v#ad8iJ6gy(S`K0UTQPE7 z4QhK3-}HpDSoCKE#@=nfkn?4tJmv(=lBzUCKDFYuTwi{TT<3X~LqWQ93^(?>3)!De z3ZDmfv7(jV#Jcl7&`e#w_~5g+{*@(DFPDmh``x29K*aMAx$u9J- zyoA|dtzz5xyEuDgXC@2p$)m+=X#IKs@-JTkm+`q!yY?MiJKKwuf4vBu|MF)Ajvp}d zh?=WSb70|~VZ2R6F2@gNv0%Lei?$lYY63G+9MFp!rcUBj#D>bEd{eFCQ;<9OveeUk zS-_`DLfy6o{2Xt|13olh?)vReS3eP3le%K?K`Y*{Y8a2Y`aw{8J`tpri}3EbZoI+g z9`g5>G1hoIFa4ze6dMe{ubhMEJNXbWW&v)QXUji@4&!W<9j|JS6E#2GM*h}~McbIL zXM_E@p?H{>o!%jOI(R^`yj%=Oy#@wJo*1#C2?FQsp-#gHuK%M6%5Cbvc$OJAm)AqF zUl}HBSqZ04IJ4^Y#0=OWz{t#dMNPVpD3Q%;!ii+ETf;y`6`MKqtSj92wc+L$J&DRWPk8J2( zb#H}GwK88Ung0}Oe;-6V2{&$#^cBKZ&qH;h9HM?}5#u^V2-AAdyv)W7XAnPac7G#o z>}zt##&A-zx{VT(se*@}35!cQE)=EbLTUPVRy5rbl0(`=|9p<}&7-lLdgrlQ%vrAN z2|Annh=s0RTe!2lNlR~dDP)FfuD}YxLOwI&bR~7SCb8DR)!_GL66It(usoDzDMd=kYEAHIG)8>3}pfJI#5Lu2c^#t zy?R-tpg2QbvPKOOCm8Fsy}9~uBi4bbASpU_A$hxo#jNQMei!Im|Fa2bT(4orI~^Rk zM&5ezvVD-#JLi}aueff{>hi_i)7I;>Ti0!!A^P8Hk1V&SK5AMx3~J6xWZEL3q;$Ru?gb zcyS{k->EzGYW6{DjXA3e%McWuCLyyUe*OuXGY-EaBn=(N8V0pNsr)$X-)YLqFQ$UZ zz5}#fTJ%u|nuYuYPidZYM#w6j%p_&|gvg!l++>J3*Q$zz#J-f_->?W19R2{k#SQS! zJB~|l_F)S8%$%N|gJidJLSeat<^Plmp$-3{-@tNE_q-s~9k3ORf2Yo0#xt5T+JiQy zMIRet&(yVrf?IF~#wA=t?_)J!Hhnm2ud`!@MP5Sbl3`ryaRakYd=<0r$e=!x=D?Gl zVaTII0zExb0}F*4<)}lU|Xs)X5EVi+h4qS zYG@1UpKb?5#Ai{vYY*NyGJ@MZB`(IXNuV4tAEf`fqC#8&`jG-Z#-v=gt4=g>IFCo3 zPUeO`bb6Qn62bW2y~xAcu_${cD0g*Xp88utL`n$Q(Q|3>13MNJlOfc_AGPc zbd2?R4YSsI^0;^X#7GQ(th>Pw>nb#+4gwdE<$mU_`kl)0Vw-4&8xqyUj#H8hx(c z84vKAD2AWg27xB4+CXKD=0tPqjv5& zlzjXkB)y_NpQBPE_x()YvkVOE?a11059N;zn{wxxTNsl@-j6C7?OK$qaQF?7yqe4y z_om(EMsORQgZEQ9@si_%QL?x@XkQ*j^#un(mz0YMVHdIFoGY_)pUfh1?D;QUhjWK< z{aDp`k(lsts5Q>gILFpQ?gvwD*N0}dS~<(iSc~RM%RxnVJo&|J&>wEYiXAhsBYPlg z>(T?34OX#I`3_Vo;xzj-PYinH$YN~sv8ZVa8WoeLYMq4peLqGsh<54?pVi2w zd`7pkUs3%oPfRyE1f@2e*y3Bm*)OxqSky%`9y_H9V|JKf)Qev1^*4H+wr+x&ovsj7 zJC>)^O=1Nhr!eQ`AU-+CoqH|s%rc*^qdln(lV@iNyP{p0V}czql&*npc9_swUn>T% zE~4aMu&6&v`Q~npke(C|(FIo^s`{%a4WWCf&v5dbdExy>Qa)xL^nKCP#- z#k{*<+hsV**=WP8{4`u|N{%f$|I|=Mw|rn3p6ubvLih30F^n-wEoSzfToBcQ~_^%}&hIfuXeHKa^Z; zbnp7cg*^#3qr8_Cy?-3R6K4Mn(!cF^!T<-npX1JQ){SK4y1i&VY%gS9%>k_!6AZVA zV>8eZLbD`X(sUC8R2PJ81>{3~CW5(7Gss>Xhse!!5Z~tl)Q1395A3XWE>Q8Z1%tRz z`!;Cs>CF6Y8$qolIUq+Q39hZd?vkq|lGL~r`_+e>bJu1v}!UpQg8G3C+qmTWV z@l`*rzB67>=Z+H`ZyPhonX4Cq_7CRIHmG@2<94BHj+DhOI*A3HYXsBb)^zR}#gkKN zLHAZoJ9{@KNutk8eh+ld_i4gjjiEWL^ny-0S?t(<1&a2NS0&7XDIa%cS=LIn@TS0> z0<2m4A=)Ed8w<&|?eX;e@qAzLA(X`aBcz9IL8q}TSQXlZxrIz%5tnu%TcTt#KVlcY z^Tn{1(L7&y0@lp-4K-oKIsM=nFtv^2z z^f!pN>(?mg%C~9^BX?`^m%Sp7gg39B;9Z{!l)Nkry#^VCdg8H?QQ0`cT zp7$+57j3K8mH!HoVFxrp=Xc_+lf;pk+9Gs4JCw!zy$dBxuKda+a~2Y%<}M!RAhOGU z==b+vY+Yy&4b9&nA~*te-R{gv&U9xk$FouP-(gS(XSs`&Cd{GSg?qbKKt+=e-hF7v zoX-<8B)_wZ`UrUi(MqtjJD-Y z@n*bO{sEnO(3#-kKEbJaJCufAqH|L;hGh=`{=ke!Pp>BzjRi{)CNY!X-s~6Z1L&Xr z2p*OLm~&SH#!xP_ok7Lx0%54*o~AkSGfp5>e- zH{9SXth|x|A(s?bIxZWsT?>V0Yg8;e&xz^t(xKJn3YJXnjCO-OxU+8(##{XXWj%~} z$%m~_MV)c|0&84<$(lC~dkw02y?M#0mpJ2)Ie(HggcbKZNL}O9^Cts=I-Q+)>6CrI ze{912(Y?9zk5eGTg8G(Tmms*a2}^692fmDR`J;k6KRbyo zJp@*}A{QFZ+oA+^L)hx&m>k|r@94=IUBq%Mo>hV!eVVaJAP()-LCo!E12HqUfnRSs zluUaocs>Sp<+Ys0EbI!BLUOo2Er6QiWf;{?9GaUUs4zKz`Ly?rx|)l%9@!vS9}cR+ z2ZW{Srff&+i_o5N}X%ZocRzQ-ifz=vf(u@7jd$`oO>s}L9@b6Tw;=p 
z>awds*j-0#Z65$pm%HE*wB)fHE~Edhx2SdPAt=5F2>Nw&X4_!Hvp3(x3bj2;4KZSd zu-W33oSw|(@Kr3kpr#pFfApN$4hoy!gyb|ip4mKOpIBp=E`F$u?YSXbtX5k&BqAjy#SK0 z6k?>i0}nV_EtZT@u#!Ffu=CIMy!4N+(0SZwrYgIFQPgcp_`Zea?DPz|9gI=VpM~^V zU#75{47N^vx$!Jxrtc&tUeX+~Ii7M}LJP*acHtdx4Hbh!#j3_{VzytZxTaS(=JLN= z7+ks?3L;;Kc^4DVapnX*y~US`de6}MHxnkB*|WIk!D7M8&w_4EnqJvmU~zY5i>6Pv zQF?G6v}OJdh994~>m`?f9e5AZ`$_n|K`LfFhj^1C7YH>MC$XJjS`lYjbjbuibzQr&(mHw(_M5vR5of(L8nfK<+6!iY2Jx zS5j15+Y}+_<~i$&X8tX-r>mIG?_Yh?zDJmDm4;R4o}g{3l-I4)iZ4cvB~F=x&|zJT zvTVSDUk{4TAcA3mJ{m_M3 z-8qDr&Kq!+-w>|wP(aLDBQE)?9?jZ-G0Fn_kx%_W8@c@&ow>5_A>wp>a1UQq0gBN} zpy3{^StE4F3y=8ZqQ z@@+?bm}hbhM3Ixtv+8GzwMd3!ZL_$cg#1hUR)Y4I5>5VXBNm=e3=tMfKzwGz3S5&w zT|w^M>H@U=#gsSb$Zz4(K@Pc5g1qP%q%LfSnrBKrL(`YI57bZCXT|J@!6NI_A*>oQ znE4fopvWU{+^vaB``1Xl*H~koTh;`+89PPkraRcx&X}v`JO|T`(X8mrak0j@KRFL} z3H%5>Pk*bSjM{0Gt~dY+U*c9c&^~A91ySOv;;m*)ru=Q&@mG z=E)+XYlLyTJc>B^U-} zYhn}JS@<{dL|o=rVNCC);rmeIWRLA%htsU#m_Dz&lu3_T5R&A&}gk88(hg zIe#d$E>95D;Zs39zzV~MUxX0pTFAVtAjYI6i+VC0iw4g^wbqy$m@VCxC1Bg)#vK0f zjY!x4&c8G8^wYsUywSHGXGz8;HZydSnx^Weh0S0>pPA-`SD3%(?4VB z^BKXiFPdOT+XSpDIwoq1O~ryg2MEiKdN65E7w{bWuNZxrSQ;T0G3c=r{PvjOfzs~$ zLlAI7wv(8WWX8f5k75Nc&tqI!KjzWdn8|xmKVju)RLr2xikTZ%PNeQ(^nVb&{SEp> zrh#%Z{SJE|cn5Ujx_xUQ#Bvj6&)z0Dcv6nRa|pY=coJ)HCDyLV1}wUA1sqQ(dBvog zP#@^aR6uK&8L+J%<(6|Ep!p!m)Os4+UEbD0 zjjsv4i++V<7>FI0{=<@Axa+UNjr<(nbw>`{B>bq-i1bth|D)}d_P zzv90Cl|#sH+o73!Q$=z)^HR}%;425`f-$h~nHTT6#Do{UuxGEo8nMqi2JtG{d(ln~ zJj|s(k8-*u)Li@%5?5R&|D_){-FFi6hit}rTT3>SX0OWg!E|=b2h|TBMW;t8$p54c zpW7LbwHJ%(E7U`c*e{k)wkmtwH!!CAB^$Q`!#3OVn$sna-R%q1ob+M^!>DIzVvbUF z7iMhrVNp|nWo}*vyX?s`73Blm@*(QBXK2>{u4Xl7Ct;Nx`3{#|BsN=yDEaD$e86#Z zjwhDrT3;yVpK7x~y= zRrD9JDz!~e-99W$}%6c`Q1Q$CUhU}We z)0Z6rUBe@&C}J#V$TrN9oPm^`o#_ALD5y0r^m@7{n@zUhT6Z<%DGM7Y z50!83#1oB1vxW_SqAf95%}k8g3_t1}A05q>^>JYA{R?R8qD3n4f}*%ts3@Nce))s3 z>8%mp^jgZimwdtC|8$_A-VH7}Q-0}n7w#MzNjxYMVwv}3v(~#2_sf>2p2-H;nsDGt zsGo0JCmxa zCgfz`hY%}PqaOlMr=vl0elaAwQ+K*EWf_XdVW4tnEoam$D3h0I zU4+mD)TeMNz@e?iJhJFLoK2#$VKRN!ulMKqZ|vy|SRi<|p2i~8KiE2?FK2JPxZFDq zJ=2pRChh?qu^-0^FKtJ4pb_dW%ETZuM~t)0#6;GQtDhbaqHd33^2?VnqE$e@^Cv(X zSdTKw?aMwHu&R3<PrQP~LJ7 zlDkI`E5;i9m$;Gx?<^PeQr9myz%GNwHI(! 
z$QW*BL9U8ia*va@$vCbABWJ0of3_RO^s(ncNO!Kdu|$mG*CGGMB&hT1FNQ4kWU&@1 z7M0<}uPia9J{NUDUC2MTTFv))ox-@NJs^D=f|{FySlF;LnD90mDsTGp2vdLN`Nb65 zeQkNi=`48A-GWtgWT2m0XI@#DgXv3iF=VF`TllLPSC)^(yH(WF@tZD6qQB_XS-mf$ z)@Q({Q5jf0ljejm>6k{(Z}w9y7FGTOW);1-cC!p36-KP8woQl|ctVtnIW5eJqqFII zJ;t1paNX-f$T~?m0ozw7eU$-mmE)Ks%v1mA^*PXF*)sn_#Hn6%M9`Yf#(m>ULFPm6 zcXK~XJO2O-*~d`ZN=$*%zv3?9JsFnl)Kmspvj@M?Jd@@g9kV~c_haTfB+Zd$x}1a4 zKl=P1L+2e=;~R$Yl(eLXN@-}QC`zQx``pSZO2dqZ2*=8*>}b)_&~_?q(H=C;``kpx zii}912q7avL_fdh@AJ?3ob$fVbC2u#UN|+`2>O|pL-|1l5p8)b7Fk{q56pZ|EZ6mj z7k^iRc3BmaI5C$7Yq5FF=VYe&`b(O&ex_A%nM_Ar&xuB=aH2Ek#Fmc}#Bq;JFhkVK zSP=OfnoUBxY3lgzq!|WVDT3XVX>^pSEPSvlq0je36FUWC;<#!k>@1i9fujO=%Lk71 z;GuJL!1pBeXk+tRt2^Uv%(Peiq61iHp~q;GE=A9z>?pI;ME zkw+@O;)^yOG-f&LKii3WVmS@DS4NjiVw%-Sqafjl2^q|o3%BHQXfM-0cP}&N?b=L; zw^umPc{>{qh0I3r`8i;CO%E0S4RB)Z7SeeupEUi^MjIi-yO{aX-^E7c@n3YPU0J!HWw1*b8fn)^eo!2ShBNDUBw) z97lH*|D+O@hdEE9JaTWL1)IZ;py7IqgEEIe)c)Dbhj)gWKT!nZAI#6>y^v`{HL>B_ ze{@%6KNZ#}a|NsS)6VEv(w!bhx<4M^6_1}L&PQibky{F}+cg(-AGwkzuPe#Hr_8s$ zOon!DWi`k!Eo?BVV}0*%#z%`|8k$dx*|h*G*G+(`HaRRij?WNFh51Tbq=7I7QrX1W+5Y;n&`dV8hz)+QsKib+>;$& z$h+0*AZ5UK>8tN?5`Rl=?4BAEfsZ0x60HMvvxgIj@ZpTJ7)}$`T4D%)kF-0>g4Y!w zsc)uZ<0J*r7hl6izGWJ+&mnZiHTE2QWxeByb%G}k80+xfAbozXoOY^@VO+BX&}6-q zPw+92P%Z+zSjv1rNFMliv-H5lJ0S`{KSxMdQ_hAvR$2^OGsMlD2-7d&0EXCwXOtxk-X)_P|c)1o*U3L5k5 zS;oP7+BGPHbN;nauhv?szaW+lJTe7t;&@FuS~Xou!=ztb7>^}sH4KaCbz6EpEz(l`4WEg2y}g&sEqUDMfJd$yxa(bk_z zoa*5mf|6)6(~${FeaWBr2~ePz#L3+6q2BlJ(|zi*+50v~K3&(MaqD@aD4Ean@4j^4 z$q^#4%U$d|ZWha_xkvtW7@)`MNb1Qn7N08ilIHDlU~YSlMq3IfYtz6i+m;RND#9((L5Mr!Vk*}i-_y` zF%bBE6YX}ZVCQ6BZQ~1nYPp%!RtLY+A3sg8=ba=B?0H9=Kh|-=#7?TaEs*HuPp1-8 z6?HEVTJ}@{AKum{Ig@8%y=w+_Bw^uEj_^Lx;en?7i>PG`-96JY6+s9iQ_7Yy)r>~o7{m^QD zRi+evYS%wmSL2OLi-=S} zH<>hS6m}m`fM`dK7}bW*$lVeUCupM1^X~9YEQ`~ZF|OORu2H>*<`BJTBYCq+8bwXt z>I9`WM0fXeR5u%g4r_0dC7qd6Xi!c!>qvo{+a!?HW;@ocTKqnbBC0(2jvlNmCbgTU z!u-k12b*)8zMHQKbCO$$Aj^$z4zR>#{b2f`vxS;)1m@v)DqVdG<-#D70sYt zlCcl89?>|Z0UGx20oDH;Mn$hj5L49%U`;4oURwhqKCUT?N$|#HgzMCFffxov6llQ z7eX{N2vy0SK|ih9O4YX15Mh3=VD>&K2;4jyY?=S#*3~Ptx0mg7cQWtR9YcuAEhB*+ zj&sHu*5LedB)AnnC*6t%IOlC8ME^uDef35UTYKhV;Px!)fnMa@91E-}-AUK`PC=yr z6$rFt`_Tbq%-2%DIY-`8<3d~T2=XNLmQ7q?%{`(!E|Unsj@Mnre4}qQ@lU5Ch~7nT z!lfx<-*s%F5PXS~cx}n^8x^6YTm_nHw5fZM3PfsW5HH>N7~*A)ZocgNcK$>%L z7Ic-{v0U(pj0@>c+-6Cz%#}`FD1J-I*twgVJ`SF|xkP%L^w5svaz-IMr?NwpthmR4 zW0N(uyNp4*2mW;Ga#b{&znDst*722H^I_{N#((^%LbJ9;v;3k0Zq6zzi2jt$8LqYj z^}EKfcKiYy=(1orfMYn}w6mN>T{b5%Y9sZ09Yf9D=M&+>k3=SCDAfqop@C=TQT0zo z_~#+pQ{@_qn?hYku2u~7klH~#!Va){>=<=0sU|90<)Bk`BoRGp=KJ~s$PK2g={)2^ zEtf@*U;S3-bH<9?9yb}px13;qJlne#7{iRfQrZ$H1J4j=dhA?jn)9}AGL9^5@+I<8#sU5|PYb;OAecQL(4BJuR$Mg2{>w$A`La2R%F4)KZ6=A{Z$&RGGJvwbY_4fk&2q!kiBf|sJYhB3 zu{cSX?!|nd-Kx}iXbs)}ULFKHn3s9u5D2?GkxDV0>%n6aaH50_+HLyGX?aMYkHirw zblt^exPK)HK9k^$(r`3lozVGG`jQlX0ZM zu$~CQI!IUq<5B%`p}lM8f!$43^Y^SK{D2Cm)iPG=+cC7jKb{w9+M~o(QC*6wHCP_b zrXE+ixvV35NJ&c`xpANAs*-xCICBE5%3Xj3?FF=;yO{TwG7=2uj$%A1#;Q-!q_xu- z8{Nf{_~cCm$<6FN?!Cu7yt|NUExS!rS~gRmS2HJd*BqA&kpSV9C;Xw6PTC^hgrO(M6aOFYaCkcm*ylU%q1m%8ULy4C~14d*f^PNM(aF_ z|Dv!EM6t`cDy8pKer-7Ik2_17risXv3%b}~pFm4^X;3pCj(d&}sE@ozvls)X&!K?m z);bVNsS@k{tP*;3jv++fGGhGxQPf?hh6-7x&yrtOWZ=<4a;RlGUir>g0FEQMtn*W8 zO4w+ovD3$byQ7F`?{y-KN#ljLT5K#!=JJC(b7;sG9p=xvNZO{(fPqVaWS3Jt88mxE zHIBWc3fpF*aKA0fq<0~$D-~d3dKuGl{Nq!^<8VpUPZFtX02+^XaaG~-`GO5YiKes? 
zNM!X3gdJ(T>{2yszG#KHr?s)+0pn@g-6bD}w2@rq>uZvW6F|pb+Gq5G*Qh>5IGY7b zb3cOjy;n=Sj@W_`d;ip)+rXUta_E_NnQD%80{N;cYP?K?<*u^)$R}^f#L7C_eS1D; zOf4gtqDiPWg6$7#)rgbRNUYM&ptrc&^x@DGG;pRo(b&+*>#kc!559FItr5dPBpoTx z81b4+y)+!V{4$uYzLoDTlIOwf7VA;#`N)iHDsznGSABTF)sEDGmX9ORE)r;gHS|D-ptjTg!!sr?ND>j z^`A_IE>KtUYz#P``$P(+kD_LNR@ggA394e>kO#mx6lP{DBa5+q`wW?uD3EyUWz4OB z>5yW@X6T;9oaK8@&P|v}C7v<p36x8~Vm=YbJKuqT^(W?v^El5MOf z>7m_HV>vhGH`i^RM0$SevD%RJNJFBDM|ubG6Mg)K}&5n4zaSr5{)YbxGqkEG5|&rmfiB|YvoFi_q=MSF&##=$ut z)cj4-SpMRuO2(2|n?XCAXTZS=9LPs@5VzPumg~v1Tp5w1r$QAhkM5wdD@WjM0|Q;*oUzWJ44gH;N-4JtKC} z#sGl>q$+h2fACf@k-sb=I~jv1>z4%%j!&gE&(Dxmd-U*O%2}GSf>JMy2>`c$QRmh9 zoW$@@9)A2s7VAi%#|_5IdNP%G7JVdN-fN?)_hgXx9WQuZ&_H@`+@!91$D^HfGI9R- zAKzilvLM170B>F;m)^_-FVSzJB5omZ`B6miaTqZ&`^kx9zVVlGCc!_p%Mq@RsIwfU z!*7-QMjsZ(k=$S<6rQl49%H}JPcjj7%uMFtxPF~V{f;M6{)}6A_@=n|C1d-ZyG5h> zR*=G%kEn#?OWtRrJ+XXomp3ZTo~HDH-C4Mb;}A`CRIqfJ;z`k1FnX6t^^os!KdER)Cjx08tT-&)?`MG_Uw0n*3M zB$k)M_`$bMIOy|}x@}?_toUCvVapg;^3|7gdBl+Jqi0FCni{FLds z0&sMXC(l1)$)fk9H(@t9WG&f=d0^irdvqjBIJ%O)$h z#cjP5N}i;S0gpE&G(jzpgojLmN_~0I9c@pXT@tuC=SO48gg4ag7t_B;u$+a{GsG&k z!`S@i9!+u(fV^%kO}KuWZe*+oL9zjgc6$<|F&|0Q%Deob(Pm($Hv_y<&e2cp8)@|H zRzA?PhboL_zNY?WUV_-8iu4c^i`Z{EJe5f7bLMhe%ZO{H7W_j8Oi(^S)~cz1gy0b^ zTWyGzzXt@CkA6`JfF;1;gZ|;r?S6}KP={b= z;w+-EYZ@t7{gzk0{DOv$(uBeAw^Y7&7;1&Gx;bhejn|a}uhola=Y>7Qn(+$?%#x{u z#{vkPSxUnE*V2Egj48dhl20if1v}l1VAZ_YkS0F{)Y9i8H;X-ciAD57hzh!0j9}~= z<`Ldp#)oZ}#W`Qte=>sYClC6t49oy)3GF`Gefb`DP#j2%4om=zU6$Y=ua2UvvuWVg zrJRFIF45=GXs{ofK^zc(C9BsZOgrmRt_X17o=no0W=N1(r`?KaT*our(jU4pb^=7Z+A^Kbd?Hgl9J9``oyTEknwxA0!Bd!5`QC1^ zTYx#ZGHqAv5T-HTsE=9_OcOOyk!jA^-udta8f{^V&R>Gas3D`V>+%~SJ!ULOcAge-vRoRW?n~Vv1%)P)%tSBH`weyH`S|3#m6c7;_ z^1B{OL-#TcL<+`qpZGeF_v%-c2lDQ#_4294>*xJY~Ue!(LWIMkPuvf>H&vHWL$dpz`-^^GeQ z5Ya{lB78HQHZ92I%=gMd_s3!UD-C@#`|m2%e=!SsZZJ;5L=JtVDoEHhOBB+(B#?P8 zg?*;fQSlBn@-d`sUSB!iRn27cK6!XJoz27A!}$GDtl!CFTInla>7=U@ST5~Lj>Y0) zEoGdhNJ$L%GYSUQnX@_Y55D226-sQq#Vxt0Pdyekv-(S)8vQIIf*@8mDBIQr3`u7E z>jJ)Vza1P*E~5jV*0bK}4EM=e8q%!v;fu5iUJ4xz58IScA!sO;{g?<-?Brlq=?gNj zj`>(iS(cC)P=A$?nERxL)YK7_h$y8+qwV2CL<)V9IUDvjjKPBq0fjtIupW zn>if={#l@6X)((#IYB2L9S`oyMAS#mob8PRiQ5;Zjo-ChJm8%{zO>JQszsA&N7p0} zdH*AB8Kop{axIa#G=!fKHJbU0V#uibQXtT<#r0pcQ2F;SYJA)lqJPat2UtP6m=CXE zi5cibj>d+%S$lej7kjSEIoaf}7FU1;WW_Z5*wUJhR^9|2Vk z@2If0OdPU{(*7@U;NGi+cD7@n@4F(!x0I?xam4P;EB=8R(}gd|rpg!9z|8M2an5e% z0tY72s;!#5OzBfnV4=%&ott?XiF>rOK9;O~H~~63^f7CynEqq>1jXhhBruVARL?TK zp>I18jo-t+S*3u^-3Acdk;WDLVmUooSHwQ6)X1w<@|eOh${WU2k?;*1me2S@BnGE* z1*gvPzNW9aCOJ1wB%8wdUa}*)`jesC$_iyF<`eg0!_ z8ifUXeOWfId{rK7yVz`~vX;#0w}GbfX=LD140-aZm+smn1?@Ew@T*J!O8e$xrOl*-%lP zt=RJXcFw3qlN?mmgo8IsAVE@{h`vlmqn%fHhdd2X`DcbIDQqs*QcOI#JEVC>1M}NQ zk-$&(*nNTB$u_OjQd0?aJ+(=Mc+3)%yoE98-Kmy}g-mumg zBD(Ne;C18>mGKNE^=r%_A$$>a^PLCXHM7w7&UBEOI-Pip%%#UZ4#TF)OG)(P^L+iV zMWjFU3EB1HGi`az-rpzxI70(TR(F&W%Sl!?S;jUny>bqQml%S_mr}01K?gT?C*sTn zugUa#(sTo7$XMl3GQkIi_&pb^)q=JjJ+R=U~qEAZjm8 z0NIYQ*kTh7m#+H($+}KM7IRp?^gJahC+LzJwRmSB9KSjn(5~=sSpL@rFMq1VW`{al zJ$??}hcH|}qX<-6Pr$~5JURIz1$zG`L4vX><}1tro3goduiqhT3Udd$&l6Gg=_&rv z*ZEMQ9!T1)O8IH;(}3K~he>m`Lguh+Y%wTA{c$sKuY^9V|NNNlFir$h_fV2>VJz9f zYR+`0a@6XVqhCff@jX7;SSib)dBt8jPo^4%n!D47x*?3cAVy`E0j}lZ0Xi=2EciPl z!PV*j@;Bf#B)ODf=!jn8zr&CZitC|AZjFT)mdRc+!5eBGmg5EYTKe|%8MHPy#9!^r zC&kA~5E)puX^bblWd6S`-%f#Q$3$#9poDIX+v$CKd-Atz3eur3$R(vd?%c2aIM}X% zLnQ;yBGm`>?u~*1P>BcR_e6l@NJk-FJF@FMXL#O&V;HN)cZwSOWkyfq4QJ@%nk z@(j}yE=I%G$KXG?xq?T2lX3ajTDC{dMn06~(s$InNvbWnvWy)EVouh zw+ig0GgkV^ax!FAEC$CZ!s)CqTouoF0`meO#b7QwNAfXB_{K(2S_uOu=D-r^#qjKA zAN{lb3>tpYhn9!EWS{&M=G{v|)5jCY!Lzj>C6SG%qff$)qDpjF9t+RAQefW%mN$CR 
zmAuvsBu*ba(5}-3x?ki%YuA1lCT$Cx?gsM9YCrDDjDkN7!}y&392kAJ5En^4B-iwv z;Q8*Alsh7iDx=?$U$Oakjd`H2_rD`6n2#)sd5@=TmxO))RpZK7AB+#04QmyPai@Je zy3|?F4L2O|j)M*^dzgxO)85iCX%FceyY={G_;mceb}>H4U54ALa_OUI>4NqhlK87M z1rA+&!F%dt;E16$SdmnKX%}MQ_q9@RES9Ft9_eJXV-akTnGQAb$uPk-3&;BU(@la1 zvc0+zdUAF{$810FQ}f2trC+$IPH~VJy%a|+A5V0Kd!y_>Ni-{GgEfBSnPDOUr}|0H8J?WQx8|I(JiXnObVZd96|k4KlQ<5h=L?CdB) z)gOO&_i;5?d%l$%>Z~Uha#!Lz*F1jB9vjqu=mY%TY|uYB4u7s1jXRnZ@!Ic`c>U`V zY`eah#_n zpXt|-8fqLQOMXi40Qq(iy!}-LL4Gk9XRk-Q++4xGIRw`{f6o62nZw1U55>}(fv|%l zL9<0Z`YUdtFCr7bW@a&0do>fTDlWnu$usat;1Tqf&x7T8=V8(QOjvaK7_KC4 z=-1EZmpohv->RxG)Y*@GTu_ROo)zIE@7i4Gsd-%C#C z{y5FYwQY(J+!qYjjoazRrff*7se>8bw`u1K73h2wfe|};% zpnQD@rd*l^|ILelBQNF2ER}c+Sp8i**QgA48OLG4kTg6#Vj+x~F^2dBoQ2im^Eie1 zsTN3|!50^@;pfR5oI5Q9Ba)Wm-lkN%`*;uRuksMw{#gM3&XwQ>mRFHh{(=vF#k|`l z6*z6>S6Zx^#`$rnc-}Y}6*h^vBDFF+(;S7}QEBk$k`-)9bAYv9{t=?9hXsdYah=^g zvU}D+&}rIBd&ZbR&GM;m%?El08V zIIpae4N9t)=%o4}JS9~CWm$W0{L6EoF;bfzK6j2}$)qr?Qz92>H59*$jOvuXPLY53#R8H}H^5EKu@z34bemsPaT;7q_TuzEbFpU9 zL(nzFRMcQaMHxP$7*Wyia2x~%E?b~bs`^HGY@%&?WvoQ>!!`OE; znNI7qx5IzKSMtFxSHgzaN5q?ZO1@^NLB^LLu!y*TQ3qDTj4P3#&a#zOhp@S0`El}l z%}LC@GaMJSrjoPIPs7N&$MDDwO_1yRKn|6!r_Ucv#X{8OcWZ9MDUMf2&h~hU+sbUan5*W(-fTi#76w7G6cT-F(PVC zPHv=&?rc~Hx(%KLkGjKjYDWZ_Z8aM`MxQh?JSYP+^MYW6%^5r{zZ?G5*kS6} z0vNaBH(9+T7nj;_^ySo2IMI0uCpUQF^(+_EE*J&Z-;c($!dR>}yg~kL*+(^=JmY@b z5L_ScfV1EDfN4w@p&ya}P8q~?22foI7 zXrX4*pjlnX`=wISg7#R>f<#>2OfUbrbF7e90c!KK#?-1DQAXxtEjXUprjPqS=*Q_{ez z21}5H&juad08OqXq1@0Q++Gn3ZJ|5xR4ilkm&bC`)>ooUu?q2g9}Med6HxD`Ik!`# ziRC9IlL(#5w7PyOb&gbr_~jdLW62I2H&_hwPM5H{R01X6n8CV(`EYDc37xrz`LdS9 z!Mgg@U>cQ2-Ywq7ZMQxN9YMOpO;>;$3=}cbdq3pAI|C^(o6+W|Ee%V^gNWx7iML!h zp0}^%D$CA80GlU#G?|ee;t}du7Hf{WW4>R3~fi8;&s$3uutMVt{IYxRSH}2 zj@vfeD>}t(9eo^zpHRf=5aguqB*E!)Gx+514dM}#VDHws_*T%vS=M~yHpHgk7lZQ< z^{R=6k6s25Z&qUM=qjAmTm+MzjK#sRQP9IBLW25YdeGNXpnNf%aPcOv`Oyk&E2)MF z2ZKQ27KawkkB-%;s6 zI;k+Ey;lLs?=`Ns;Q z@Hv}t2V82U`M&=qf?d8IE`>ibL8OLqE^00_8$G!T897aoq+##?SXq1k+^o_UT)oA1;$}c#TO>$ zaa-0NX8mQN4^@H0n%GTp}z7QXqpw1_m?Hm zaOYAq%$Q3hewBfR+hW|)uLpS76Ccmq40}fj7=x-9`Yd7?i+(j$s)umTo{ptybq4U~ za2)m-catXbaI$qyAt7kS{L-x{?4TLaA* zb_n|CR^YXzVKC%zDU3S8n3MnQq!)HeL&=)+WRsQ)zqySUZ!xw(`}{(*>Q)fEj5vaF z#p9^!ngjTzHUeAYib#3Gdi<{JiXpbX5W@$-rbAE2y6L0oMR(>ooj-yu95##Ws9lU@ zjyjMR8HHz^Ebz9t8qM=+p}cbgbhs9go{_7mve{DnoT7w#XPSbUZUs)C>W-GF{}G?V zOL60uXvTT$CNm0C@K%chDtS)EPU#!;fDKZUhGdrao{rPQZ;0pVRY20@XI%A^K)gdv zgZK9{WKDLfXk2Lz10H&xvQZ0p8h zx!Q z=^L`|*CI@->f=P8eev*-G$?#j4E>r)P}`kN-oB0nPwUe#|6mB;JM#cZ=l!@!qY~)q zUk=T^2dGM80V+i*W^oFD-7Ds%cBeb%Z4pANzmqy^?Ll)#i>J5I}`WO_nYqI)mEPdX#1vV-rFt+qg zn6fty6POv7 zFgT6(Z)M^(GC3EA!zR#QXJCtEZ%v4Ms zR*m+jW4MiOi}0#t5xk#Vhc_B|V!0w2_G*}c?AlDe=*UStWa)*|Qflaq1YzZDzZ1TJK~`+4hYL>lD(xEyKZnj2P#8XOn_Fe>9gngqh~W+#03x%=fnrj@562 z*~;0(a^Ntknufv(Q79aGoruyiG|z)-ta)K9t?qQ9QP-P4c4?TA=XSrdc3 zpXT8#|HGWlxsjm1Ckp3XKaZB#Gf0Vf1QDuNqUxMJQ6c?$e?hhjlhF(}V11i!hnaq!Ab;vx>i=ZQUJ+m=n>v&I+A6pLxi z;d1nPRRs0Pj192jH`P^)5zJ$`z=syN)49?Iik7i@!+)P|7 zV?z(ly&?`eq9%Xb;z+-+;&PD3_)^r%C1t{Q`UPthfM$6H!Qq=UMg)VG1|3wFZkkw1J)qfzqp0crD5mr*zDM z6T!j1Ft(mvx6Rnz*!7UiKhZfwdLP?K!{F%(6)N}OET1mve zeo+Y1@1DgY&A9-6$#nDhv*fLN4ty{zB-0W`;J20#oC51Gi=2`SeQWj!vS(LAb%Q!7w_1&6JJ@`_HQ!p+T@qhpFG9gD1z1zzLA)Y% z($pipWM1bPn4P){eVxa!tR-s<-4X|#J=^(=-LcrUdl`Jw7K4e&G;Za}k(`xU2_EbU zg5lEn_#L`v;gw9Bzr%$eIw=)%UoXe4@6KS~__tJB{UmD6(Zf1*SM(Ru(CdkJ=nIEc z*sUJJrOpe(NULRNt2q3M_^gnE&AN_9v#YdeJO=8 z_+pF=`bAA3*Tz@DeXs+qNKbOZ^8i+vu7_?mtEn#NqIvb!C>1FSYxd^y{IG>E5McY$2VD)+`=*I2A=+Ciq z+ub1E=Fui_$=-<9O#xds+JeO+7kH938tg`T@^;CW=*-`H$oTQ;$eX%TS7TK=tT7fJ ztb0tuDs>sQ_X+tjG>^P-TnsyhPJz80&19z1LOig42u64vhfBdZB>HeR#5@S0Ta=*Wm?dEb0G|iHpRnF 
zazO2sCnj!5!-sd7Z}3|lNO>2bt85+q8R7-68Gpt?MvOO9Snb*_LKm|*sJd|&4(yWx z<5&CfJKGbOxo1Q5`4hP7iY0z{!g8wKbaN4tjzP+dN%U*ta!~p{1_m$zHa$qD>2-S8 zG}#6fs`KD(0OJ$K6=G%0X>gM*!SZ3T*!K7=m`>B7-$sSPf;Aqn{d+HWF2)3^g;C5O zeh7;kU2)vKL-g_dEJ&Z{js9sO!gAPAuH^z~9LvY%1B$r&Zyc1blLXCUar9l}2wZIv zil?@^lJ)-uL5=b;IK+H1^2_VM%CS^DOY|Q}c1*#cmch8t+a1zj3!Qp8i|OtqNtSy% zO;cFPdLb*k5PSx6lw%;$e>}JtrGl}0Ii1{gSD=)!i+uZ3ihs&dam2toZsa%>ba6_e zx0BbvOZzl<^&%X~PX8gfr`&P!y#`87&7*c|@pPMd4lR0l9K+HZNs#b3bg}#AMk8c5k@^pn}C_K1j_2*8g}Uez0I7M!Q90CgZ>ysw}`x z&4aM)^J(0o6i!woR$<2YbkLvrj;}DiORc;{(US$Hq<35&ap{f+)wfF2;cq6(AY??s{{ zV-{172lPYUFVZNeLGis9n3K1O4(>?Au&fBOzVbAkbM7xW^w$DAb*k{&yfEAZKeF`FBf35 z>v*F7i21}OtN@W+7SUJA!A#rmfuE|3GCqqzDJu+ioLNDrz?)7PHH6IDvjwXTy29dH zvG6eJDjgy;#cuhdc=YgT*z)xZER!msPv>kVRU3}cRbBVVrk=BS-!GD^J=#lqdW=!E z*cT38&w+rQo3P2_C?3A)hogQapy}a-@Za(EpfbCjf0{NQofS@iwag%SdRu}lUw4wK z1=nFlESpOxeYMe<6-RvhB5BX*3%K@T71OA%go4r}JZlgSU2oVvZfPXv*Ov?hL5HBz zb2%LF%VoOcWVHQxocQ$*p;lWL;8V{6s8Ku({YoF{`d@x{&15yoEsVj9Gd|FAp&^>g zWLl>z57c}XNlN_7aCp%|$i{dWVI~xZAO6hrfxE@IR%_9GYJ+%#W*U^|$lz6#3>X@B z4sQA~AGU81966Uks;>yZRGdX?cPHRPriYGN+D|?0e$lR?BD$)r40R4^K)gmM#72Z; z_m+cX%+TSWcXB&tcXubavOetZzc5^WW`IAWp+esVi6QN04o1c&(pHOFdh|dicaGZ9 z!^Rb`JZc@PyirHo7|Ze}%`j6~g|52_$%eD#ARku94a?sNvx2IiXk`KhZ&`)%56^-@ z;}RS-?;NS~w#N|$3Gn8!2!4FkAsg2%!Uff`I4k!ADlomWu@tOjQez@}rUqgc zE<(dIWjJO*5Y)daLYY-z824EO8}b^c+<+YHIWi3N3=HVgl%q^%6iUqxeQVZaL}d1pk)<8vri&vweux62OPfJdyNzrpapdfCs^H8O){nJ}hwXh1 zz_k`Z*+2qg=hkA+$z-@&wg*&#FVQ0z`S{`NHPZ4-nOBpajO+erfZ{D}Y-;v~MNv|4 zw(Svd=10Kg;mSDi>KFdOI8P{^B7n!kXOm;&^l^ynGMv~r11@~FplQpEL4I&LDy(ur zP1|Ce`|S|CzoHJCds6uAJK64kTME_;oWt2eH{lakF=5SDlcV)j>WepQ4Sbi5x!=rxY3QI1EF2{*h^qeaYn4hLFA898}qz6_WFuM7_I2+=|S>c6T7HPxR$p79A6i zDGub_dlzi!k0u?j+ll4=JKUs=o5{K-&6G|^CiZGMjK5nBhL<$yn8P7B-dqghCLY7- z@gCSO^?*E&(4ZaKHS9l0g5JK*^q!Y5jb+~{J>?W$X%7XMya3K!dPMKsS&XMI>9QUD z4&ptt0_?LxpzTr}q_R0&vU~)|asNqligF+{>O5Mtaai-Lfj>CH5~e3bP_MFBRMI_w z3*$oZj=~IlX`Kr81xeiLSEX?I&S@CEvPge}8SIAR>jBvoR) zZ6Y3?6Ae4et1-{N1STb%1ss!&m)i?C^M*JqtBi&a9mbaU&j>%Rm`9Wv>*J0{ml#(P#p+QST(oSgaJ+$}Ud-vX# z8M4Waip009tO((E|NhT$bmV=X=en-*JU^$?zA`jk!#c$7#K5lcO6Jj9112unRI*tN zt6$4h6sK9L_!*2B`-?c`^d6PdQpfUcTWMctQ%gG}lm`BW2+ zF|0rMcbgQ zyccwFSv4_LIu2eUU&z`trbT)F!`z zTR>h$O#oACc}~VHf*#JR#XotaShT5)<;lx%SfB-k_!Y>*J>_7(dM9MvzD@Le1M$#; zN_71_t~oKq2&JlT@nY+2;b2B48arg*&BqboWG$ARfzLEeq!t-Q+NSok(G4XD(q-*Ix%*IXd`a>xS?lwdXv2^(3 zk_wvBOIRP;ZjLYF4D9*tNOy@a596Z=FtEb`W}7_bO$%n6jL%89TgeX7r;O3x(`3-` zZ5d_?T_ykC9>!N6lrd2^h(6dx=#fK*pciaWU%-Zbnv;hkN0Nuh*=9D!Cmy=I7F}1Sfj%Y&PumTLAY(%uxQXGw?AVgKVn-_3wO6+-GXQ z)(z=M!W2OKTOE4uYQ+scwYb~C5_k1RgT=w~^n%<3N>lRj!=X3~_6x;p$7(tH+M1}x zS)omf9}V+Ug`M&atmnEOKZhtHC-{O_l&eUtoh>6dtJi^nbrM;*@F{(+;K@_`*MPT3 z2$cNlrneuZlXdEsxY=2~WM0-yD)jUy{F(5DsxMK%-@Syjm|Nj))d60ZN)q}BhvV;@ zd(@NpXN-2Ty}$jr=Fa=FaIv%nsZ|!4rAFzK);Bzlp*-q(tqPYM?tl%ABEUXo}b;7@mZ=0fkUYzSjrC#&T( zQLbYz`MTuy!!nVmXT?Zq91q9Aa#M|q;|_*w4Eaaue|)QF|&i3nQehJvnG)}?+kf8 zk8Olmh?8`VcQtf5 z=-{r0?|J$MO2Jex5>jVa@{&fvXx2z94SYD#QFFHgt`DX`;)z3G&JJWIpQ}jIU18KO zWW6>c^N9UUX;5g^1e+&Dc&l2BIPDdHPlmBL$(YqG*@y;| zaB9V}d`V&=u>pb|H{vOqFXYk57|dsPfr+pgvIA3r_t2Z}@A9N(F3B*rDhCET9dPuK zH6(h9K+6>#8HyG|H(yb_6m*}rIwcWL&A3hTW-Z3Ct2@c)umP;`d`(;?7J<;437{!# z1!-W=k?GM2Zrj$uiDgxI|J7A;bND6oyMGL#%}w#HYy@n%9^0<@v>7wE`V%{;S5zR! 
z7LjSu9b2d4J-%mDPooO^W;KEH17Y4Bt?h7GdpYoDt%fhNreXZ+Lsa~}Fm62%4}RC}d0|dM=+yF( z?zaf!V#ivUpR9)#4ur#xp)|NQ*P67yp9g6l9H6@G8r|le4JKc9GYx4a^zST$ssK$` z_cxu)(vn7=W~OGAj4Y%|3gBwCjxVDw%iMYIg#vA2Wk%gVh-P zz5_Q;HKb$vML{Ar9i7dBknhe)ZWhlJ&9I4$5q4qc=!4l@9-<`Jm<;<=u_Op~fvVjp?WXJI-MG@sGs4N)da;{OMZD zT5{JhsblJ01(Z0w9OfmuW81d`&f>HU3?Ea0{CywjFP4StC^<}y1Qp@+Lyl}7iG^P; z3(-8Z5T*rIpw-ANS~%Xrn0J9-ZDNdzLsvoTk2So*yLO?vrMpI*pFJF(RSdT&>$c=G zf{_3fW{^?^-^nL&sel{Y)0#s6e%*`OLSdNjEe{HE{&E4#BYGZcnJ!!cE32x&FfyH! zcJYVZ4`xDcwio@Oy`FPlS4lTrm4T4rMwr}x3|edZ=%Wwq_^qv!-dLnWhAx-!&hRQTi>~RulcXY*^AV-)U zTSo8qPR2HoR>+!d3?<_>;BG0776#FpkDcP-vC?lUWt;+0ek_li@`WrEKZeMyqCc9G zNMO|*QZ&hk?)u;cofkKPQItP!8s9~Pw2V8xE_H`P)Atd5k?r`Y>L>4!$uvADI)RSq z?Icqh~4iP_7ugugff0?j{D+kJgRmfayG<)uLK+XAZe^(^Us;{b2=<>KC} zVHhnq9Rh~+iPDh?=q#TAmbwOb*g^xBX2iq!T44xURi1OPm(KOr|2|{nf zAUjG1j1t>ui>NDXa{NmTC0CN$I!iikpO3+?GX%FrM3SAiXLH@hFOs=xrEo>P6fHd~ z;ok%{f7UO7%TF6&Fz5^p)@P8)-fGUY=?uzT3WrmI-9+QlB+mI)J0yH?WO|J@G0=L> z+qUKuJb84D-e|Q(Nz3{Uqt6cbu;Dieu3wIk$7hi7r~{a!(hO?fSHokO4!k+h3fyjI z;MMnMaJ1ip9v?VMwPcfM!fscXX;^}DPX>~gjKQn1E)R3uJ?S??UkEw42b5?4?D}AZ zX&V5l}TROImTwgw)wMAOTCUvo7G9D z+pGkde;N#)4e(MNCS%DI4ftKcGUfp_^lu;YnYFjmdqFiYotI1o^&(NoCJ`&L^BA-L z3tf}+kof!%0I+MIHKn^SFZu%cUYJjDkvUeGr{U$XNjT?JF5vo;^xT}^)PMgsUi85X zEIlzyI&$LRd-F2xobxU?RPuzb4D`hAEt_fh71sTCJqP3Yi}8ePs77PVR+dfvMlyH% zaxGEJ-O&;V&kiSGxoj*oyqpYk#8TkKLN+%fhfwb@Bit(zOa9r`qH2i+9=Y5_PZ^0o z;8x~$w#x*`8?kUs#|3x3D8Yj@9{7}bfVIb8kU3NJ;q!}^^nSE79$O;V@la+iSv`E6 zr@twfH*sPC%t$l2V$Jsn$@s}q~Y=Gf`$j`P#T zX}r#3-p5;y=whqs;4fl{@xm=AW|{;~v}_?pK8;Lj$ZglLiJ&^G%0VnCng(=-gR@5k zwr&@~`Fx`!&DKow2z$o88GJ$95D;V?ao6c0obg5wed`2aqQg0=Ips8N$o{}P)p8K6 zr|HA1QWXdY^Ma+d(J23;5%cc>XesZbdJ=y747%?PY)TS*r;51*&oBi3HZq z;eh+MD&bX{fhn_obL$ii)A>5qXl;9rHje_PugV}tUnoGLZUHak>o7Irzekn3_dwzq z4ij_5>SB1WG!>iP11{KinK$cKHqMmS!8b33$*%avyl&`JDt0USK-Ex{4ZnXr?{qIYUE z)M&Z!KDvLS!{gscywGB-D9VFCDLo916o#$Rrm*|hCKwG#$745sa#mM3s%B|IZp+%> z{YUD|m#qd>&Ku!_zY9KE=Yl)-H)4DqyXB8BM3EkU7+kG_zc)z0khU}DE46@b@~MTc z=6GCoaW9!QcAh++aFFz!8ln0(gFvXL6^`}9($161@WWpR_@iqKBT5?B)E9_WO-V$! zw~r<^6rrWDy(Z2Y=i*Q1;&d8_Z}tnq_GPRquVWF8HNK;E&PwfHW0xVHAIs%mNdZfj zHrRG1x})UXGh)aKg;A;XP+5^gX?ABcl^b!!r`Wd&r&Idf0@Sq`{J5vOJa^W;Ptp{#Z# z^iInmR_pp{*1LH4Pq!K#YN*rI9)Vata)d^9+TnyvM&NB&kBe>9;Z5!e?4A-pRj)3? 
zt;=(u%IqMT9u~t%daX2Kv=Jkg>4D((co3>T1t}*>@LH=focrQJ$hAD$*=s>=TrWa1 z<&`MdorCMyuKRDO77Xt&Ak3!QyixX?Dcs!#|LokLL}oL*Oo#+WMNg28D#xZsWqMAy zodlJ}!0Z=u(A=vTJNE{Gr|T~={o8HwE>xS;**M~Zqy`L6i-d_+(jZoUEzWAM!nI1w z1J*7N%k;#h14(THztR1e(j!CP zuzRuoZ8eC_lELJ+29Pq6MQu$axTt|2^ix+o-v8r9-MfP@tu+)HW}c__dLr?W;Y2)k zF94;E)I$6n0a*3cl{&c{B*Og1Nk@4&wwq3(@3V?|pPwdkQqHGwsmx7su5>p#Jrl*{ zQ;$QNC_kLLAVI{I%s{Jd(p-P zuUhHoFm*k+<7OfoUZQNi0O6+dgT{+*rzptu29#*|M&}` zOfv`gESHhy>_FJ1pACNQ6F}=?B9V|g1^Jd@pm{tUeZZFTU+04YYH^i+WqI)ykiH)guB1dxODvM-~{~KZ3TRy0FDM27S*5 zz*`6<<0?|ncRvIsJqyD5(phjJ$O$nfAHK%sGPeCuDA`*MRyBcCrCtip_#R^4&nd8= zNQDH6ZNTnqQ(WsRj$ek9@t+U7tF9^qX}LrA?L-4w^gJdv5&#B<1aW76I7-B-5pDYf zxOs62{P}Mh5q)@@_j1m6y5o};Sd}KCP(=yJOfEyqIcp(HNEMx4>B5j}JmX7A!^S7d zbfdH@tvD-(OJr*C{n17=cp3pB)8%>hUY}v!897{(k_7MCJn(^W71=Vxhgnn@oldU@ zpJ#!fHLs8O&fA5z8<+_;Du;3C$qDS(mIK~7 zm1IS%BTmZ*f>meoVAH!WIGa6_76;FQ8-rss==5$_Q2C1f9^Q(T|8<~tzX7#j`8^2( zN3j1lpXXFsiq2{*TUO!8dRq^mypS+s_+)9O|6GhhYZrodf;!S8Y>p|j#FNqQxy(g{ z=r$Hfb(AwSRoLGl{@;F(D42ymRFueqQdz7m87JjSg78tE8?K!5jx1=uMt(18!hg9- zI0>N zd=*`YSdoTSwe#@gf91GuToE5WYX^JTaGbqX1<#$3hi_(O2d6NivA{P#0 zErVoxPARA(aRc%EHAD{5TH@Xd0Z6@ZsN8Xfn2pj+w^MnyDFZVsa#`H?HBf{bL-uS-YtHCOg=Xc>>R= zE+c%>N?^4%6<2zfqeS#D^_px%?oB%Y8=p$Sjs8hou#zkm^G}DB%&XwHuZU)S48@~O zt@u*87JjgIXqLPP@Tttib0*6%Xx}8wkH=HU&YWDlCmaX@B2`?=(Hso#JAfC3Lx@*m zDV!9V3=d8M*4-+HFQWmFywMfaAG@JZwm(_;op~II7--is=GWbqyo>59ps&xUW9h+q z?qQBR2u7^MtDbv^o2WIBUl0o#H+1nh%g_EO?WNP-93Y`z*23Sb!FclUWH41KCVaxX znF((yyh;zHvr98rpK(0&4)SAvjS|~S5`3#2O(c3qjpGVlruG=Q zCUKVQ%8kUYqjmIQq$!D9T#D7F#oPVU&vKtEn_=pqbh_n95H#9;CfV_~sEJrLc%H7q zp2S7qAyx^T{eB$YW{W$ElhJX02hruV@ecYOK&^yil>MFv=dS9&8m&KcryWd4oM-vAx%d?x|VSRQwx1{Pcw zCl%#gL}7yfHFjnms$h9KwLgq|w`4iSd--zlTMmKr?|d9S)QaW}%D|p72v?iwa~g?9 z&0}#(ZvfLE1<2XaEG#ya!!(wmemqTz^$PmK{K-ame{vL^YSIiouYzIc91Wb3xf*9P z?Xz1MoJm^f7F=By z1xs!*234dz_-|{1_5G62D!!k-dRmM}lrnMBmT`JGrxDcl#}T)PZn`mi0*z*QLcV?( zoP|9!b7~z-4t7A>!$NTJRRGD1ZbI{63+m<>2&qBR)Ne`i%85%+;1rv)dQ)KGr4VAkx))spt-*9g z3b@>~z#ki`@vA`%`MAxOclq=V-sNsHT%?ppzuBgvoL?Z<`I3Xbzow9&=v_E|rJuyi zse~-{e80QP9!|?H0I4%-kT)_PU#Z;UP4eM`^1n~Gd70ax?6@{~JSYS=bLK0!;)o~q zjS`&S#P#?6Bfs_6Va382(C@mz8QXjyq8<$>s2Gex%l42Bstu%S`3EYyxPdeZC~zY1mgnS z(OkYD4Hs(*p@jPYjafE_E-RP^y=!GU3KMUTH;D&8{NYJhm{de&PZtD?|HK^;TnSo? zq44!}3ys{*W*XfX9Bt_)@9fHO=xjKQ=dT6jZR6BorZZV(?Z|7LT7oizl@O=bK@6v4 zf>K%oX5R*ERZhnT-=#yj^&mX}fEyZR_sww{)(&%k+3r^$@&&ve7I zB=Uj%|4-Y&!BSaCyj^dFs_*uL{v+1uE)&l?=&FzJKgtvPx=c(CW6!GQc67NH3>L?1 zdHyEu^zzCDP~XyqA9m_ui_(2!{OuOKW1|RZ&z(_s{c>mu4MgwWA{gk2L11?#$v1g$ zs?HDt4hDjmn;uH*IbdRJEV--Sh@y-5F}6>Zo0K6;Yx%<&r>upnwogR!mxVNCcQa!? zB;i=wA^LM{4p!$bB5^bIP-s>ucF5EdJaG!1#~xuGti2#u-O4;MBA{}167yWQVM5}6 z?P6D^u)KW`45{xTf!6wHrBIC$!#knmP8Du@c?g3F{V-o32}?gYSb=w(YQ;^(a3JEoPcQC8+3R(edRfP?jSMnij#h{-q7Xe6)k~I1wDl zF(GdyJm8b)YPfXpDk1oSrY}B)k}~TN&P9T2X)4oOjv*^_<8;DL@}7s8;2DE zULTdi6ASx@*k)m-r_6@m9}jX_z7ZJD3&IDbopizIBux3T04^A=!ApgSxcTj3RJd`O zYNu%6-F>AfT7QR@X01Ur$pZL6@>%|?k1OeJ=Dd{>VCpE#x+}$?Tx7oHs)wOeZ($nR z*BFrnKFK(E(jV!19oVmthO#~!@yNZ#yPTMdn**w_$+iIBrLcap_s(43-m7$8iYoSa zIl>nQUHH$Y4zG9WV4HUqsQH=E{-_YB_(>GQ@!7DB-H@X{gks0&LbM8+3jI=X^sdlKxP32+dDDgQ{M_Ye zdoB<=T>&$toM1&r5dIPnL1o2C%!pV=_Z8>DapO9iNV-@@b{h`zy`yJ#IdI$3TG6g! 
[Unreadable GIT binary patch data omitted.]
+
+   # Example: pybindings with CoreML *only*
+   ./install_executorch.sh --pybind coreml
+
+   # Example: pybindings with CoreML *and* XNNPACK
+   ./install_executorch.sh --pybind coreml xnnpack
+   ```
+
+   By default, the `./install_executorch.sh` command installs pybindings for XNNPACK. To disable pybindings altogether:
+   ```bash
+   ./install_executorch.sh --pybind off
+   ```
+
+> **_NOTE:_** Cleaning the build system
+>
+> When fetching a new version of the upstream repo (via `git fetch` or `git
+> pull`), it is a good idea to clean the old build artifacts. The build system
+> does not currently adapt well to changes in build dependencies.
+>
+> You should also update and pull the submodules again, in case their versions
+> have changed.
+>
+> ```bash
+> # From the root of the executorch repo:
+> ./install_executorch.sh --clean
+> git submodule sync
+> git submodule update --init
+> ```
+
 ## Targets Built by the CMake Build System
 
 ExecuTorch's CMake build system covers the pieces of the runtime that are
@@ -87,12 +179,12 @@ cmake --build cmake-out -j9
 
 First, generate an `add.pte` or other ExecuTorch program file using the
 instructions as described in
-[Setting up ExecuTorch](getting-started-setup.md#building-a-runtime).
+[Preparing a Model](getting-started.md#preparing-the-model).
Then, pass it to the command line tool:

```bash
-./cmake-out/executor_runner --model_path path/to/add.pte
+./cmake-out/executor_runner --model_path path/to/model.pte
```

If it worked, you should see the message "Model executed successfully" followed
@@ -159,7 +251,7 @@ xcode-select --install
```

Run the above command with `--help` flag to learn more on how to build additional backends
-(like [Core ML](build-run-coreml.md), [MPS](build-run-mps.md) or XNNPACK), etc.
+(like [Core ML](backends-coreml.md), [MPS](backends-mps.md) or XNNPACK), etc.
Note, some backends may require additional dependencies and certain versions of Xcode and iOS.
3. Copy over the generated `.xcframework` bundles to your Xcode project, link them against
@@ -172,6 +264,6 @@ Check out the [iOS Demo App](demo-apps-ios.md) tutorial for more info.

You have successfully cross-compiled `executor_runner` binary to iOS and Android platforms. You can start exploring advanced features and capabilities. Here is a list of sections you might want to read next:

-* [Selective build](./kernel-library-selective_build) to build the runtime that links to only kernels used by the program, which can provide significant binary size savings.
+* [Selective build](kernel-library-selective-build.md) to build the runtime that links to only kernels used by the program, which can provide significant binary size savings.
* Tutorials on building [Android](./demo-apps-android.md) and [iOS](./demo-apps-ios.md) demo apps.
-* Tutorials on deploying applications to embedded devices such as [ARM Cortex-M/Ethos-U](./executorch-arm-delegate-tutorial.md) and [XTensa HiFi DSP](./build-run-xtensa.md).
+* Tutorials on deploying applications to embedded devices such as [ARM Cortex-M/Ethos-U](backends-arm-ethos-u.md) and [XTensa HiFi DSP](./backends-cadence.md).
diff --git a/docs/source/using-executorch-cpp.md b/docs/source/using-executorch-cpp.md
new file mode 100644
index 00000000000..4f8a83830e0
--- /dev/null
+++ b/docs/source/using-executorch-cpp.md
@@ -0,0 +1,75 @@
+# Using ExecuTorch with C++
+
+In order to support a wide variety of devices, from high-end mobile phones down to tiny embedded systems, ExecuTorch provides an API surface with a high degree of customizability. The C++ APIs expose advanced configuration options, such as controlling memory allocation, placement, and data loading. To meet the needs of both application and embedded programming, ExecuTorch provides a low-level, highly-customizable core set of APIs, and a set of high-level extensions, which abstract away many of the low-level details that are not relevant for mobile application programming.
+
+## High-Level APIs
+
+The C++ `Module` class provides the high-level interface to load and execute a model from C++. It is responsible for loading the .pte file, configuring memory allocation and placement, and running the model. The Module constructor takes a file path and provides a simplified `forward()` method to run the model.
+
+In addition to the Module class, the tensor extension provides an encapsulated interface to define and manage tensor memory. It provides the `TensorPtr` class, which is a "fat" smart pointer. It provides ownership over the tensor data and metadata, such as size and strides. The `make_tensor_ptr` and `from_blob` methods, defined in `tensor.h`, provide owning and non-owning tensor creation APIs, respectively.
+
+```cpp
+#include <executorch/extension/module/module.h>
+#include <executorch/extension/tensor/tensor.h>
+
+using namespace ::executorch::extension;
+
+// Load the model.
+Module module("/path/to/model.pte");
+
+// Create an input tensor.
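+// Note: from_blob wraps an existing buffer without copying (a non-owning
+// tensor, as described above), so the buffer must outlive the tensor.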
+float input[1 * 3 * 256 * 256];
+auto tensor = from_blob(input, {1, 3, 256, 256});
+
+// Perform an inference.
+const auto result = module.forward(tensor);
+
+if (result.ok()) {
+  // Retrieve the output data.
+  const auto output = result->at(0).toTensor().const_data_ptr<float>();
+}
+```
+
+For more information on the Module class, see [Running an ExecuTorch Model Using the Module Extension in C++](extension-module.md). For information on high-level tensor APIs, see [Managing Tensor Memory in C++](extension-tensor.md).
+
+## Low-Level APIs
+
+Running a model using the low-level runtime APIs allows for a high degree of control over memory allocation, placement, and loading. This allows for advanced use cases, such as placing allocations in specific memory banks or loading a model without a file system. For an end-to-end example using the low-level runtime APIs, see [Running an ExecuTorch Model in C++ Tutorial](running-a-model-cpp-tutorial.md); a minimal sketch is also included at the end of this page.
+
+## Building with CMake
+
+ExecuTorch uses CMake as the primary build system. Inclusion of the module and tensor APIs is controlled by the `EXECUTORCH_BUILD_EXTENSION_MODULE` and `EXECUTORCH_BUILD_EXTENSION_TENSOR` CMake options. As these APIs may not be supported on embedded systems, they are disabled by default when building from source. The low-level API surface is always included. To link, add the `executorch` target as a CMake dependency, along with `executorch_module_static` and `executorch_tensor`, if desired.
+
+```
+# CMakeLists.txt
+add_subdirectory("executorch")
+...
+target_link_libraries(
+    my_target
+    PRIVATE executorch
+    executorch_module_static
+    executorch_tensor
+    optimized_native_cpu_ops_lib
+    xnnpack_backend)
+```
+
+See [Building from Source](using-executorch-building-from-source.md) for more information on the CMake build process.
+
+## Reference Runners
+
+The ExecuTorch repository includes several reference runners, which are simple programs that load and execute a .pte file, typically with random inputs. These can be used to sanity check model execution on a development platform and as a code reference for runtime integration.
+
+The `executor_runner` target is built by default when building with CMake. It can be invoked as follows:
+```
+./cmake-out/executor_runner --model_path path/to/model.pte
+```
+
+The runner source code can be found in the ExecuTorch repo under [examples/portable/executor_runner.cpp](https://github.com/pytorch/executorch/blob/main/examples/portable/executor_runner/executor_runner.cpp). Some backends, such as CoreML, have dedicated runners to showcase backend and platform-specific functionality. See [examples/apple/coreml](https://github.com/pytorch/executorch/tree/main/examples/apple/coreml) and the [examples](https://github.com/pytorch/executorch/tree/main/examples) directory for more information.
+
+## Next Steps
+
+- [Runtime API Reference](executorch-runtime-api-reference.md) for documentation on the available C++ runtime APIs.
+- [Running an ExecuTorch Model Using the Module Extension in C++](extension-module.md) for information on the high-level Module API.
+- [Managing Tensor Memory in C++](extension-tensor.md) for information on high-level tensor APIs.
+- [Running an ExecuTorch Model in C++ Tutorial](running-a-model-cpp-tutorial.md) for information on the low-level runtime APIs.
+- [Building from Source](using-executorch-building-from-source.md) for information on CMake build integration.
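+
+## Appendix: A Minimal Low-Level Sketch
+
+The following is a minimal sketch of the low-level flow described in [Low-Level APIs](#low-level-apis) above: loading a program through a `FileDataLoader` and supplying memory explicitly through a `MemoryManager`. It is a sketch, not a definitive implementation: the pool sizes are placeholder assumptions, and real code should size the planned buffers from the program's method metadata and check every `Result` for errors.
+
+```cpp
+#include <cstdint>
+
+#include <executorch/extension/data_loader/file_data_loader.h>
+#include <executorch/runtime/executor/program.h>
+#include <executorch/runtime/platform/runtime.h>
+
+using namespace ::executorch::runtime;
+
+int main() {
+  runtime_init(); // Initialize the ExecuTorch runtime (PAL, etc.).
+
+  // Load the .pte file from disk.
+  auto loader = executorch::extension::FileDataLoader::from("model.pte");
+  auto program = Program::load(&loader.get());
+
+  // Working memory for runtime structures (placeholder size).
+  static uint8_t method_pool[1 * 1024 * 1024];
+  MemoryAllocator method_allocator(sizeof(method_pool), method_pool);
+
+  // Memory-planned buffer. A real application would query the program's
+  // MethodMeta for the number and sizes of the planned buffers.
+  static uint8_t planned_pool[4 * 1024 * 1024];
+  Span<uint8_t> planned_spans[] = {{planned_pool, sizeof(planned_pool)}};
+  HierarchicalAllocator planned_memory({planned_spans, 1});
+
+  MemoryManager memory_manager(&method_allocator, &planned_memory);
+
+  // Load and run the method. Inputs would be provided via set_input().
+  auto method = program->load_method("forward", &memory_manager);
+  Error status = method->execute();
+  return status == Error::Ok ? 0 : 1;
+}
+```
+
+Because all buffers here are caller-provided, the same pattern works on systems without a heap, and swapping `FileDataLoader` for a buffer-based loader removes the file-system dependency as well.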
diff --git a/docs/source/using-executorch-export.md b/docs/source/using-executorch-export.md
new file mode 100644
index 00000000000..62a52edf839
--- /dev/null
+++ b/docs/source/using-executorch-export.md
@@ -0,0 +1,178 @@
+# Model Export and Lowering
+
+This section describes the process of taking a PyTorch model and converting it to the runtime format used by ExecuTorch. This process is commonly known as "exporting", as it uses the PyTorch export functionality to convert a PyTorch model into a format suitable for on-device execution. The result is a .pte file which is optimized for on-device execution using a particular backend.
+
+## Prerequisites
+
+Exporting requires the ExecuTorch Python libraries to be installed, typically by running `pip install executorch`. See [Installation](getting-started.md#Installation) for more information. This process assumes you have a PyTorch model, can instantiate it from Python, and can provide example input tensors to run the model.
+
+## The Export and Lowering Process
+
+The process to export and lower a model to the .pte format typically involves the following steps:
+
+1) Select a backend to target.
+2) Prepare the PyTorch model, including inputs and shape specification.
+3) Export the model using torch.export.export.
+4) Optimize the model for the target backend using to_edge_transform_and_lower.
+5) Create the .pte file by calling to_executorch and serializing the output.
+
+Quantization - the process of using reduced precision to reduce inference time and memory footprint - is also commonly done at this stage. See [Quantization Overview](quantization-overview.md) for more information.
+
+## Hardware Backends
+
+ExecuTorch backends provide hardware acceleration for a specific hardware target. In order to achieve maximum performance on target hardware, ExecuTorch optimizes the model for a specific backend during the export and lowering process. This means that the resulting .pte file is specialized for the specific hardware. In order to deploy to multiple backends, such as Core ML on iOS and Arm CPU on Android, it is common to generate a dedicated .pte file for each.
+
+The choice of hardware backend is informed by the hardware that the model is intended to be deployed on. Each backend has specific hardware requirements and levels of model support. See the documentation for each hardware backend for more details.
+
+As part of the .pte file creation process, ExecuTorch identifies portions of the model (partitions) that are supported for the given backend. These sections are processed by the backend ahead of time to support efficient execution. Portions of the model that are not supported on the delegate, if any, are executed using the portable fallback implementation on CPU. This allows for partial model acceleration when not all model operators are supported on the backend, but may have negative performance implications. In addition, multiple partitioners can be specified in order of priority. This allows for operators not supported on the GPU to run on the CPU via XNNPACK, for example.
+
+### Available Backends
+
+Commonly used hardware backends are listed below. For mobile, consider using XNNPACK for Android and XNNPACK or Core ML for iOS. To create a .pte file for a specific backend, pass the appropriate partitioner class to `to_edge_transform_and_lower`. See the appropriate backend documentation and the [Export and Lowering](#export-and-lowering) section below for more information.
+
+- [XNNPACK (Mobile CPU)](backends-xnnpack.md)
+- [Core ML (iOS)](backends-coreml.md)
+- [Metal Performance Shaders (iOS GPU)](backends-mps.md)
+- [Vulkan (Android GPU)](backends-vulkan.md)
+- [Qualcomm NPU](backends-qualcomm.md)
+- [MediaTek NPU](backends-mediatek.md)
+- [Arm Ethos-U NPU](backends-arm-ethos-u.md)
+- [Cadence DSP](backends-cadence.md)
+
+## Model Preparation
+
+The export process takes in a standard PyTorch model, typically a `torch.nn.Module`. This can be a custom model definition, or a model from an existing source, such as TorchVision or HuggingFace. See [Getting Started with ExecuTorch](getting-started.md) for an example of lowering a TorchVision model.
+
+Model export is done from Python, commonly through a Python script or an interactive Python notebook, such as Jupyter or Colab. The example below shows instantiation and inputs for a simple PyTorch model. The inputs are prepared as a tuple of torch.Tensors, and the model can run with these inputs.
+
+```python
+import torch
+
+class Model(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.seq = torch.nn.Sequential(
+            torch.nn.Conv2d(1, 8, 3),
+            torch.nn.ReLU(),
+            torch.nn.Conv2d(8, 16, 3),
+            torch.nn.ReLU(),
+            torch.nn.AdaptiveAvgPool2d([1,1])
+        )
+        self.linear = torch.nn.Linear(16, 10)
+
+    def forward(self, x):
+        y = self.seq(x)
+        y = torch.flatten(y, 1)
+        y = self.linear(y)
+        return y
+
+model = Model()
+inputs = (torch.randn(1,1,16,16),)
+outputs = model(*inputs)
+print(f"Model output: {outputs}")
+```
+
+## Export and Lowering
+
+To actually export and lower the model, call `export`, `to_edge_transform_and_lower`, and `to_executorch` in sequence. This yields an ExecuTorch program which can be serialized to a file. Putting it all together, lowering the example model above using the XNNPACK delegate for mobile CPU performance can be done as follows:
+
+```python
+import torch
+from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
+from executorch.exir import to_edge_transform_and_lower
+from torch.export import Dim, export
+
+class Model(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.seq = torch.nn.Sequential(
+            torch.nn.Conv2d(1, 8, 3),
+            torch.nn.ReLU(),
+            torch.nn.Conv2d(8, 16, 3),
+            torch.nn.ReLU(),
+            torch.nn.AdaptiveAvgPool2d([1,1])
+        )
+        self.linear = torch.nn.Linear(16, 10)
+
+    def forward(self, x):
+        y = self.seq(x)
+        y = torch.flatten(y, 1)
+        y = self.linear(y)
+        return y
+
+model = Model()
+inputs = (torch.randn(1,1,16,16),)
+dynamic_shapes = {
+    "x": {
+        2: Dim("h", min=16, max=1024),
+        3: Dim("w", min=16, max=1024),
+    }
+}
+
+exported_program = export(model, inputs, dynamic_shapes=dynamic_shapes)
+executorch_program = to_edge_transform_and_lower(
+    exported_program,
+    partitioner = [XnnpackPartitioner()]
+).to_executorch()
+
+with open("model.pte", "wb") as file:
+    file.write(executorch_program.buffer)
+```
+
+This yields a `model.pte` file which can be run on mobile devices.
+
+### Supporting Varying Input Sizes (Dynamic Shapes)
+
+The PyTorch export process uses the example inputs provided to trace through the model and reason about the size and type of tensors at each step. Unless told otherwise, export will assume a fixed input size equal to the example inputs and will use this information to optimize the model.
+
+Many models require support for varying input sizes. To support this, export takes a `dynamic_shapes` parameter, which informs the compiler of which dimensions can vary and their bounds. This takes the form of a nested dictionary, where keys correspond to input names and values specify the bounds for each input.
+
+In the example model, inputs are provided as 4-dimensional tensors following the standard convention of batch, channels, height, and width (NCHW). An input with the shape `[1, 1, 16, 16]` indicates a batch size of 1, 1 channel, and a height and width of 16.
+
+Suppose your model supports images with sizes between 16x16 and 1024x1024. The shape bounds can be specified as follows:
+
+```
+dynamic_shapes = {
+    "x": {
+        2: Dim("h", min=16, max=1024),
+        3: Dim("w", min=16, max=1024),
+    }
+}
+
+ep = torch.export.export(model, inputs, dynamic_shapes=dynamic_shapes)
+```
+
+In the above example, `"x"` corresponds to the parameter name in `Model.forward`. The 2 and 3 keys correspond to dimensions 2 and 3, which are height and width. As there are no specifications for batch and channel dimensions, these values are fixed according to the example inputs.
+
+ExecuTorch uses the shape bounds both to optimize the model and to plan memory for model execution. For this reason, it is advised to set the dimension upper bounds no higher than needed, as higher bounds increase memory consumption.
+
+For more complex use cases, dynamic shape specification allows for mathematical relationships between dimensions. For more information on dynamic shape specification, see [Expressing Dynamism](https://pytorch.org/docs/stable/export.html#expressing-dynamism).
+
+## Testing the Model
+
+Before integrating the runtime code, it is common to test the exported model from Python. This can be used to evaluate model accuracy and sanity check behavior before moving to the target device. Note that not all hardware backends are available from Python, as they may require specialized hardware to function. See the specific backend documentation for more information on hardware requirements and the availability of simulators. The XNNPACK delegate used in this example is always available on host machines.
+
+```python
+import torch
+from executorch.runtime import Runtime
+
+runtime = Runtime.get()
+
+# Any height and width within the exported bounds is accepted.
+input_tensor = torch.randn(1, 1, 32, 32)
+program = runtime.load_program("model.pte")
+method = program.load_method("forward")
+outputs = method.execute([input_tensor])
+```
+
+For more information, see [Runtime API Reference](executorch-runtime-api-reference.md).
+
+## Next Steps
+
+The PyTorch and ExecuTorch export and lowering APIs provide a high level of customizability to meet the needs of diverse hardware and models. See [torch.export](https://pytorch.org/docs/main/export.html) and [Export API Reference](export-to-executorch-api-reference.md) for more information.
+
+For advanced use cases, see the following:
+- [Quantization Overview](quantization-overview.md) for information on quantizing models to reduce inference time and memory footprint.
+- [Memory Planning](compiler-memory-planning.md) for information on controlling memory placement and planning.
+- [Custom Compiler Passes](compiler-custom-compiler-passes.md) for information on writing custom compiler passes.
+- [Export IR Specification](ir-exir.md) for information on the intermediate representation generated by export.
\ No newline at end of file
diff --git a/docs/source/getting-started-faqs.md b/docs/source/using-executorch-faqs.md
similarity index 99%
rename from docs/source/getting-started-faqs.md
rename to docs/source/using-executorch-faqs.md
index c7c03488de1..56384c8015e 100644
--- a/docs/source/getting-started-faqs.md
+++ b/docs/source/using-executorch-faqs.md
@@ -1,4 +1,4 @@
-# FAQs and Common Issues
+# Frequently Asked Questions

This page summarizes frequently asked questions and provides guidance on issues that commonly occur when adopting ExecuTorch.
diff --git a/docs/source/apple-runtime.md b/docs/source/using-executorch-ios.md
similarity index 93%
rename from docs/source/apple-runtime.md
rename to docs/source/using-executorch-ios.md
index 4114b780607..029914eb498 100644
--- a/docs/source/apple-runtime.md
+++ b/docs/source/using-executorch-ios.md
@@ -1,6 +1,8 @@
-# Integrating and Running ExecuTorch on Apple Platforms

-**Author:** [Anthony Shoumikhin](https://github.com/shoumikhin)
+# Using ExecuTorch on iOS

+ExecuTorch supports both iOS and macOS via Objective-C, Swift, and C++. ExecuTorch also provides backends to leverage Core ML and Metal Performance Shaders (MPS) for hardware-accelerated execution on Apple platforms.
+
+## Integration

The ExecuTorch Runtime for iOS and macOS is distributed as a collection of prebuilt [.xcframework](https://developer.apple.com/documentation/xcode/creating-a-multi-platform-binary-framework-bundle) binary targets. These targets are compatible with both iOS and macOS devices and simulators and are available in both release and debug modes:
@@ -17,8 +19,6 @@ Link your binary with the ExecuTorch runtime and any backends or kernels used by

**Note:** To access logs, link against the Debug build of the ExecuTorch runtime, i.e., the `executorch_debug` framework. For optimal performance, always link against the Release version of the deliverables (those without the `_debug` suffix), which have all logging overhead removed.

-## Integration
-
### Swift Package Manager

The prebuilt ExecuTorch runtime, backend, and kernels are available as a [Swift PM](https://www.swift.org/documentation/package-manager/) package.
@@ -84,9 +84,9 @@ swift package resolve
swift build
```

-### Local Build
+### Building from Source

-Another way to integrate the ExecuTorch runtime is to build the necessary components from sources locally and link against them. This route is more involved but certainly doable.
+Another way to integrate the ExecuTorch runtime is to build the necessary components from source locally and link against them. This is useful when customizing the runtime.

1. Install [Xcode](https://developer.apple.com/xcode/resources/) 15+ and Command Line Tools:

@@ -106,7 +106,7 @@ git clone https://github.com/pytorch/executorch.git --depth 1 --recurse-submodul
python3 -m venv .venv && source .venv/bin/activate && pip install --upgrade pip
```

-4. Install the required dependencies, including those needed for the backends like [Core ML](build-run-coreml.md) or [MPS](build-run-mps.md), if you plan to build them as well:
+4. Install the required dependencies, including those needed for the backends like [Core ML](backends-coreml.md) or [MPS](backends-mps.md), if you plan to build them as well:

```bash
./install_executorch.sh --pybind coreml mps xnnpack
@@ -195,7 +195,7 @@ import ExecuTorch

### Logging

-We provide extra APIs for logging in Objective-C and Swift as a lightweight wrapper of the internal ExecuTorch machinery. To use it, just import the main framework header in Objective-C. Then use the `ExecuTorchLog` interface (or the `Log` class in Swift) to subscribe your own implementation of the `ExecuTorchLogSink` protocol (or `LogSink` in Swift) to listen to log events.
+ExecuTorch provides extra APIs for logging in Objective-C and Swift as a lightweight wrapper of the internal ExecuTorch machinery. To use it, just import the main framework header in Objective-C. Then use the `ExecuTorchLog` interface (or the `Log` class in Swift) to subscribe your own implementation of the `ExecuTorchLogSink` protocol (or `LogSink` in Swift) to listen to log events.

```objectivec
#import <ExecuTorch/ExecuTorch.h>
diff --git a/docs/source/using-executorch-runtime-integration.md b/docs/source/using-executorch-runtime-integration.md
new file mode 100644
index 00000000000..08e071e59ab
--- /dev/null
+++ b/docs/source/using-executorch-runtime-integration.md
@@ -0,0 +1,53 @@
+# Runtime Integration
+
+This section describes options for configuring and customizing the ExecuTorch runtime. While the pre-built packages are designed to provide an "out-of-box" experience, it is common to require additional configuration when shipping into production. ExecuTorch provides the ability to gate features such as logging at compile time, customize system integration, and include only the operators needed to run specific models (selective build).
+
+## Logging
+
+ExecuTorch runtime code includes logging statements at various levels to aid with integration and debugging. Logging inclusion is controlled at build time by the `EXECUTORCH_ENABLE_LOGGING` and `EXECUTORCH_LOG_LEVEL` CMake options. Having these exposed as compile-time configuration allows all logging-related code to be excluded when not used, which is critical for resource-constrained systems.
+
+Logging is sent to STDOUT and STDERR by default on host platforms, and is redirected to OS-specific logging on Android and iOS. See [Platform Abstraction Layer](#platform-abstraction-layer-pal) below for more information on log routing.
+
+To configure the log level when building from source, specify `EXECUTORCH_ENABLE_LOGGING` as on or off and `EXECUTORCH_LOG_LEVEL` as one of debug, info, error, or fatal. Logging is enabled by default in debug builds and disabled in release builds. The log level defaults to info.
+
+See [Building from Source](using-executorch-building-from-source.md) for more information.
+
+```
+cmake -B cmake-out -DEXECUTORCH_ENABLE_LOGGING=ON -DEXECUTORCH_LOG_LEVEL=DEBUG ...
+```
+
+## Platform Abstraction Layer (PAL)
+
+The ExecuTorch Platform Abstraction Layer, or PAL, is a glue layer responsible for providing integration with a particular host system. This includes log routing, timestamps, and abort handling. ExecuTorch provides a default implementation for POSIX-compliant targets, as well as Android- and iOS-specific implementations under the appropriate extensions.
+
+For non-POSIX-compliant systems, a minimal no-op PAL implementation is provided. It is expected that users override the relevant PAL methods in order to enable logging, timestamps, and aborts. The minimal PAL can be selected by building with `-DEXECUTORCH_PAL_DEFAULT=minimal`.
+
+### Overriding the PAL
+
+Overriding the default PAL implementation is commonly done to route logs to a user-specified destination or to provide PAL functionality on embedded systems. To override one or more PAL methods, take the following steps:
+
+- Include
+  [`executorch/runtime/platform/platform.h`](https://github.com/pytorch/executorch/blob/main/runtime/platform/platform.h)
+  in one of your application's `.c` or `.cpp` files.
+- Define an implementation of one or more of the `et_pal_*()` functions.
+
+The default PAL functions are weak symbols, so providing your own strong-symbol
+definition can override them at link time. To ensure that your definitions take
+precedence, you may need to ensure that the strong definitions precede the weak
+definitions in the link order.
+
+See [runtime/platform/platform.h](https://github.com/pytorch/executorch/blob/main/runtime/platform/platform.h) for the PAL function signatures and [runtime/platform/default/posix.cpp](https://github.com/pytorch/executorch/blob/main/runtime/platform/default/posix.cpp) for the reference POSIX implementation.
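+
+As an illustration, the following is a minimal sketch of a log-routing override. It assumes the `et_pal_emit_log_message` signature declared in `runtime/platform/platform.h` at the time of writing and simply forwards messages to `stderr`; a real integration would route them to the platform's logging facility instead.
+
+```cpp
+#include <cstdio>
+
+#include <executorch/runtime/platform/platform.h>
+
+// Sketch: route ExecuTorch log messages to stderr. Because the default PAL
+// functions are weak symbols, this strong definition replaces the default
+// implementation at link time; no registration call is required.
+void et_pal_emit_log_message(
+    et_timestamp_t timestamp,
+    et_pal_log_level_t level,
+    const char* filename,
+    const char* function,
+    size_t line,
+    const char* message,
+    size_t length) {
+  (void)timestamp; // Tick-to-wallclock conversion is omitted in this sketch.
+  std::fprintf(
+      stderr,
+      "[%c] %s:%zu %s: %.*s\n",
+      static_cast<char>(level), // Level is assumed to be a character code.
+      filename,
+      line,
+      function,
+      static_cast<int>(length),
+      message);
+}
+```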
+
+## Kernel Libraries
+
+During export, a model is broken down into a list of operators, each providing some fundamental computation. Adding two tensors is an operator, as is convolution. Each operator requires a corresponding operator kernel to perform the computation on the target hardware. Delegating to an ExecuTorch backend is the preferred way to provide these kernels, but not all operators are supported on all backends.
+
+To handle this, ExecuTorch provides two implementations - the *portable* and *optimized* kernel libraries. The portable kernel library provides full support for all operators in a platform-independent manner. The optimized library carries additional system requirements, but is able to leverage multithreading and vectorized code to achieve greater performance. Operators can be drawn from both in a single build, allowing the optimized library to be used where available with the portable library as a fallback.
+
+The choice of kernel library is transparent to the user when using the mobile pre-built packages. However, it is important when building from source, especially on embedded systems. On mobile, the optimized operators are preferred where available. See [Overview of ExecuTorch's Kernel Libraries](kernel-library-overview.md) for more information.
+
+## Selective Build
+
+By default, ExecuTorch ships with all supported operator kernels, allowing it to run any supported model at any precision. This comes with a binary size of several megabytes, which may be undesirable for production use cases or resource-constrained systems. To minimize binary size, ExecuTorch provides selective build functionality, which includes only the operators needed to run specific models.
+
+Note that selective build only applies to the portable and optimized kernel libraries. Delegates do not participate in selective build and can be included or excluded by linking them individually. See [Kernel Library Selective Build](kernel-library-selective-build.md) for more information.
diff --git a/docs/source/using-executorch-troubleshooting.md b/docs/source/using-executorch-troubleshooting.md
new file mode 100644
index 00000000000..16006802611
--- /dev/null
+++ b/docs/source/using-executorch-troubleshooting.md
@@ -0,0 +1,20 @@
+# Profiling and Debugging
+
+To facilitate model and runtime integration, ExecuTorch provides tools to profile model resource utilization, numerics, and more. This section describes the available troubleshooting tools and steps to resolve issues when integrating ExecuTorch.
+
+## General Troubleshooting Steps
+
+- To troubleshoot failures of runtime API calls, such as loading or running a model, ensure that ExecuTorch framework logging is enabled. See [Logging](using-executorch-runtime-integration.md#logging) for more information.
+- As a preliminary step to troubleshoot slow run times, ensure that performance testing is being done in a release build, and that the model is delegated. See [Inference is Slow](using-executorch-faqs.md#inference-is-slow--performance-troubleshooting) for more information.
+- Check [Frequently Asked Questions](using-executorch-faqs.md) for common issues and questions encountered during install, model export, and runtime integration.
+
+## Developer Tools
+
+The ExecuTorch developer tools, or devtools, are a collection of tooling for troubleshooting model performance, numerics, and resource utilization. See [Introduction to the ExecuTorch Developer Tools](devtools-overview.md) for an overview of the available developer tools and their usage.
+
+## Next Steps
+
+- [Frequently Asked Questions](using-executorch-faqs.md) for solutions to commonly encountered questions and issues.
+- [Introduction to the ExecuTorch Developer Tools](runtime-profiling.md) for a high-level introduction to available developer tooling.
+- [Using the ExecuTorch Developer Tools to Profile a Model](tutorials/devtools-integration-tutorial.md) for information on runtime performance profiling.
+- [Inspector APIs](runtime-profiling.md) for reference material on trace inspector APIs.
diff --git a/examples/demo-apps/android/ExecuTorchDemo/README.md b/examples/demo-apps/android/ExecuTorchDemo/README.md
index 931509891a6..09045ceb7a7 100644
--- a/examples/demo-apps/android/ExecuTorchDemo/README.md
+++ b/examples/demo-apps/android/ExecuTorchDemo/README.md
@@ -17,7 +17,7 @@ This guide explains how to setup ExecuTorch for Android using a demo app. The ap
* Refer to [Setting up ExecuTorch](https://pytorch.org/executorch/stable/getting-started-setup) to set up the repo and dev environment.
* Download and install [Android Studio and SDK](https://developer.android.com/studio).
* Supported Host OS: CentOS, macOS Ventura (M1/x86_64). See below for Qualcomm HTP specific requirements.
-* *Qualcomm HTP Only[^1]:* To build and run on Qualcomm's AI Engine Direct, please follow [Building and Running ExecuTorch with Qualcomm AI Engine Direct Backend](build-run-qualcomm-ai-engine-direct-backend.md) for hardware and software pre-requisites. The version we use for this tutorial is 2.19. The chip we use for this tutorial is SM8450.
+* *Qualcomm HTP Only[^1]:* To build and run on Qualcomm's AI Engine Direct, please follow [Building and Running ExecuTorch with Qualcomm AI Engine Direct Backend](backends-qualcomm.md) for hardware and software pre-requisites. The version we use for this tutorial is 2.19. The chip we use for this tutorial is SM8450.
:::
::::
@@ -44,11 +44,11 @@ mkdir -p examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/
cp dl3_xnnpack_fp32.pte examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/
```

-For more detailed tutorial of lowering to XNNPACK, please see [XNNPACK backend](tutorial-xnnpack-delegate-lowering.md).
+For a more detailed tutorial on lowering to XNNPACK, please see [XNNPACK backend](backends-xnnpack.md).

#### Qualcomm Hexagon NPU

-For delegating to Qualcomm Hexagon NPU, please follow the tutorial [here](build-run-qualcomm-ai-engine-direct-backend.md).
+For delegating to the Qualcomm Hexagon NPU, please follow the tutorial [here](backends-qualcomm.md).

After generating the model, copy the model to `assets` directory.
@@ -164,7 +164,7 @@ This allows the Android app to load ExecuTorch runtime with XNNPACK backend as a
mkdir -p ../examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a
```

-We need to push some additional Qualcomm HTP backend libraries to the app. Please refer to [Qualcomm docs](build-run-qualcomm-ai-engine-direct-backend.md) here.
+We need to push some additional Qualcomm HTP backend libraries to the app. Please refer to the [Qualcomm docs](backends-qualcomm.md).
```bash cp ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtp.so ${QNN_SDK_ROOT}/lib/hexagon-v69/unsigned/libQnnHtpV69Skel.so ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV69Stub.so ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnSystem.so \ From 83a77735ac0c7e3398116ae177f793e86cddf28f Mon Sep 17 00:00:00 2001 From: Guang Yang <42389959+guangy10@users.noreply.github.com> Date: Wed, 26 Feb 2025 11:02:42 -0800 Subject: [PATCH 112/584] Check in collect_env.py to improve UX of GitHub Issue reporting (#8692) * Check in collect_env.py to improve UX of GitHub Issue reporting * Fixed copyright header and instructions --------- Co-authored-by: Guang Yang --- util/collect_env.py | 749 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 749 insertions(+) create mode 100644 util/collect_env.py diff --git a/util/collect_env.py b/util/collect_env.py new file mode 100644 index 00000000000..7d35c0636ce --- /dev/null +++ b/util/collect_env.py @@ -0,0 +1,749 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# mypy: allow-untyped-defs + +# Unlike the rest of the PyTorch this file must be python2 compliant. +# This script outputs relevant system environment info +# Run it with `python util/collect_env.py` or `python -m util.collect_env` + +import datetime +import json +import locale +import os +import re +import subprocess +import sys +from collections import namedtuple + + +try: + import torch + + TORCH_AVAILABLE = True +except (ImportError, NameError, AttributeError, OSError): + TORCH_AVAILABLE = False + +# System Environment Information +SystemEnv = namedtuple( + "SystemEnv", + [ + "torch_version", + "is_debug_build", + "cuda_compiled_version", + "gcc_version", + "clang_version", + "cmake_version", + "os", + "libc_version", + "python_version", + "python_platform", + "is_cuda_available", + "cuda_runtime_version", + "cuda_module_loading", + "nvidia_driver_version", + "nvidia_gpu_models", + "cudnn_version", + "pip_version", # 'pip' or 'pip3' + "pip_packages", + "conda_packages", + "hip_compiled_version", + "hip_runtime_version", + "miopen_runtime_version", + "caching_allocator_config", + "is_xnnpack_available", + "cpu_info", + ], +) + +COMMON_PATTERNS = [ + "torch", + "numpy", + "triton", + "optree", +] + +NVIDIA_PATTERNS = [ + "cuda-cudart", + "cuda-cupti", + "cuda-libraries", + "cuda-opencl", + "cuda-nvrtc", + "cuda-runtime", + "cublas", + "cudnn", + "cufft", + "curand", + "cusolver", + "cusparse", + "nccl", + "nvjitlink", + "nvtx", +] + +CONDA_PATTERNS = [ + "cudatoolkit", + "soumith", + "mkl", + "magma", +] + +PIP_PATTERNS = [ + "mypy", + "flake8", + "onnx", +] + + +def run(command): + """Return (return-code, stdout, stderr).""" + shell = True if type(command) is str else False + p = subprocess.Popen( + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell + ) + raw_output, raw_err = p.communicate() + rc = p.returncode + if get_platform() == "win32": + enc = "oem" + else: + enc = locale.getpreferredencoding() + output = raw_output.decode(enc) + err = raw_err.decode(enc) + return rc, output.strip(), err.strip() + + +def run_and_read_all(run_lambda, command): + """Run command using run_lambda; reads and returns entire output if rc is 0.""" + rc, out, _ = run_lambda(command) + if rc != 0: + return None + return out + + +def run_and_parse_first_match(run_lambda, command, regex): + """Run command using run_lambda, returns the 
first regex match if it exists.""" + rc, out, _ = run_lambda(command) + if rc != 0: + return None + match = re.search(regex, out) + if match is None: + return None + return match.group(1) + + +def run_and_return_first_line(run_lambda, command): + """Run command using run_lambda and returns first line if output is not empty.""" + rc, out, _ = run_lambda(command) + if rc != 0: + return None + return out.split("\n")[0] + + +def get_conda_packages(run_lambda, patterns=None): + if patterns is None: + patterns = CONDA_PATTERNS + COMMON_PATTERNS + NVIDIA_PATTERNS + conda = os.environ.get("CONDA_EXE", "conda") + out = run_and_read_all(run_lambda, "{} list".format(conda)) + if out is None: + return out + + return "\n".join( + line + for line in out.splitlines() + if not line.startswith("#") and any(name in line for name in patterns) + ) + + +def get_gcc_version(run_lambda): + return run_and_parse_first_match(run_lambda, "gcc --version", r"gcc (.*)") + + +def get_clang_version(run_lambda): + return run_and_parse_first_match( + run_lambda, "clang --version", r"clang version (.*)" + ) + + +def get_cmake_version(run_lambda): + return run_and_parse_first_match(run_lambda, "cmake --version", r"cmake (.*)") + + +def get_nvidia_driver_version(run_lambda): + if get_platform() == "darwin": + cmd = "kextstat | grep -i cuda" + return run_and_parse_first_match( + run_lambda, cmd, r"com[.]nvidia[.]CUDA [(](.*?)[)]" + ) + smi = get_nvidia_smi() + return run_and_parse_first_match(run_lambda, smi, r"Driver Version: (.*?) ") + + +def get_gpu_info(run_lambda): + if get_platform() == "darwin" or ( + TORCH_AVAILABLE + and hasattr(torch.version, "hip") + and torch.version.hip is not None + ): + if TORCH_AVAILABLE and torch.cuda.is_available(): + if torch.version.hip is not None: + prop = torch.cuda.get_device_properties(0) + if hasattr(prop, "gcnArchName"): + gcnArch = " ({})".format(prop.gcnArchName) + else: + gcnArch = "NoGCNArchNameOnOldPyTorch" + else: + gcnArch = "" + return torch.cuda.get_device_name(None) + gcnArch + return None + smi = get_nvidia_smi() + uuid_regex = re.compile(r" \(UUID: .+?\)") + rc, out, _ = run_lambda(smi + " -L") + if rc != 0: + return None + # Anonymize GPUs by removing their UUID + return re.sub(uuid_regex, "", out) + + +def get_running_cuda_version(run_lambda): + return run_and_parse_first_match(run_lambda, "nvcc --version", r"release .+ V(.*)") + + +def get_cudnn_version(run_lambda): + """Return a list of libcudnn.so; it's hard to tell which one is being used.""" + if get_platform() == "win32": + system_root = os.environ.get("SYSTEMROOT", "C:\\Windows") + cuda_path = os.environ.get("CUDA_PATH", "%CUDA_PATH%") + where_cmd = os.path.join(system_root, "System32", "where") + cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path) + elif get_platform() == "darwin": + # CUDA libraries and drivers can be found in /usr/local/cuda/. See + # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install + # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac + # Use CUDNN_LIBRARY when cudnn library is installed elsewhere. 
+ cudnn_cmd = "ls /usr/local/cuda/lib/libcudnn*" + else: + cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev' + rc, out, _ = run_lambda(cudnn_cmd) + # find will return 1 if there are permission errors or if not found + if len(out) == 0 or (rc != 1 and rc != 0): + cudnn_lib = os.environ.get("CUDNN_LIBRARY") + if cudnn_lib is not None and os.path.isfile(cudnn_lib): + return os.path.realpath(cudnn_lib) + return None + files_set = set() + for fn in out.split("\n"): + fn = os.path.realpath(fn) # eliminate symbolic links + if os.path.isfile(fn): + files_set.add(fn) + if not files_set: + return None + # Alphabetize the result because the order is non-deterministic otherwise + files = sorted(files_set) + if len(files) == 1: + return files[0] + result = "\n".join(files) + return "Probably one of the following:\n{}".format(result) + + +def get_nvidia_smi(): + # Note: nvidia-smi is currently available only on Windows and Linux + smi = "nvidia-smi" + if get_platform() == "win32": + system_root = os.environ.get("SYSTEMROOT", "C:\\Windows") + program_files_root = os.environ.get("PROGRAMFILES", "C:\\Program Files") + legacy_path = os.path.join( + program_files_root, "NVIDIA Corporation", "NVSMI", smi + ) + new_path = os.path.join(system_root, "System32", smi) + smis = [new_path, legacy_path] + for candidate_smi in smis: + if os.path.exists(candidate_smi): + smi = '"{}"'.format(candidate_smi) + break + return smi + + +# example outputs of CPU infos +# * linux +# Architecture: x86_64 +# CPU op-mode(s): 32-bit, 64-bit +# Address sizes: 46 bits physical, 48 bits virtual +# Byte Order: Little Endian +# CPU(s): 128 +# On-line CPU(s) list: 0-127 +# Vendor ID: GenuineIntel +# Model name: Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz +# CPU family: 6 +# Model: 106 +# Thread(s) per core: 2 +# Core(s) per socket: 32 +# Socket(s): 2 +# Stepping: 6 +# BogoMIPS: 5799.78 +# Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr +# sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl +# xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16 +# pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand +# hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced +# fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap +# avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 +# xsaves wbnoinvd ida arat avx512vbmi pku ospke avx512_vbmi2 gfni vaes vpclmulqdq +# avx512_vnni avx512_bitalg tme avx512_vpopcntdq rdpid md_clear flush_l1d arch_capabilities +# Virtualization features: +# Hypervisor vendor: KVM +# Virtualization type: full +# Caches (sum of all): +# L1d: 3 MiB (64 instances) +# L1i: 2 MiB (64 instances) +# L2: 80 MiB (64 instances) +# L3: 108 MiB (2 instances) +# NUMA: +# NUMA node(s): 2 +# NUMA node0 CPU(s): 0-31,64-95 +# NUMA node1 CPU(s): 32-63,96-127 +# Vulnerabilities: +# Itlb multihit: Not affected +# L1tf: Not affected +# Mds: Not affected +# Meltdown: Not affected +# Mmio stale data: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown +# Retbleed: Not affected +# Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +# Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +# Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence +# Srbds: Not 
affected +# Tsx async abort: Not affected +# * win32 +# Architecture=9 +# CurrentClockSpeed=2900 +# DeviceID=CPU0 +# Family=179 +# L2CacheSize=40960 +# L2CacheSpeed= +# Manufacturer=GenuineIntel +# MaxClockSpeed=2900 +# Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz +# ProcessorType=3 +# Revision=27142 +# +# Architecture=9 +# CurrentClockSpeed=2900 +# DeviceID=CPU1 +# Family=179 +# L2CacheSize=40960 +# L2CacheSpeed= +# Manufacturer=GenuineIntel +# MaxClockSpeed=2900 +# Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz +# ProcessorType=3 +# Revision=27142 + + +def get_cpu_info(run_lambda): + rc, out, err = 0, "", "" + if get_platform() == "linux": + rc, out, err = run_lambda("lscpu") + elif get_platform() == "win32": + rc, out, err = run_lambda( + 'powershell.exe "gwmi -Class Win32_Processor | Select-Object -Property Name,Manufacturer,Family,\ + Architecture,ProcessorType,DeviceID,CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision\ + | ConvertTo-Json"' + ) + if rc == 0: + lst = [] + try: + obj = json.loads(out) + if type(obj) is list: + for o in obj: + lst.append("----------------------") + lst.extend([f"{k}: {v}" for (k, v) in o.items()]) + else: + lst.extend([f"{k}: {v}" for (k, v) in obj.items()]) + except ValueError as e: + lst.append(out) + lst.append(str(e)) + out = "\n".join(lst) + elif get_platform() == "darwin": + rc, out, err = run_lambda("sysctl -n machdep.cpu.brand_string") + cpu_info = "None" + if rc == 0: + cpu_info = out + else: + cpu_info = err + return cpu_info + + +def get_platform(): + if sys.platform.startswith("linux"): + return "linux" + elif sys.platform.startswith("win32"): + return "win32" + elif sys.platform.startswith("cygwin"): + return "cygwin" + elif sys.platform.startswith("darwin"): + return "darwin" + else: + return sys.platform + + +def get_mac_version(run_lambda): + return run_and_parse_first_match(run_lambda, "sw_vers -productVersion", r"(.*)") + + +def get_windows_version(run_lambda): + ret = run_and_read_all( + run_lambda, + 'powershell.exe "gwmi -Class Win32_OperatingSystem | Select-Object -Property Caption,\ + OSArchitecture,Version | ConvertTo-Json"', + ) + try: + obj = json.loads(ret) + ret = f'{obj["Caption"]} ({obj["Version"]} {obj["OSArchitecture"]})' + except ValueError as e: + ret += f"\n{str(e)}" + return ret + + +def get_lsb_version(run_lambda): + return run_and_parse_first_match( + run_lambda, "lsb_release -a", r"Description:\t(.*)" + ) + + +def check_release_file(run_lambda): + return run_and_parse_first_match( + run_lambda, "cat /etc/*-release", r'PRETTY_NAME="(.*)"' + ) + + +def get_os(run_lambda): + from platform import machine + + platform = get_platform() + + if platform == "win32" or platform == "cygwin": + return get_windows_version(run_lambda) + + if platform == "darwin": + version = get_mac_version(run_lambda) + if version is None: + return None + return "macOS {} ({})".format(version, machine()) + + if platform == "linux": + # Ubuntu/Debian based + desc = get_lsb_version(run_lambda) + if desc is not None: + return "{} ({})".format(desc, machine()) + + # Try reading /etc/*-release + desc = check_release_file(run_lambda) + if desc is not None: + return "{} ({})".format(desc, machine()) + + return "{} ({})".format(platform, machine()) + + # Unknown platform + return platform + + +def get_python_platform(): + import platform + + return platform.platform() + + +def get_libc_version(): + import platform + + if get_platform() != "linux": + return "N/A" + return "-".join(platform.libc_ver()) + + +def 
get_pip_packages(run_lambda, patterns=None): + """Return `pip list` output. Note: will also find conda-installed pytorch and numpy packages.""" + if patterns is None: + patterns = PIP_PATTERNS + COMMON_PATTERNS + NVIDIA_PATTERNS + + pip_version = "pip3" if sys.version[0] == "3" else "pip" + + os.environ["PIP_DISABLE_PIP_VERSION_CHECK"] = "1" + # People generally have pip as `pip` or `pip3` + # But here it is invoked as `python -mpip` + out = run_and_read_all( + run_lambda, [sys.executable, "-mpip", "list", "--format=freeze"] + ) + filtered_out = "\n".join( + line for line in out.splitlines() if any(name in line for name in patterns) + ) + + return pip_version, filtered_out + + +def get_cachingallocator_config(): + ca_config = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "") + return ca_config + + +def get_cuda_module_loading_config(): + if TORCH_AVAILABLE and torch.cuda.is_available(): + torch.cuda.init() + config = os.environ.get("CUDA_MODULE_LOADING", "") + return config + else: + return "N/A" + + +def is_xnnpack_available(): + if TORCH_AVAILABLE: + import torch.backends.xnnpack + + return str(torch.backends.xnnpack.enabled) # type: ignore[attr-defined] + else: + return "N/A" + + +def get_env_info(): + """ + Collects environment information to aid in debugging. + + The returned environment information contains details on torch version, is debug build + or not, cuda compiled version, gcc version, clang version, cmake version, operating + system, libc version, python version, python platform, CUDA availability, CUDA + runtime version, CUDA module loading config, GPU model and configuration, Nvidia + driver version, cuDNN version, pip version and versions of relevant pip and + conda packages, HIP runtime version, MIOpen runtime version, + Caching allocator config, XNNPACK availability and CPU information. + + Returns: + SystemEnv (namedtuple): A tuple containining various environment details + and system information. 
+ """ + run_lambda = run + pip_version, pip_list_output = get_pip_packages(run_lambda) + + if TORCH_AVAILABLE: + version_str = torch.__version__ + debug_mode_str = str(torch.version.debug) + cuda_available_str = str(torch.cuda.is_available()) + cuda_version_str = torch.version.cuda + if ( + not hasattr(torch.version, "hip") or torch.version.hip is None + ): # cuda version + hip_compiled_version = hip_runtime_version = miopen_runtime_version = "N/A" + else: # HIP version + + def get_version_or_na(cfg, prefix): + _lst = [s.rsplit(None, 1)[-1] for s in cfg if prefix in s] + return _lst[0] if _lst else "N/A" + + cfg = torch._C._show_config().split("\n") + hip_runtime_version = get_version_or_na(cfg, "HIP Runtime") + miopen_runtime_version = get_version_or_na(cfg, "MIOpen") + cuda_version_str = "N/A" + hip_compiled_version = torch.version.hip + else: + version_str = debug_mode_str = cuda_available_str = cuda_version_str = "N/A" + hip_compiled_version = hip_runtime_version = miopen_runtime_version = "N/A" + + sys_version = sys.version.replace("\n", " ") + + conda_packages = get_conda_packages(run_lambda) + + return SystemEnv( + torch_version=version_str, + is_debug_build=debug_mode_str, + python_version="{} ({}-bit runtime)".format( + sys_version, sys.maxsize.bit_length() + 1 + ), + python_platform=get_python_platform(), + is_cuda_available=cuda_available_str, + cuda_compiled_version=cuda_version_str, + cuda_runtime_version=get_running_cuda_version(run_lambda), + cuda_module_loading=get_cuda_module_loading_config(), + nvidia_gpu_models=get_gpu_info(run_lambda), + nvidia_driver_version=get_nvidia_driver_version(run_lambda), + cudnn_version=get_cudnn_version(run_lambda), + hip_compiled_version=hip_compiled_version, + hip_runtime_version=hip_runtime_version, + miopen_runtime_version=miopen_runtime_version, + pip_version=pip_version, + pip_packages=pip_list_output, + conda_packages=conda_packages, + os=get_os(run_lambda), + libc_version=get_libc_version(), + gcc_version=get_gcc_version(run_lambda), + clang_version=get_clang_version(run_lambda), + cmake_version=get_cmake_version(run_lambda), + caching_allocator_config=get_cachingallocator_config(), + is_xnnpack_available=is_xnnpack_available(), + cpu_info=get_cpu_info(run_lambda), + ) + + +env_info_fmt = """ +PyTorch version: {torch_version} +Is debug build: {is_debug_build} +CUDA used to build PyTorch: {cuda_compiled_version} +ROCM used to build PyTorch: {hip_compiled_version} + +OS: {os} +GCC version: {gcc_version} +Clang version: {clang_version} +CMake version: {cmake_version} +Libc version: {libc_version} + +Python version: {python_version} +Python platform: {python_platform} +Is CUDA available: {is_cuda_available} +CUDA runtime version: {cuda_runtime_version} +CUDA_MODULE_LOADING set to: {cuda_module_loading} +GPU models and configuration: {nvidia_gpu_models} +Nvidia driver version: {nvidia_driver_version} +cuDNN version: {cudnn_version} +HIP runtime version: {hip_runtime_version} +MIOpen runtime version: {miopen_runtime_version} +Is XNNPACK available: {is_xnnpack_available} + +CPU: +{cpu_info} + +Versions of relevant libraries: +{pip_packages} +{conda_packages} +""".strip() + + +def pretty_str(envinfo): # noqa: C901 + def replace_nones(dct, replacement="Could not collect"): + for key in dct.keys(): + if dct[key] is not None: + continue + dct[key] = replacement + return dct + + def replace_bools(dct, true="Yes", false="No"): + for key in dct.keys(): + if dct[key] is True: + dct[key] = true + elif dct[key] is False: + dct[key] = false + return 
dct + + def prepend(text, tag="[prepend]"): + lines = text.split("\n") + updated_lines = [tag + line for line in lines] + return "\n".join(updated_lines) + + def replace_if_empty(text, replacement="No relevant packages"): + if text is not None and len(text) == 0: + return replacement + return text + + def maybe_start_on_next_line(string): + # If `string` is multiline, prepend a \n to it. + if string is not None and len(string.split("\n")) > 1: + return "\n{}\n".format(string) + return string + + mutable_dict = envinfo._asdict() + + # If nvidia_gpu_models is multiline, start on the next line + mutable_dict["nvidia_gpu_models"] = maybe_start_on_next_line( + envinfo.nvidia_gpu_models + ) + + # If the machine doesn't have CUDA, report some fields as 'No CUDA' + dynamic_cuda_fields = [ + "cuda_runtime_version", + "nvidia_gpu_models", + "nvidia_driver_version", + ] + all_cuda_fields = dynamic_cuda_fields + ["cudnn_version"] + all_dynamic_cuda_fields_missing = all( + mutable_dict[field] is None for field in dynamic_cuda_fields + ) + if ( + TORCH_AVAILABLE + and not torch.cuda.is_available() + and all_dynamic_cuda_fields_missing + ): + for field in all_cuda_fields: + mutable_dict[field] = "No CUDA" + if envinfo.cuda_compiled_version is None: + mutable_dict["cuda_compiled_version"] = "None" + + # Replace True with Yes, False with No + mutable_dict = replace_bools(mutable_dict) + + # Replace all None objects with 'Could not collect' + mutable_dict = replace_nones(mutable_dict) + + # If either of these are '', replace with 'No relevant packages' + mutable_dict["pip_packages"] = replace_if_empty(mutable_dict["pip_packages"]) + mutable_dict["conda_packages"] = replace_if_empty(mutable_dict["conda_packages"]) + + # Tag conda and pip packages with a prefix + # If they were previously None, they'll show up as ie '[conda] Could not collect' + if mutable_dict["pip_packages"]: + mutable_dict["pip_packages"] = prepend( + mutable_dict["pip_packages"], "[{}] ".format(envinfo.pip_version) + ) + if mutable_dict["conda_packages"]: + mutable_dict["conda_packages"] = prepend( + mutable_dict["conda_packages"], "[conda] " + ) + mutable_dict["cpu_info"] = envinfo.cpu_info + return env_info_fmt.format(**mutable_dict) + + +def get_pretty_env_info(): + """ + Returns a pretty string of environment information. + + This function retrieves environment information by calling the `get_env_info` function + and then formats the information into a human-readable string. The retrieved environment + information is listed in the document of `get_env_info`. + This function is used in `python collect_env.py` that should be executed when reporting a bug. + + Returns: + str: A pretty string of the environment information. 
+ """ + return pretty_str(get_env_info()) + + +def main(): + print("Collecting environment information...") + output = get_pretty_env_info() + print(output) + + if ( + TORCH_AVAILABLE + and hasattr(torch, "utils") + and hasattr(torch.utils, "_crash_handler") + ): + minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR + if sys.platform == "linux" and os.path.exists(minidump_dir): + dumps = [ + os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir) + ] + latest = max(dumps, key=os.path.getctime) + ctime = os.path.getctime(latest) + creation_time = datetime.datetime.fromtimestamp(ctime).strftime( + "%Y-%m-%d %H:%M:%S" + ) + msg = ( + "\n*** Detected a minidump at {} created on {}, ".format( + latest, creation_time + ) + + "if this is related to your bug please include it when you file a report ***" + ) + print(msg, file=sys.stderr) + + +if __name__ == "__main__": + main() From 0163902b867d202077a1ee9066689a4c253fa353 Mon Sep 17 00:00:00 2001 From: cccclai Date: Wed, 26 Feb 2025 11:35:21 -0800 Subject: [PATCH 113/584] Add buck file for unit test Differential Revision: D67463640 Pull Request resolved: https://github.com/pytorch/executorch/pull/8106 --- backends/qualcomm/tests/TARGETS | 39 ++++++++++++++++++++++++++++++++ backends/qualcomm/tests/utils.py | 1 + 2 files changed, 40 insertions(+) create mode 100644 backends/qualcomm/tests/TARGETS diff --git a/backends/qualcomm/tests/TARGETS b/backends/qualcomm/tests/TARGETS new file mode 100644 index 00000000000..b6a9664dcbf --- /dev/null +++ b/backends/qualcomm/tests/TARGETS @@ -0,0 +1,39 @@ +load("@fbcode_macros//build_defs:python_library.bzl", "python_library") +load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest") +load("@fbsource//xplat/executorch/backends/qualcomm/qnn_version.bzl", "get_qnn_library_verision") + +python_library( + name = "models", + srcs = ["models.py"], + deps = [ + "//caffe2:torch", + ] +) + +python_library( + name = "test_qnn_delegate", + srcs = [ + "test_qnn_delegate.py", + "utils.py", + ], + # env = { + # "LD_LIBRARY_PATH": "$(location fbsource//third-party/qualcomm/qnn/qnn-{0}:qnn_offline_compile_libs)".format(get_qnn_library_verision()), + # }, + deps = [ + ":models", + "//caffe2:torch", + "//caffe2/functorch:functorch_src", + "//executorch/exir/backend:partitioner", + "//executorch/exir/dialects:lib", + "//executorch/extension/pybindings:portable_lib", # @manual + "//executorch/extension/pytree:pylib", + "//executorch/backends/qualcomm/partition:partition", + "//executorch/backends/qualcomm/quantizer:quantizer", + "//executorch/backends/qualcomm/serialization:serialization", + "//executorch/backends/qualcomm/utils:utils", + "//executorch/devtools:lib", + "//executorch/examples/qualcomm:utils", + "//executorch/examples/models:models", + "//executorch/backends/qualcomm/debugger:utils", + ], +) diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py index eeebb6fd8a9..4908bf889a9 100644 --- a/backends/qualcomm/tests/utils.py +++ b/backends/qualcomm/tests/utils.py @@ -190,6 +190,7 @@ class TestQNN(unittest.TestCase): compile_only: bool = False pre_gen_pte: str = "" llama_artifacts: str = "" + dump_intermediate_outputs: bool = False def _assert_outputs_equal(self, model_output, ref_output): self.assertTrue(len(ref_output) == len(model_output)) From 696d766a4a61b68214b24bdef9b56ab080cbc685 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Wed, 26 Feb 2025 12:11:24 -0800 Subject: [PATCH 114/584] Bypass find_package(torch) for find_package_torch_headers to 
help out cross compilation (#8629) To be honest, I don't understand how the Android demo compiles currently. Indeed, it doesn't work on my Mac without this change, but with this change it does. Should fix #8556 --- build/Utils.cmake | 47 +++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/build/Utils.cmake b/build/Utils.cmake index 646ef5ff285..6e7459430b7 100644 --- a/build/Utils.cmake +++ b/build/Utils.cmake @@ -329,29 +329,25 @@ function(resolve_python_executable) endfunction() # find_package(Torch CONFIG REQUIRED) replacement for targets that have a -# header-only Torch dependency. Because find_package sets variables in the -# parent scope, we use a macro to preserve this rather than maintaining our own -# list of those variables. -macro(find_package_torch_headers) - # We cannot simply use CMAKE_FIND_ROOT_PATH_BOTH, because that does not - # propagate into TorchConfig.cmake. - foreach(mode_kind IN ITEMS PACKAGE LIBRARY INCLUDE) - set(OLD_CMAKE_FIND_ROOT_PATH_MODE_${mode_kind} - ${CMAKE_FIND_ROOT_PATH_MODE_${mode_kind}} - ) - set(CMAKE_FIND_ROOT_PATH_MODE_${mode_kind} BOTH) - endforeach() - find_package_torch() - foreach(mode_kind IN ITEMS PACKAGE LIBRARY INCLUDE) - set(CMAKE_FIND_ROOT_PATH_MODE_${mode_kind} - ${OLD_CMAKE_FIND_ROOT_PATH_MODE_${mode_kind}} - ) - endforeach() -endmacro() +# header-only Torch dependency. +# +# Unlike find_package(Torch ...), this will only set +# TORCH_INCLUDE_DIRS in the parent scope. In particular, it will NOT +# set any of the following: +# - TORCH_FOUND +# - TORCH_LIBRARY +# - TORCH_CXX_FLAGS +function(find_package_torch_headers) + # We implement this way rather than using find_package so that + # cross-compilation can still use the host's installed copy of + # torch, since the headers should be fine. + get_torch_base_path(TORCH_BASE_PATH) + set(TORCH_INCLUDE_DIRS "${TORCH_BASE_PATH}/include;${TORCH_BASE_PATH}/include/torch/csrc/api/include" PARENT_SCOPE) +endfunction() -# Add the Torch CMake configuration to CMAKE_PREFIX_PATH so that find_package -# can find Torch. -function(add_torch_to_cmake_prefix_path) +# Return the base path to the installed Torch Python library in +# outVar. +function(get_torch_base_path outVar) if(NOT PYTHON_EXECUTABLE) resolve_python_executable() endif() @@ -370,6 +366,13 @@ function(add_torch_to_cmake_prefix_path) message("Output:\n${_tmp_torch_path}") message(FATAL_ERROR "Error:\n${_tmp_torch_path_error}") endif() + set(${outVar} ${_tmp_torch_path} PARENT_SCOPE) +endfunction() + +# Add the Torch CMake configuration to CMAKE_PREFIX_PATH so that find_package +# can find Torch. +function(add_torch_to_cmake_prefix_path) + get_torch_base_path(_tmp_torch_path) list(APPEND CMAKE_PREFIX_PATH "${_tmp_torch_path}") set(CMAKE_PREFIX_PATH "${CMAKE_PREFIX_PATH}" From 6410d47fef33f5ab9c15a02ec32c90d573da7a57 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Wed, 26 Feb 2025 12:11:54 -0800 Subject: [PATCH 115/584] Add extension/llm/custom_ops/spinquant/test to CMake (#8345) Need to run it to make sure build changes in this directory aren't breaking things. 
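Locally, the new test directory is picked up by the OSS C++ test runner; a minimal way to exercise it (assuming a working host toolchain and the usual OSS CMake setup) is simply:

```
bash test/run_oss_cpp_tests.sh
```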
Test Plan: added EXPECT_TRUE(false) to fast_hadamard_transform_test.cpp, saw failure in test/run_oss_cpp_tests.sh --- build/Test.cmake | 1 + build/build_android_llm_demo.sh | 2 ++ extension/android_test/setup.sh | 2 ++ extension/llm/custom_ops/CMakeLists.txt | 5 ++++ .../custom_ops/spinquant/test/CMakeLists.txt | 30 +++++++++++++++++++ .../spinquant/third-party/FFHT/CMakeLists.txt | 12 ++++++++ test/run_oss_cpp_tests.sh | 1 + test/utils/OSSTestConfig.json | 11 +++++++ 8 files changed, 64 insertions(+) create mode 100644 extension/llm/custom_ops/spinquant/test/CMakeLists.txt create mode 100644 extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeLists.txt diff --git a/build/Test.cmake b/build/Test.cmake index 31e5aaf4d63..dbe590d610b 100644 --- a/build/Test.cmake +++ b/build/Test.cmake @@ -36,6 +36,7 @@ function(et_cxx_test target_name) cmake_parse_arguments(ET_CXX_TEST "" "" "${multi_arg_names}" ${ARGN}) add_executable(${target_name} ${ET_CXX_TEST_SOURCES} ${EXECUTORCH_ROOT}/runtime/core/exec_aten/testing_util/tensor_util.cpp) + find_package(GTest) # Includes gtest, gmock, executorch by default target_link_libraries( ${target_name} GTest::gtest GTest::gtest_main GTest::gmock executorch diff --git a/build/build_android_llm_demo.sh b/build/build_android_llm_demo.sh index 4119bde4c7e..cb2d47fdeb3 100644 --- a/build/build_android_llm_demo.sh +++ b/build/build_android_llm_demo.sh @@ -44,6 +44,7 @@ build_android_native_library() { -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \ -DANDROID_ABI="${ANDROID_ABI}" \ -DANDROID_PLATFORM=android-26 \ + -DBUILD_TESTING=OFF \ -DEXECUTORCH_ENABLE_LOGGING=ON \ -DEXECUTORCH_LOG_LEVEL=Info \ -DEXECUTORCH_BUILD_XNNPACK=ON \ @@ -73,6 +74,7 @@ build_android_native_library() { -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \ -DANDROID_ABI="${ANDROID_ABI}" \ -DANDROID_PLATFORM=android-26 \ + -DBUILD_TESTING=OFF \ -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \ -DEXECUTORCH_ENABLE_LOGGING=ON \ -DEXECUTORCH_LOG_LEVEL=Info \ diff --git a/extension/android_test/setup.sh b/extension/android_test/setup.sh index 725728b8092..c21d2c09623 100755 --- a/extension/android_test/setup.sh +++ b/extension/android_test/setup.sh @@ -21,6 +21,7 @@ build_native_library() { cmake . 
-DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \ -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \ -DANDROID_ABI="${ANDROID_ABI}" \ + -DBUILD_TESTING=OFF \ -DEXECUTORCH_BUILD_XNNPACK=ON \ -DEXECUTORCH_XNNPACK_SHARED_WORKSPACE=ON \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ @@ -36,6 +37,7 @@ build_native_library() { cmake extension/android \ -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}"/build/cmake/android.toolchain.cmake \ -DANDROID_ABI="${ANDROID_ABI}" \ + -DBUILD_TESTING=OFF \ -DCMAKE_INSTALL_PREFIX=c"${CMAKE_OUT}" \ -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ -DEXECUTORCH_BUILD_LLAMA_JNI=ON \ diff --git a/extension/llm/custom_ops/CMakeLists.txt b/extension/llm/custom_ops/CMakeLists.txt index 95f4bc559fa..da00320a89f 100644 --- a/extension/llm/custom_ops/CMakeLists.txt +++ b/extension/llm/custom_ops/CMakeLists.txt @@ -121,3 +121,8 @@ if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT) LIBRARY DESTINATION executorch/extension/llm/custom_ops ) endif() + +add_subdirectory(spinquant/third-party/FFHT) +if(BUILD_TESTING) + add_subdirectory(spinquant/test) +endif() diff --git a/extension/llm/custom_ops/spinquant/test/CMakeLists.txt b/extension/llm/custom_ops/spinquant/test/CMakeLists.txt new file mode 100644 index 00000000000..c793d2ed975 --- /dev/null +++ b/extension/llm/custom_ops/spinquant/test/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# @generated by test/utils/generate_gtest_cmakelists.py +# +# This file should be formatted with +# ~~~ +# cmake-format -i CMakeLists.txt +# ~~~ +# It should also be cmake-lint clean. +# + +cmake_minimum_required(VERSION 3.19) + +set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../../..) + +include(${EXECUTORCH_ROOT}/build/Test.cmake) + +set(_test_srcs + fast_hadamard_transform_test.cpp fast_hadamard_transform_test_impl.cpp + op_fast_hadamard_transform_test.cpp +) + +et_cxx_test( + extension_llm_custom_ops_spinquant_test SOURCES ${_test_srcs} EXTRA_LIBS + custom_ops dumb_fht +) diff --git a/extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeLists.txt b/extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeLists.txt new file mode 100644 index 00000000000..2e3089be72e --- /dev/null +++ b/extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeLists.txt @@ -0,0 +1,12 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Please this file formatted by running: +# ~~~ +# cmake-format -i CMakeLists.txt +# ~~~ + +add_library(dumb_fht dumb_fht.c) diff --git a/test/run_oss_cpp_tests.sh b/test/run_oss_cpp_tests.sh index f747100006d..ff2ed048257 100755 --- a/test/run_oss_cpp_tests.sh +++ b/test/run_oss_cpp_tests.sh @@ -35,6 +35,7 @@ build_executorch() { cmake . 
\ -DCMAKE_INSTALL_PREFIX=cmake-out \ -DEXECUTORCH_USE_CPP_CODE_COVERAGE=ON \ + -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ diff --git a/test/utils/OSSTestConfig.json b/test/utils/OSSTestConfig.json index 6eff74eec86..70cb2d2e44f 100644 --- a/test/utils/OSSTestConfig.json +++ b/test/utils/OSSTestConfig.json @@ -1,4 +1,15 @@ { "tests": [ + { + "directory": "extension/llm/custom_ops/spinquant/test", + "sources": [ + "fast_hadamard_transform_test.cpp", + "fast_hadamard_transform_test_impl.cpp", + "op_fast_hadamard_transform_test.cpp" + ], + "additional_libs": [ + "custom_ops" + ] + }, { "directory": "extension/data_loader/test", "sources": [ From 5b32a800924a63e419e0da3a3fe98bf6ab49076a Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Wed, 26 Feb 2025 12:14:04 -0800 Subject: [PATCH 116/584] determine fht_avx.c/fht_neon.c based on CMake target processor (#8346) Might fix #6691. --- build/cmake_deps.toml | 5 +---- extension/llm/custom_ops/CMakeLists.txt | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/build/cmake_deps.toml b/build/cmake_deps.toml index 21a8e282929..4b22a09cb5b 100644 --- a/build/cmake_deps.toml +++ b/build/cmake_deps.toml @@ -386,10 +386,7 @@ buck_targets = [ "//extension/llm/custom_ops:custom_ops", ] filters = [ - # Second clause is to pick up fht_neon.c/fht_avx.c from FFHT. TODO: - # remove filters and patch extract_sources.py's Buck query to fetch - # srcs; presumably filters is here to remove .h files. - "(.cpp$)|(fht.*\\.c$)", + ".cpp$", ] excludes = [ "^codegen", diff --git a/extension/llm/custom_ops/CMakeLists.txt b/extension/llm/custom_ops/CMakeLists.txt index da00320a89f..c3969e6f9bf 100644 --- a/extension/llm/custom_ops/CMakeLists.txt +++ b/extension/llm/custom_ops/CMakeLists.txt @@ -45,6 +45,22 @@ list(APPEND custom_ops_libs cpuinfo) list(APPEND custom_ops_libs cpublas) list(APPEND custom_ops_libs eigen_blas) +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv7)$") + list(APPEND _custom_ops__srcs + "extension/llm/custom_ops/spinquant/third-party/FFHT/fht_neon.c" + ) +elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + list(APPEND _custom_ops__srcs + "extension/llm/custom_ops/spinquant/third-party/FFHT/fht_avx.c" + ) +else() + message( + FATAL_ERROR + "Unsupported CMAKE_SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR}. 
(If \ +32-bit x86, try using fht_avx.c and send a PR if it works!)" + ) +endif() + list(TRANSFORM _custom_ops__srcs PREPEND "${EXECUTORCH_ROOT}/") if(NOT EXECUTORCH_BUILD_XNNPACK) From c105ddac74529be8462663c994a9578ba0c11fe5 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 26 Feb 2025 12:51:02 -0800 Subject: [PATCH 117/584] Util to load a named_data_map into a std::map Differential Revision: D70186215 Pull Request resolved: https://github.com/pytorch/executorch/pull/8737 --- extension/training/module/state_dict_util.cpp | 107 ++++++++++++++++++ extension/training/module/state_dict_util.h | 35 ++++++ extension/training/module/targets.bzl | 18 +++ .../module/test/state_dict_util_test.cpp | 89 +++++++++++++++ extension/training/module/test/targets.bzl | 16 +++ 5 files changed, 265 insertions(+) create mode 100644 extension/training/module/state_dict_util.cpp create mode 100644 extension/training/module/state_dict_util.h create mode 100644 extension/training/module/test/state_dict_util_test.cpp diff --git a/extension/training/module/state_dict_util.cpp b/extension/training/module/state_dict_util.cpp new file mode 100644 index 00000000000..7c742d11c08 --- /dev/null +++ b/extension/training/module/state_dict_util.cpp @@ -0,0 +1,107 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +namespace executorch { +namespace extension { +namespace training { + +runtime::Result> +load_state_dict(const runtime::NamedDataMap& data_map) { + std::map state_dict; + auto num_key_res = data_map.get_num_keys(); + if (!num_key_res.ok()) { + return num_key_res.error(); + } + for (size_t i = 0; i < num_key_res.get(); i++) { + // get the key + auto key_res = data_map.get_key(i); + if (!key_res.ok()) { + return key_res.error(); + } + + // get the metadata + auto metadata_res = data_map.get_metadata(key_res.get()); + if (!metadata_res.ok()) { + return metadata_res.error(); + } + + // get data blob + void* data = nullptr; + static constexpr size_t kMallocAlignment = alignof(std::max_align_t); + if constexpr (kMallocAlignment < 8) { + // Skip manually aligning the memory since PyTorch doesn't have dtypes > + // 8 bytes wide, and I don't expect to ever encounter a platform where + // malloc aligns to less than 8. + ET_LOG( + Error, + "kMallocAlignment is too small: %zu. Cannot safely create buffer to load tensor. 
Please open an issue on https://github.com/pytorch/executorch/issues", + kMallocAlignment); + return runtime::Error::NotSupported; + } + + data = malloc(metadata_res->nbytes()); + if (data == nullptr && metadata_res->nbytes() != 0) { + ET_LOG(Error, "Failed to allocate memory for tensor, malloc failed"); + return runtime::Error::MemoryAllocationFailed; + } + auto load_into_error = + data_map.load_data_into(key_res.get(), data, metadata_res->nbytes()); + if (load_into_error != runtime::Error::Ok) { + ET_LOG( + Error, + "Failed to load data into tensor, likely a malformed .ptd 0x%" PRIx32, + static_cast(load_into_error)); + return load_into_error; + } + + // Get metadata + std::vector sizes; + for (auto x : metadata_res->sizes()) { + sizes.push_back(x); + } + std::vector dim_order; + for (auto x : metadata_res->dim_order()) { + dim_order.push_back(x); + } + std::vector strides; + for (auto stride_index = 0; stride_index < metadata_res->sizes().size(); + stride_index++) { + if (stride_index == 0) { + strides.push_back(1); + } else { + strides.insert( + strides.begin(), + sizes.at(stride_index) * strides.at(stride_index - 1)); + } + } + + // create tensor + auto tensor = make_tensor_ptr( + sizes, + data, + dim_order, + strides, + metadata_res->scalar_type(), + exec_aten::TensorShapeDynamism::STATIC, + [](void* ptr) { + free(ptr); + ptr = nullptr; + }); + + // add to state dict + state_dict.insert({std::string(key_res.get()), std::move(tensor)}); + } + + return state_dict; +} + +} // namespace training +} // namespace extension +} // namespace executorch diff --git a/extension/training/module/state_dict_util.h b/extension/training/module/state_dict_util.h new file mode 100644 index 00000000000..f98dd77a5af --- /dev/null +++ b/extension/training/module/state_dict_util.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include + +#include +#include + +namespace executorch { +namespace extension { +namespace training { + +/** + * Generate a map of string to tensor. + * + * @param data The NamedDataMap to load the tensors and names from. + * @return A result containing a map of tensor names to tensors if + * successful, an error otherwise. + */ +ET_EXPERIMENTAL +runtime::Result> +load_state_dict(const runtime::NamedDataMap& data); + +} // namespace training +} // namespace extension +} // namespace executorch diff --git a/extension/training/module/targets.bzl b/extension/training/module/targets.bzl index cfdd0f9897a..0ae00aa447d 100644 --- a/extension/training/module/targets.bzl +++ b/extension/training/module/targets.bzl @@ -7,6 +7,24 @@ def define_common_targets(): TARGETS and BUCK files that call this function. 
""" + runtime.cxx_library( + name = "state_dict_util", + srcs = [ + "state_dict_util.cpp", + ], + exported_headers = [ + "state_dict_util.h", + ], + visibility = [ + "@EXECUTORCH_CLIENTS", + ], + exported_deps = [ + "//executorch/runtime/core:named_data_map", + "//executorch/extension/tensor:tensor", + "//executorch/runtime/core:core", + ], + ) + for aten_mode in get_aten_mode_options(): aten_suffix = ("_aten" if aten_mode else "") diff --git a/extension/training/module/test/state_dict_util_test.cpp b/extension/training/module/test/state_dict_util_test.cpp new file mode 100644 index 00000000000..14e5b0d4f0d --- /dev/null +++ b/extension/training/module/test/state_dict_util_test.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include + +#include +#include +#include + +#include + +using namespace ::testing; +using executorch::extension::FlatTensorDataMap; +using executorch::extension::FlatTensorHeader; +using executorch::runtime::DataLoader; +using executorch::runtime::Error; +using executorch::runtime::FreeableBuffer; +using executorch::runtime::Result; +using executorch::runtime::TensorLayout; +using torch::executor::util::FileDataLoader; + +class LoadStateDictTest : public ::testing::Test { + protected: + void SetUp() override { + // Since these tests cause ET_LOG to be called, the PAL must be initialized + // first. + executorch::runtime::runtime_init(); + + // Load data map. + // The eager linear model is defined at: + // //executorch/test/models/linear_model.py + const char* path = std::getenv("ET_MODULE_LINEAR_DATA_PATH"); + Result loader = FileDataLoader::from(path); + ASSERT_EQ(loader.error(), Error::Ok); + + Result header = loader->load( + /*offset=*/0, + FlatTensorHeader::kNumHeadBytes, + /*segment_info=*/ + DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::External)); + + ASSERT_EQ(header.error(), Error::Ok); + + data_map_loader_ = + std::make_unique(std::move(loader.get())); + } + std::unique_ptr data_map_loader_; +}; + +TEST_F(LoadStateDictTest, LoadDataMap) { + Result data_map = + FlatTensorDataMap::load(data_map_loader_.get()); + EXPECT_EQ(data_map.error(), Error::Ok); + + auto state_dict = + executorch::extension::training::load_state_dict(data_map.get()); + ASSERT_TRUE(state_dict.ok()); + + EXPECT_EQ(state_dict->size(), 2); + EXPECT_EQ(state_dict->at("a")->sizes().size(), 2); + EXPECT_EQ(state_dict->at("a")->sizes()[0], 2); + EXPECT_EQ(state_dict->at("a")->sizes()[1], 2); + EXPECT_EQ( + state_dict->at("a")->scalar_type(), torch::executor::ScalarType::Float); + EXPECT_EQ(state_dict->at("a")->dim(), 2); + EXPECT_EQ(state_dict->at("a")->const_data_ptr()[0], 3.f); + EXPECT_EQ(state_dict->at("a")->const_data_ptr()[1], 3.f); + EXPECT_EQ(state_dict->at("a")->const_data_ptr()[2], 3.f); + EXPECT_EQ(state_dict->at("a")->const_data_ptr()[3], 3.f); + + EXPECT_EQ(state_dict->size(), 2); + EXPECT_EQ(state_dict->at("b")->sizes().size(), 2); + EXPECT_EQ(state_dict->at("b")->sizes()[0], 2); + EXPECT_EQ(state_dict->at("b")->sizes()[1], 2); + EXPECT_EQ( + state_dict->at("b")->scalar_type(), torch::executor::ScalarType::Float); + EXPECT_EQ(state_dict->at("b")->dim(), 2); + EXPECT_EQ(state_dict->at("b")->const_data_ptr()[0], 2.f); + EXPECT_EQ(state_dict->at("b")->const_data_ptr()[1], 2.f); + EXPECT_EQ(state_dict->at("b")->const_data_ptr()[2], 2.f); + 
EXPECT_EQ(state_dict->at("b")->const_data_ptr()[3], 2.f); +} diff --git a/extension/training/module/test/targets.bzl b/extension/training/module/test/targets.bzl index 8b260e2a7e8..3b0ea9e31ef 100644 --- a/extension/training/module/test/targets.bzl +++ b/extension/training/module/test/targets.bzl @@ -17,6 +17,8 @@ def define_common_targets(is_fbcode = False): # intentionally don't work in xplat (since they're host-only tools). "ET_MODULE_ADD_PATH": "$(location fbcode//executorch/test/models:exported_programs[ModuleAdd.pte])", "ET_MODULE_SIMPLE_TRAIN_PATH": "$(location fbcode//executorch/test/models:exported_programs[ModuleSimpleTrain.pte])", + "ET_MODULE_LINEAR_PROGRAM_PATH": "$(location fbcode//executorch/test/models:exported_program_and_data[ModuleLinear.pte])", + "ET_MODULE_LINEAR_DATA_PATH": "$(location fbcode//executorch/test/models:exported_program_and_data[ModuleLinear.ptd])", } runtime.cxx_test( @@ -32,3 +34,17 @@ def define_common_targets(is_fbcode = False): ], env = modules_env, ) + + runtime.cxx_test( + name = "state_dict_util_test", + srcs = [ + "state_dict_util_test.cpp", + ], + deps = [ + "//executorch/extension/data_loader:file_data_loader", + "//executorch/extension/flat_tensor:flat_tensor_data_map", + "//executorch/extension/training/module:state_dict_util", + "//executorch/runtime/core/exec_aten:lib", + ], + env = modules_env, + ) From 1caa0a1da299ddfd8fc423adad1e2f18e2571745 Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Wed, 26 Feb 2025 12:57:34 -0800 Subject: [PATCH 118/584] [build] Properly implement editable mode (#8722) * [build] Properly implement editable mode * Add CI * Fix typo * Fix macos job --- .github/workflows/pull.yml | 26 +++++++- .github/workflows/trunk.yml | 25 ++++++++ extension/llm/custom_ops/custom_ops.py | 18 +++--- install_executorch.py | 12 ++++ pyproject.toml | 19 ++++++ setup.py | 83 +++++++++++++++++++------- 6 files changed, 151 insertions(+), 32 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index b599f2fdc67..d9a7aa6bd5b 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -56,6 +56,30 @@ jobs: # Build and test ExecuTorch with the add model on portable backend. 
PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "add" "${BUILD_TOOL}" "portable" + test-pip-install-editable-mode-linux: + name: test-pip-install-editable-mode-linux + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + with: + runner: linux.2xlarge + docker-image: executorch-ubuntu-22.04-clang12 + submodules: 'true' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 90 + script: | + # The generic Linux job chooses to use base env, not the one setup by the image + CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + conda activate "${CONDA_ENV}" + # Debug + which pip + PYTHON_EXECUTABLE=python bash ./install_executorch.sh --editable --pybind xnnpack --use-pt-pinned-commit + # Try to import extension library + python -c "from executorch.extension.llm.custom_ops import custom_ops" + test-models-linux: name: test-models-linux uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main @@ -480,7 +504,7 @@ jobs: # Setup install_requirements for llama PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh - + # Test static llama weight sharing and accuracy PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama.sh diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 7f66474bdba..d8ec745b75c 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -36,6 +36,31 @@ jobs: PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --target-os macos --event "${GITHUB_EVENT_NAME}" + test-pip-install-editable-mode-macos: + name: test-pip-install-editable-mode-macos + uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + with: + runner: macos-m1-stable + python-version: '3.11' + submodules: 'true' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 90 + script: | + # The generic Linux job chooses to use base env, not the one setup by the image + CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + conda activate "${CONDA_ENV}" + # Debug + which pip + bash .ci/scripts/setup-conda.sh + PYTHON_EXECUTABLE=python ${CONDA_RUN} bash ./install_executorch.sh --editable --pybind xnnpack + # Try to import extension library + python -c "from executorch.extension.llm.custom_ops import custom_ops" + test-models-macos: name: test-models-macos uses: pytorch/test-infra/.github/workflows/macos_job.yml@main diff --git a/extension/llm/custom_ops/custom_ops.py b/extension/llm/custom_ops/custom_ops.py index b3b05db68fb..d299b314816 100644 --- a/extension/llm/custom_ops/custom_ops.py +++ b/extension/llm/custom_ops/custom_ops.py @@ -22,23 +22,19 @@ op2 = torch.ops.llama.fast_hadamard_transform.default assert op2 is not None except: - import glob - - import executorch - # This is needed to ensure that custom ops are registered from executorch.extension.pybindings import portable_lib # noqa # usort: skip # Ideally package is installed in only one location but usage of # PYATHONPATH can result in multiple locations. # ATM this is mainly used in CI for qnn runner. 
Will need to revisit this - executorch_package_path = executorch.__path__[-1] - logging.info(f"Looking for libcustom_ops_aot_lib.so in {executorch_package_path}") - libs = list( - glob.glob( - f"{executorch_package_path}/**/libcustom_ops_aot_lib.*", recursive=True - ) - ) + from pathlib import Path + + package_path = Path(__file__).parent.resolve() + logging.info(f"Looking for libcustom_ops_aot_lib.so in {package_path}") + + libs = list(package_path.glob("**/libcustom_ops_aot_lib.*")) + assert len(libs) == 1, f"Expected 1 library but got {len(libs)}" logging.info(f"Loading custom ops library: {libs[0]}") torch.ops.load_library(libs[0]) diff --git a/install_executorch.py b/install_executorch.py index b35f5668eb2..0d82f0a05ca 100644 --- a/install_executorch.py +++ b/install_executorch.py @@ -65,6 +65,7 @@ def clean(): "prelude": "BUCK", "pthreadpool": "CMakeLists.txt", "pybind11": "CMakeLists.txt", + "shim": "BUCK", "XNNPACK": "CMakeLists.txt", } @@ -138,6 +139,14 @@ def build_args_parser() -> argparse.ArgumentParser: action="store_true", help="build from the pinned PyTorch commit instead of nightly", ) + parser.add_argument( + "--editable", + "-e", + action="store_true", + help="build an editable pip wheel, changes to python code will be " + "picked up without rebuilding the wheel. Extension libraries will be " + "installed inside the source tree.", + ) return parser @@ -226,6 +235,9 @@ def main(args): "-m", "pip", "install", + ] + + (["--editable"] if args.editable else []) + + [ ".", "--no-build-isolation", "-v", diff --git a/pyproject.toml b/pyproject.toml index fb4196d99bc..43b0a8c4daf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,6 +82,25 @@ Changelog = "https://github.com/pytorch/executorch/releases" [project.scripts] flatc = "executorch.data.bin:flatc" +# TODO(dbort): Could use py_modules to restrict the set of modules we +# package, and package_data to restrict the set up non-python files we +# include. See also setuptools/discovery.py for custom finders. +[tool.setuptools.package-dir] +"executorch.backends" = "backends" +"executorch.codegen" = "codegen" +# TODO(mnachin T180504136): Do not put examples/models +# into core pip packages. Refactor out the necessary utils +# or core models files into a separate package. +"executorch.examples.models" = "examples/models" +"executorch.exir" = "exir" +"executorch.extension" = "extension" +"executorch.kernels.quantized" = "kernels/quantized" +"executorch.schema" = "schema" +"executorch.devtools" = "devtools" +"executorch.devtools.bundled_program" = "devtools/bundled_program" +"executorch.runtime" = "runtime" +"executorch.util" = "util" + [tool.setuptools.package-data] # TODO(dbort): Prune /test[s]/ dirs, /third-party/ dirs, yaml files that we # don't need. diff --git a/setup.py b/setup.py index 59b468f8c93..ad5c561a7b3 100644 --- a/setup.py +++ b/setup.py @@ -50,6 +50,7 @@ import os import platform import re +import site import sys # Import this before distutils so that setuptools can intercept the distuils @@ -239,7 +240,7 @@ def src_path(self, installer: "InstallerBuildExt") -> Path: srcs = tuple(cmake_cache_dir.glob(self.src)) if len(srcs) != 1: raise ValueError( - f"""Expected exactly one file matching '{self.src}'; found {repr(srcs)}. + f"""Expected exactly one file matching '{self.src}' in {cmake_cache_dir}; found {repr(srcs)}. If that file is a CMake-built extension module file, and we are installing in editable mode, please disable the corresponding build option since it's not supported yet. 
@@ -372,6 +373,63 @@ def dst_path(self, installer: "InstallerBuildExt") -> Path: class InstallerBuildExt(build_ext): """Installs files that were built by cmake.""" + def __init__(self, *args, **kwargs): + self._ran_build = False + super().__init__(*args, **kwargs) + + def run(self): + # Run the build command first in editable mode. Since `build` command + # will also trigger `build_ext` command, only run this once. + if self._ran_build: + return + + if self.editable_mode: + self._ran_build = True + self.run_command("build") + super().run() + + def copy_extensions_to_source(self) -> None: + """For each extension in `ext_modules`, we need to copy the extension + file from the build directory to the correct location in the local + directory. + + This should only be triggered when inplace mode (editable mode) is enabled. + + Args: + + Returns: + """ + build_py = self.get_finalized_command("build_py") + for ext in self.extensions: + if isinstance(ext, BuiltExtension): + modpath = ext.name.split(".") + package = ".".join(modpath[:-1]) + package_dir = os.path.abspath(build_py.get_package_dir(package)) + else: + # HACK: get rid of the leading "executorch" in ext.dst. + # This is because we don't have a root level "executorch" module. + package_dir = ext.dst.removeprefix("executorch/") + + # Ensure that the destination directory exists. + self.mkpath(os.fspath(package_dir)) + + regular_file = ext.src_path(self) + inplace_file = os.path.join( + package_dir, os.path.basename(ext.src_path(self)) + ) + + # Always copy, even if source is older than destination, to ensure + # that the right extensions for the current Python/platform are + # used. + if os.path.exists(regular_file) or not ext.optional: + self.copy_file(regular_file, inplace_file, level=self.verbose) + + if ext._needs_stub: + inplace_stub = self._get_equivalent_stub(ext, inplace_file) + self._write_stub_file(inplace_stub, ext, compile=True) + # Always compile stub and remove the original (leave the cache behind) + # (this behaviour was observed in previous iterations of the code) + # TODO(dbort): Depend on the "build" command to ensure it runs first def build_extension(self, ext: _BaseExtension) -> None: @@ -630,6 +688,10 @@ def run(self): if not self.dry_run: # Dry run should log the command but not actually run it. (Path(cmake_cache_dir) / "CMakeCache.txt").unlink(missing_ok=True) + # Set PYTHONPATH to the location of the pip package. + os.environ["PYTHONPATH"] = ( + site.getsitepackages()[0] + ";" + os.environ.get("PYTHONPATH", "") + ) with Buck2EnvironmentFixer(): # The context manager may patch the environment while running this # cmake command, which happens to run buck2 to get some source @@ -741,25 +803,6 @@ def get_ext_modules() -> List[Extension]: setup( version=Version.string(), - # TODO(dbort): Could use py_modules to restrict the set of modules we - # package, and package_data to restrict the set up non-python files we - # include. See also setuptools/discovery.py for custom finders. - package_dir={ - "executorch/backends": "backends", - "executorch/codegen": "codegen", - # TODO(mnachin T180504136): Do not put examples/models - # into core pip packages. Refactor out the necessary utils - # or core models files into a separate package. 
- "executorch/examples/models": "examples/models", - "executorch/exir": "exir", - "executorch/extension": "extension", - "executorch/kernels/quantized": "kernels/quantized", - "executorch/schema": "schema", - "executorch/devtools": "devtools", - "executorch/devtools/bundled_program": "devtools/bundled_program", - "executorch/runtime": "runtime", - "executorch/util": "util", - }, cmdclass={ "build": CustomBuild, "build_ext": InstallerBuildExt, From 1eb2f94c3d4e3f20bfc4284204e014161d548897 Mon Sep 17 00:00:00 2001 From: Tarun Karuturi <58826100+tarun292@users.noreply.github.com> Date: Thu, 27 Feb 2025 03:00:06 +0530 Subject: [PATCH 119/584] Allow complex dtypes during tensor parsing (#8743) Summary: Given that we now support ops like fft_r2c we should allow complex dtypes during tensor parsing when the program is loaded. In order to support this we need to remove an assert that prevents these dtypes from being present in the program. Differential Revision: D70268416 --- runtime/executor/tensor_parser_portable.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/runtime/executor/tensor_parser_portable.cpp b/runtime/executor/tensor_parser_portable.cpp index 3a29c86700c..b72fedc5eee 100644 --- a/runtime/executor/tensor_parser_portable.cpp +++ b/runtime/executor/tensor_parser_portable.cpp @@ -43,11 +43,7 @@ Result parseTensor( ScalarType scalar_type = static_cast(s_tensor->scalar_type()); ET_CHECK_OR_RETURN_ERROR( - isValid(scalar_type) && - // Types that do not yet have deserialization support. - scalar_type != executorch::aten::ScalarType::ComplexHalf && - scalar_type != executorch::aten::ScalarType::ComplexFloat && - scalar_type != executorch::aten::ScalarType::ComplexDouble, + isValid(scalar_type), InvalidProgram, "Invalid or unsupported ScalarType %" PRId8, static_cast(scalar_type)); From d651dcdabfb677274086a9f133c6ee29f95381a3 Mon Sep 17 00:00:00 2001 From: Shen Chen Xu Date: Wed, 26 Feb 2025 13:54:58 -0800 Subject: [PATCH 120/584] Add mask_val option to StaticAttentionmask Differential Revision: D70255619 Pull Request resolved: https://github.com/pytorch/executorch/pull/8736 --- examples/models/llama/static_attention.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/models/llama/static_attention.py b/examples/models/llama/static_attention.py index 43db873fb65..9bb5cee5b2a 100644 --- a/examples/models/llama/static_attention.py +++ b/examples/models/llama/static_attention.py @@ -125,18 +125,19 @@ def update( class StaticAttentionMask: - def __init__(self, input_len, cache_len, style): + def __init__(self, input_len, cache_len, style, mask_val=float("-inf")): self.input_len = input_len self.cache_len = cache_len assert style in ("shift_pointer", "smart_mask") self.style = style + self.mask_val = mask_val self.unmasked_len = 0 self.tensor = torch.zeros(1, input_len, input_len + cache_len) self.reset() def reset(self): self.unmasked_len = 0 - self.tensor[:, :, : self.cache_len] = float("-inf") + self.tensor[:, :, : self.cache_len] = self.mask_val def unmask(self, new_unmasked_len): if new_unmasked_len <= 0: From 1f4da85ca848984ab5b962dceef69e87d117adb4 Mon Sep 17 00:00:00 2001 From: Max Ren <40742183+mcr229@users.noreply.github.com> Date: Wed, 26 Feb 2025 14:20:21 -0800 Subject: [PATCH 121/584] resize output tensor Differential Revision: D70218118 Pull Request resolved: https://github.com/pytorch/executorch/pull/8714 --- kernels/optimized/cpu/op_gelu.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git 
a/kernels/optimized/cpu/op_gelu.cpp b/kernels/optimized/cpu/op_gelu.cpp index dcb6bbc4279..ebe8923b590 100644 --- a/kernels/optimized/cpu/op_gelu.cpp +++ b/kernels/optimized/cpu/op_gelu.cpp @@ -98,6 +98,12 @@ Tensor& opt_gelu_out( ET_KERNEL_CHECK( context, check_gelu_args(input, approximate, out), InvalidArgument, out); + ET_KERNEL_CHECK( + context, + resize_tensor(out, input.sizes()) == Error::Ok, + InvalidArgument, + out); + ET_SWITCH_FLOATHBF16_TYPES( input.scalar_type(), context, "gelu.out", CTYPE, [&]() { gelu(context, input, approximate, out); From 50fb8a964d38fb59460d709f41a5a7bcfbc3eb29 Mon Sep 17 00:00:00 2001 From: Hansong <107070759+kirklandsign@users.noreply.github.com> Date: Wed, 26 Feb 2025 14:22:22 -0800 Subject: [PATCH 122/584] [Android benchmarking] Increase warm up iterations Use 10 iterations to make sure it's warmed up. --- .../src/main/java/org/pytorch/minibench/BenchmarkActivity.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java index afa8fca3233..c0856f3e4fe 100644 --- a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java +++ b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java @@ -44,7 +44,7 @@ protected void onCreate(Bundle savedInstanceState) { .get(); int numIter = intent.getIntExtra("num_iter", 50); - int numWarmupIter = intent.getIntExtra("num_warm_up_iter", 5); + int numWarmupIter = intent.getIntExtra("num_warm_up_iter", 10); // TODO: Format the string with a parsable format Stats stats = new Stats(); From 09b592b58460d7b3d11483296c4df0cc965eeef9 Mon Sep 17 00:00:00 2001 From: Nathanael See Date: Wed, 26 Feb 2025 14:51:31 -0800 Subject: [PATCH 123/584] move import of VK 4-bit source quantizer into function Differential Revision: D70268708 Pull Request resolved: https://github.com/pytorch/executorch/pull/8744 --- examples/models/llama/source_transformation/quantize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/models/llama/source_transformation/quantize.py b/examples/models/llama/source_transformation/quantize.py index 8923ab1fdec..d81c0849e62 100644 --- a/examples/models/llama/source_transformation/quantize.py +++ b/examples/models/llama/source_transformation/quantize.py @@ -14,8 +14,6 @@ import torch.nn as nn import torch.nn.functional as F -from executorch.backends.vulkan._passes import VkInt4WeightOnlyQuantizer - from executorch.extension.llm.export.builder import DType from sentencepiece import SentencePieceProcessor @@ -180,6 +178,8 @@ def quantize( # noqa C901 model = gptq_quantizer.quantize(model, inputs) return model elif qmode == "vulkan_4w": + from executorch.backends.vulkan._passes import VkInt4WeightOnlyQuantizer + q_group_size = 256 if group_size is None else group_size model = VkInt4WeightOnlyQuantizer(groupsize=q_group_size).quantize(model) From 8f509e18825ab52d2cd4b9a282825a3b91ed8518 Mon Sep 17 00:00:00 2001 From: Hansong <107070759+kirklandsign@users.noreply.github.com> Date: Wed, 26 Feb 2025 14:56:12 -0800 Subject: [PATCH 124/584] Linter fix --- docs/source/backends-xnnpack.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/backends-xnnpack.md b/docs/source/backends-xnnpack.md index e41189d0089..77285d3cdbe 100644 --- a/docs/source/backends-xnnpack.md +++ 
b/docs/source/backends-xnnpack.md @@ -121,4 +121,3 @@ target_link_libraries( ``` No additional steps are necessary to use the backend beyond linking the target. Any XNNPACK-delegated .pte file will automatically run on the registered backend. - From afcec1d2c0eafd266aad36d26042c42f87947e58 Mon Sep 17 00:00:00 2001 From: Hansong <107070759+kirklandsign@users.noreply.github.com> Date: Wed, 26 Feb 2025 15:15:14 -0800 Subject: [PATCH 125/584] [Android] Rename build script It's not only for LLM demo, but for general use case as well --- .github/workflows/_android.yml | 2 +- .github/workflows/android-perf.yml | 2 +- .github/workflows/android-release-artifacts.yml | 2 +- build/{build_android_llm_demo.sh => build_android_library.sh} | 0 .../android/LlamaDemo/docs/delegates/mediatek_README.md | 2 +- examples/demo-apps/android/LlamaDemo/setup-with-qnn.sh | 2 +- examples/demo-apps/android/LlamaDemo/setup.sh | 2 +- extension/android_test/setup.sh | 2 +- extension/benchmark/android/benchmark/README.md | 2 +- 9 files changed, 8 insertions(+), 8 deletions(-) rename build/{build_android_llm_demo.sh => build_android_library.sh} (100%) diff --git a/.github/workflows/_android.yml b/.github/workflows/_android.yml index 36b679eda44..fa7c331311f 100644 --- a/.github/workflows/_android.yml +++ b/.github/workflows/_android.yml @@ -29,7 +29,7 @@ jobs: export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded # Build LLM Demo for Android - bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME} + bash build/build_android_library.sh ${ARTIFACTS_DIR_NAME} # Running Android emulator directly on the runner and not using Docker run-emulator: diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index 8bebc7be1bc..201fb3b7a8f 100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -363,7 +363,7 @@ jobs: PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh export ANDROID_ABIS="arm64-v8a" - PYTHON_EXECUTABLE=python EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME} + PYTHON_EXECUTABLE=python EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 bash build/build_android_library.sh ${ARTIFACTS_DIR_NAME} # Let's see how expensive this job is, we might want to tone it down by running it periodically benchmark-on-device: diff --git a/.github/workflows/android-release-artifacts.yml b/.github/workflows/android-release-artifacts.yml index d204e121ffa..8d2c1d354cc 100644 --- a/.github/workflows/android-release-artifacts.yml +++ b/.github/workflows/android-release-artifacts.yml @@ -53,7 +53,7 @@ jobs: export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded # Build LLM Demo for Android - bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME} + bash build/build_android_library.sh ${ARTIFACTS_DIR_NAME} shasum -a 256 "${ARTIFACTS_DIR_NAME}/llm_demo/executorch.aar" diff --git a/build/build_android_llm_demo.sh b/build/build_android_library.sh similarity index 100% rename from build/build_android_llm_demo.sh rename to build/build_android_library.sh diff --git a/examples/demo-apps/android/LlamaDemo/docs/delegates/mediatek_README.md b/examples/demo-apps/android/LlamaDemo/docs/delegates/mediatek_README.md index 6351640dcc0..34d52dd4e04 100644 --- a/examples/demo-apps/android/LlamaDemo/docs/delegates/mediatek_README.md +++ b/examples/demo-apps/android/LlamaDemo/docs/delegates/mediatek_README.md @@ -123,7 +123,7 @@ The Mediatek runner (`examples/mediatek/executor_runner/mtk_llama_runner.cpp`) c Next we 
need to build and compile the MediaTek backend and MediaTek Llama runner. By setting `NEURON_BUFFER_ALLOCATOR_LIB`, the script will build the MediaTek backend. ``` -sh build/build_android_llm_demo.sh +sh build/build_android_library.sh ``` **Output**: This will generate an .aar file that is already imported into the expected directory for the Android app. It will live in `examples/demo-apps/android/Llamademo/app/libs`. diff --git a/examples/demo-apps/android/LlamaDemo/setup-with-qnn.sh b/examples/demo-apps/android/LlamaDemo/setup-with-qnn.sh index 044d80832de..1d251de9ef3 100644 --- a/examples/demo-apps/android/LlamaDemo/setup-with-qnn.sh +++ b/examples/demo-apps/android/LlamaDemo/setup-with-qnn.sh @@ -13,7 +13,7 @@ if [ -z "$QNN_SDK_ROOT" ]; then fi BASEDIR=$(dirname "$0") -source "$BASEDIR"/../../../../build/build_android_llm_demo.sh +source "$BASEDIR"/../../../../build/build_android_library.sh BUILD_AAR_DIR="$(mktemp -d)" export BUILD_AAR_DIR diff --git a/examples/demo-apps/android/LlamaDemo/setup.sh b/examples/demo-apps/android/LlamaDemo/setup.sh index 4d70c67ede0..ec626c289b5 100644 --- a/examples/demo-apps/android/LlamaDemo/setup.sh +++ b/examples/demo-apps/android/LlamaDemo/setup.sh @@ -8,7 +8,7 @@ set -eu BASEDIR=$(dirname "$0") -source "$BASEDIR"/../../../../build/build_android_llm_demo.sh +source "$BASEDIR"/../../../../build/build_android_library.sh BUILD_AAR_DIR="$(mktemp -d)" export BUILD_AAR_DIR diff --git a/extension/android_test/setup.sh b/extension/android_test/setup.sh index c21d2c09623..e1306af4205 100755 --- a/extension/android_test/setup.sh +++ b/extension/android_test/setup.sh @@ -11,7 +11,7 @@ BUILD_AAR_DIR="$(mktemp -d)" export BUILD_AAR_DIR BASEDIR=$(dirname "$0") -source "$BASEDIR"/../../build/build_android_llm_demo.sh +source "$BASEDIR"/../../build/build_android_library.sh build_native_library() { ANDROID_ABI="$1" diff --git a/extension/benchmark/android/benchmark/README.md b/extension/benchmark/android/benchmark/README.md index cfc5ef0e594..9aba211b42b 100644 --- a/extension/benchmark/android/benchmark/README.md +++ b/extension/benchmark/android/benchmark/README.md @@ -15,7 +15,7 @@ Minibench is usedful for giving reference performance data when developers integ You will need executorch AAR for Java and JNI dependencies. ``` export ANDROID_NDK= -sh build/build_android_llm_demo.sh +sh build/build_android_library.sh ``` and copy the AAR to `app/libs`. 
``` From 68042847fd0eb6aac94ab2ffad8e1440fca865f4 Mon Sep 17 00:00:00 2001 From: Jack-Khuu Date: Wed, 26 Feb 2025 17:12:01 -0800 Subject: [PATCH 126/584] Updating torchao pin to Feb 26 2025 (#8749) Updating torchao pin to Feb 26 2025 (#8749) Summary: Bump the torchao pin to https://github.com/pytorch/ao/commit/7d8794622f3ac7ffa98761314019a20fba06edef Pull Request resolved: https://github.com/pytorch/executorch/pull/8749 Test Plan: CI Differential Revision: D70280038 Pulled By: Jack-Khuu --- third-party/ao | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third-party/ao b/third-party/ao index 11333ba2cb5..7d8794622f3 160000 --- a/third-party/ao +++ b/third-party/ao @@ -1 +1 @@ -Subproject commit 11333ba2cb5c4e792bc4f5c0d70c12991f972008 +Subproject commit 7d8794622f3ac7ffa98761314019a20fba06edef From 69c76e69e4c3532373da4293e9a99b26fa4810cd Mon Sep 17 00:00:00 2001 From: Digant Desai Date: Wed, 26 Feb 2025 23:28:57 -0600 Subject: [PATCH 127/584] [Setup] Handle env variable DEBUG = "" (#8431) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ad5c561a7b3..d5b9fea8cfc 100644 --- a/setup.py +++ b/setup.py @@ -170,7 +170,7 @@ def write_to_python_file(cls, path: str) -> None: # set to a non-empty value, the build type is Debug. Otherwise, the build type # is Release. def get_build_type(is_debug=None) -> str: - debug = int(os.environ.get("DEBUG", 0)) if is_debug is None else is_debug + debug = int(os.environ.get("DEBUG", 0) or 0) if is_debug is None else is_debug cfg = "Debug" if debug else "Release" return cfg From 30d4cc87f37f43a493f243cb949a65970a8d6819 Mon Sep 17 00:00:00 2001 From: Jack <32371937+jackzhxng@users.noreply.github.com> Date: Wed, 26 Feb 2025 21:39:20 -0800 Subject: [PATCH 128/584] Use to_edge_lower_and_transform for XNNPack (#8624) Differential Revision: D70221944 Pull Request resolved: https://github.com/pytorch/executorch/pull/8717 --- examples/models/llama/export_llama_lib.py | 132 +++++++++++++++------- examples/models/llava/export_llava.py | 1 - extension/llm/export/builder.py | 29 ++++- 3 files changed, 114 insertions(+), 48 deletions(-) diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py index 6d9ba750431..3a1f423aa27 100644 --- a/examples/models/llama/export_llama_lib.py +++ b/examples/models/llama/export_llama_lib.py @@ -676,47 +676,62 @@ def _validate_args(args): ) -def _export_llama(args) -> LLMEdgeManager: # noqa: C901 - _validate_args(args) - - pt2e_quant_params, quantizers, quant_dtype = get_quantizer_and_quant_params(args) - - # export_to_edge - builder_exported = _prepare_for_llama_export(args).export() - - builder_exported.run_canonical_optimizations() - - if args.export_only: - exit() - - builder_exported_to_edge = builder_exported.pt2e_quantize( - quantizers - ).export_to_edge() - - modelname = builder_exported_to_edge.modelname - - # to_backend +def _to_edge_and_lower_llama_xnnpack( + builder_exported, + modelname, + additional_passes, + pt2e_quant_params, + quantizers, + quant_dtype, + args, +) -> LLMEdgeManager: # noqa: C901 partitioners = [] # Order matters here, dynamic quantization should be applied first when both xnnpack and xnnpack_extended_ops are enabled - if ( - pt2e_quant_params is not None and pt2e_quant_params.quantize_linear is not None - ) or (args.xnnpack): - partitioners.append( - get_xnnpack_partitioner(dynamic_quant_only_partitioner=True) - ) + partitioners.append(get_xnnpack_partitioner(dynamic_quant_only_partitioner=True)) 
- # force xnnpack to be true if pt2e_quant_params is not None and args.xnnpack is False - args.xnnpack = True - modelname = f"xnnpack_dq_{modelname}" + modelname = f"xnnpack_dq_{modelname}" if args.xnnpack_extended_ops: - assert args.xnnpack, "xnnpack_extended_ops requires xnnpack to be enabled" partitioners.append( get_xnnpack_partitioner(dynamic_quant_only_partitioner=False) ) modelname = f"xnnpack_{modelname}" + logging.info("Lowering model using following partitioner(s): ") + for partitioner in partitioners: + logging.info(f"--> {partitioner.__class__.__name__}") + + # TODO: Enable generating ETRecord with XNNPack and to_edge_transform_and_lower(). + if args.generate_etrecord: + raise NotImplementedError( + "export_llama does not support XNNPack and generating ETRecord at the moment." + ) + + builder = builder_exported.pt2e_quantize(quantizers).to_edge_transform_and_lower( + partitioners + ) + if args.verbose: + print_delegation_info(builder.edge_manager.exported_program().graph_module) + + return builder.to_executorch(passes=additional_passes) + + +def _to_edge_and_lower_llama( # noqa: C901 + builder_exported, + modelname, + additional_passes, + pt2e_quant_params, + quantizers, + quant_dtype, + args, +): + builder_exported_to_edge = builder_exported.pt2e_quantize( + quantizers + ).export_to_edge() + + # to_backend + partitioners = [] if args.vulkan: partitioners.append( get_vulkan_partitioner( @@ -731,7 +746,6 @@ def _export_llama(args) -> LLMEdgeManager: # noqa: C901 modelname = f"vulkan_{modelname}" # Need to remove asserts from the graph to prevent graph breaks - # pyre-ignore: Undefined attribute [16]: `Optional` has no attribute `exported_program`. remove_asserts(builder_exported_to_edge.edge_manager.exported_program()) if args.mps: @@ -760,13 +774,11 @@ def _export_llama(args) -> LLMEdgeManager: # noqa: C901 # pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.utils.utils` from executorch.backends.qualcomm.utils.utils import _transform, tag_quant_io - # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `qualcomm`, Optional type has no attribute `exported_program` _transform(builder_exported_to_edge.edge_manager.exported_program()) if args.num_sharding > 0: model_sharding.split_graph( builder_exported_to_edge.edge_manager.exported_program(), - # pyre-fixme[16]: `Optional` has no attribute `__getitem__`. builder_exported_to_edge.metadata["get_n_layers"], shares=args.num_sharding, ) @@ -792,19 +804,15 @@ def _export_llama(args) -> LLMEdgeManager: # noqa: C901 atten.head_dim, ) ) - # pyre-ignore tag_quant_io( builder_exported_to_edge.edge_manager.exported_program().graph_module, - partial(get_custom_quant_ios_dtype, cache_shape), # pyre-ignore + partial(get_custom_quant_ios_dtype, cache_shape), ) logging.info("Lowering model using following partitioner(s): ") for partitioner in partitioners: logging.info(f"--> {partitioner.__class__.__name__}") - additional_passes = [] - if args.model in TORCHTUNE_DEFINED_MODELS: - additional_passes = [InitializedMutableBufferPass(["kv_cache_pos"])] if args.generate_etrecord: if not builder_exported_to_edge.edge_manager: raise ValueError("Unable to generate etrecord due to missing edge manager.") @@ -818,7 +826,6 @@ def _export_llama(args) -> LLMEdgeManager: # noqa: C901 if args.num_sharding > 0 and args.qnn: from executorch.backends.qualcomm.utils.utils import canonicalize_program - # pyre-fixme[16]: Module `backends` has no attribute `qualcomm`. 
canonicalize_program(builder.edge_manager.exported_program()) builder = builder.to_executorch( @@ -840,11 +847,55 @@ def _export_llama(args) -> LLMEdgeManager: # noqa: C901 if args.num_sharding > 0 and args.qnn: from executorch.backends.qualcomm.utils.utils import canonicalize_program - # pyre-fixme[16]: Module `backends` has no attribute `qualcomm`. canonicalize_program(builder.edge_manager.exported_program()) builder = builder.to_executorch(passes=additional_passes) + return builder + + +def _export_llama(args) -> LLMEdgeManager: # noqa: C901 + _validate_args(args) + + pt2e_quant_params, quantizers, quant_dtype = get_quantizer_and_quant_params(args) + + additional_passes = [] + if args.model in TORCHTUNE_DEFINED_MODELS: + additional_passes = [InitializedMutableBufferPass(["kv_cache_pos"])] + + # export_to_edge + builder_exported = _prepare_for_llama_export(args).export() + builder_exported.run_canonical_optimizations() + modelname = builder_exported.modelname + + if args.export_only: + exit() + + if pt2e_quant_params is not None and pt2e_quant_params.quantize_linear is not None: + # Force xnnpack to be true if pt2e_quant_params is not None and args.xnnpack is False + args.xnnpack = True + + if args.xnnpack: + builder = _to_edge_and_lower_llama_xnnpack( + builder_exported, + modelname, + additional_passes, + pt2e_quant_params, + quantizers, + quant_dtype, + args, + ) + else: + builder = _to_edge_and_lower_llama( + builder_exported, + modelname, + additional_passes, + pt2e_quant_params, + quantizers, + quant_dtype, + args, + ) + if args.profile_memory: generate_memory_trace(builder.export_program, "memory_profile.json") @@ -866,7 +917,6 @@ def _export_llama(args) -> LLMEdgeManager: # noqa: C901 output_file = f"{builder.output_dir}/{modelname}.pte" builder.save_to_pte(output_file) - return builder diff --git a/examples/models/llava/export_llava.py b/examples/models/llava/export_llava.py index 82c7aca09e0..a5057e5e850 100644 --- a/examples/models/llava/export_llava.py +++ b/examples/models/llava/export_llava.py @@ -67,7 +67,6 @@ def export(self) -> "LlavaEdgeManager": dynamic_shapes=dynamic_shape, strict=False, ) - # pyre-ignore: Incompatible attribute type [8]: Attribute `pre_autograd_graph_module` declared in class `LLMEdgeManager` has type `Optional[GraphModule]` but is used as type `Module`. 
self.pre_autograd_graph_module = self.export_program.module() return self diff --git a/extension/llm/export/builder.py b/extension/llm/export/builder.py index 88d2bc0cab9..3e987489693 100644 --- a/extension/llm/export/builder.py +++ b/extension/llm/export/builder.py @@ -21,7 +21,7 @@ DuplicateDynamicQuantChainPass, ) from executorch.backends.xnnpack._passes.convert_to_linear import ConvertToLinearPass -from executorch.exir import EdgeProgramManager +from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower from executorch.exir.backend.partitioner import Partitioner from executorch.exir.backend.utils import format_delegated_graph @@ -39,7 +39,7 @@ from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e from torch.ao.quantization.quantizer import Quantizer from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer -from torch.export import export_for_training +from torch.export import export_for_training, ExportedProgram from torch.nn.attention import SDPBackend FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s" @@ -89,8 +89,8 @@ def __init__( dynamic_shapes: Optional[Any] = None, ): self.model = model - # graph module returned from export() - self.pre_autograd_graph_module: Optional[torch.fx.GraphModule] = None + self.pre_autograd_exported_program: Optional[ExportedProgram] = None + self.pre_autograd_graph_module: Optional[torch.nn.Module] = None self.modelname = modelname self.max_seq_len = max_seq_len self.dtype = dtype @@ -218,8 +218,8 @@ def export(self) -> "LLMEdgeManager": kwargs=self.example_kwarg_inputs, dynamic_shapes=dynamic_shape, ) - # pyre-fixme[8]: Attribute has type `Optional[GraphModule]`; used as # `Module`. + self.pre_autograd_exported_program = exported_module self.pre_autograd_graph_module = exported_module.module() if hasattr(self.args, "export_only") and self.args.export_only: torch.export.save(exported_module, self.args.output_name) @@ -330,7 +330,10 @@ def pt2e_quantize(self, quantizers: Optional[List[Quantizer]]) -> "LLMEdgeManage assert ( self.pre_autograd_graph_module is not None ), "Please run export() first" - m = prepare_pt2e(self.pre_autograd_graph_module, composed_quantizer) + m = prepare_pt2e( + self.pre_autograd_graph_module, # pyre-ignore[6] + composed_quantizer, + ) logging.info( f"Calibrating with tasks: {self.calibration_tasks}, limit: {self.calibration_limit}, calibration_data: {self.calibration_data}, tokenizer_path: {self.tokenizer_path}, seq_length: {self.calibration_seq_length}" ) @@ -430,6 +433,20 @@ def to_backend(self, partitioners: Optional[List[Partitioner]]) -> "LLMEdgeManag return self + def to_edge_transform_and_lower( + self, partitioners: Optional[List[Partitioner]] + ) -> "LLMEdgeManager": + if partitioners is None: + logging.info("No partitioner provided, skipping backend lowering...") + edge_config = self._get_edge_config() + self.edge_manager = to_edge_transform_and_lower( + self.pre_autograd_exported_program, + partitioner=partitioners, + compile_config=edge_config, + constant_methods=self.metadata, + ) + return self + def to_executorch( self, passes: Optional[List[ExportPass]] = None ) -> "LLMEdgeManager": From 9c12c8f7aa396305331e37c0aa0b0c59269d4f5d Mon Sep 17 00:00:00 2001 From: annakukliansky Date: Thu, 27 Feb 2025 02:14:58 -0800 Subject: [PATCH 129/584] update python_* usage Differential Revision: D70201831 Pull Request resolved: https://github.com/pytorch/executorch/pull/8742 --- backends/xnnpack/test/TARGETS | 2 +- 
exir/dialects/edge/test/TARGETS | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/xnnpack/test/TARGETS b/backends/xnnpack/test/TARGETS index b3143743b9c..9b2ce0a4e82 100644 --- a/backends/xnnpack/test/TARGETS +++ b/backends/xnnpack/test/TARGETS @@ -53,7 +53,7 @@ runtime.python_test( srcs = glob([ "models/*.py", ]), - tags = ["long_running"], + labels = ["long_running"], deps = [ "fbsource//third-party/pypi/timm:timm", "fbsource//third-party/pypi/torchsr:torchsr", # @manual diff --git a/exir/dialects/edge/test/TARGETS b/exir/dialects/edge/test/TARGETS index 52a0d3ec60e..8a689b0dba6 100644 --- a/exir/dialects/edge/test/TARGETS +++ b/exir/dialects/edge/test/TARGETS @@ -10,7 +10,7 @@ python_unittest( resources = { "//executorch/exir/dialects/edge:edge_yaml": "edge.yaml", }, - tags = ["long_running"], + labels = ["long_running"], deps = [ "fbsource//third-party/pypi/expecttest:expecttest", # @manual "//caffe2:torch", From 8348ec0ba72e5ad93e7eb74d5831200d88bdaa1e Mon Sep 17 00:00:00 2001 From: Erik Lundell Date: Thu, 27 Feb 2025 13:58:02 +0100 Subject: [PATCH 130/584] Arm backend: Remove N=1 constraint for MaxPool2d on U55 (#8765) This is not needed anymore after and earlier Ethos-U compiler (Vela) version bump this is supported/handled by Vela. Signed-off-by: Erik Lundell --- .../arm/operator_support/pool_2d_support.py | 6 +++--- backends/arm/test/ops/test_max_pool.py | 20 +++++++++++++++++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/backends/arm/operator_support/pool_2d_support.py b/backends/arm/operator_support/pool_2d_support.py index 7aa35a721b6..c1dd143a4fc 100644 --- a/backends/arm/operator_support/pool_2d_support.py +++ b/backends/arm/operator_support/pool_2d_support.py @@ -26,8 +26,8 @@ def stride_check(strides: tuple[int, int]) -> bool: def dim_check(shape=torch.Size) -> bool: - check = shape[0] == 1 - for dim in shape: + check = True + for dim in shape[1:]: check &= 1 <= dim <= 65536 return check @@ -59,7 +59,7 @@ def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification): if not kernel_check(kernel): return False - return dim_check(shape) and stride_check(stride) + return dim_check(shape) and shape[0] == 1 and stride_check(stride) @register_tosa_support_check diff --git a/backends/arm/test/ops/test_max_pool.py b/backends/arm/test/ops/test_max_pool.py index 6d6e0b8be5c..a31c12be3a0 100644 --- a/backends/arm/test/ops/test_max_pool.py +++ b/backends/arm/test/ops/test_max_pool.py @@ -1,6 +1,6 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. -# Copyright 2024-2025 Arm Limited and/or its affiliates. # All rights reserved. +# Copyright 2024-2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -232,8 +232,24 @@ def test_maxpool2d_tosa_u85_BI_mult_batches( if conftest.is_option_enabled("corstone_fvp"): tester.run_method_and_compare_outputs(qtol=1, inputs=(test_data,)) + @parameterized.expand(test_data_suite_mult_batches) + @pytest.mark.corstone_fvp + @conftest.expectedFailureOnFVP # TODO: MLETORCH-433 + def test_maxpool2d_tosa_u55_BI_mult_batches( + self, + test_name: str, + test_data: torch.Tensor, + model_params: int | Tuple[int, int], + ): + tester = self._test_maxpool2d_tosa_ethos_BI_pipeline( + self.MaxPool2d(*model_params), + common.get_u55_compile_spec(), + (test_data,), + ) + if conftest.is_option_enabled("corstone_fvp"): + tester.run_method_and_compare_outputs(qtol=1, inputs=(test_data,)) + reject_data_suite = [ - (MaxPool2d(1, 1, 0), torch.rand(2, 5, 5, 5)), (MaxPool2d(1, 4, 0), torch.rand(1, 10, 10, 10)), (MaxPool2d((1, 257), 1, 0), torch.rand(1, 16, 5, 300)), (MaxPool2d((800, 90), 1, 0), torch.rand(1, 16, 850, 100)), From dedfdaff32a5783fbe12910f7ad65b0e00ab53c0 Mon Sep 17 00:00:00 2001 From: Adrian Lundell <36153706+AdrianLundell@users.noreply.github.com> Date: Thu, 27 Feb 2025 14:41:52 +0100 Subject: [PATCH 131/584] Arm backend: Support Short input dtype in EthosUDelegate (#8761) --- backends/arm/runtime/EthosUBackend.cpp | 8 +++++++- backends/arm/test/ops/test_rshift.py | 7 ++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/backends/arm/runtime/EthosUBackend.cpp b/backends/arm/runtime/EthosUBackend.cpp index b0fa5bd9723..2680714bdfa 100644 --- a/backends/arm/runtime/EthosUBackend.cpp +++ b/backends/arm/runtime/EthosUBackend.cpp @@ -193,6 +193,10 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface { supported |= (tensor_in.scalar_type() == ScalarType::Char and handles.inputs->io[i].elem_size == 1); + // 16 bit int (IOQDQ pass prepared networks) + supported |= + (tensor_in.scalar_type() == ScalarType::Short and + handles.inputs->io[i].elem_size == 2); if (!supported) { ET_LOG( Error, @@ -220,6 +224,8 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface { handles.inputs->io[i].elem_size == 1; bool both_int = tensor_in.scalar_type() == ScalarType::Int and handles.inputs->io[i].elem_size == 4; + bool both_short = tensor_in.scalar_type() == ScalarType::Short and + handles.inputs->io[i].elem_size == 2; // Select a compatible copy routine if (both_char and permuted_input_shape) { @@ -233,7 +239,7 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface { tensor_in.size(1), tensor_in.size(2), tensor_in.size(3)); - } else if (both_char or both_int) { + } else if (both_char or both_int or both_short) { EXECUTORCH_PROF_SCOPE( event_tracer, "+EthosUBackend::execute()handles.input.memcpy()"); // Sizes match and elt size matches so memcpy diff --git a/backends/arm/test/ops/test_rshift.py b/backends/arm/test/ops/test_rshift.py index 9637afead1c..d79be67dce6 100644 --- a/backends/arm/test/ops/test_rshift.py +++ b/backends/arm/test/ops/test_rshift.py @@ -1,5 +1,4 @@ # Copyright 2024-2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -75,16 +74,14 @@ def test_rshift_tosa_MI(self, test_data): def test_rshift_tosa_BI(self, test_data): self._test_rshift_tosa_BI(test_data) - # TODO: MLETORCH-644 - Add support for INT16 input/output - @parameterized.expand(Rshift.test_data[:-1]) + @parameterized.expand(Rshift.test_data) def test_rshift_u55_BI(self, test_data): compile_spec = common.get_u55_compile_spec() tester = self._test_rshift_ethosu_BI(test_data, compile_spec) if conftest.is_option_enabled("corstone_fvp"): tester.run_method_and_compare_outputs(atol=1, inputs=test_data) - # TODO: MLETORCH-644 - Add support for INT16 input/output - @parameterized.expand(Rshift.test_data[:-1]) + @parameterized.expand(Rshift.test_data) def test_rshift_u85_BI(self, test_data): compile_spec = common.get_u85_compile_spec() tester = self._test_rshift_ethosu_BI(test_data, compile_spec) From d4fd7be07b1a73ff03c4693a86702917b3b56cbb Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Thu, 27 Feb 2025 09:47:11 -0800 Subject: [PATCH 132/584] Make any Oblective-C(++) files be a part of Apple extension (#8756) --- extension/apple/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extension/apple/CMakeLists.txt b/extension/apple/CMakeLists.txt index ed233da1482..9aca9edede2 100644 --- a/extension/apple/CMakeLists.txt +++ b/extension/apple/CMakeLists.txt @@ -18,7 +18,7 @@ endif() add_library(extension_apple) -set(EXPORTED_SOURCES ExecuTorch/Exported/ExecuTorchLog.mm) +file(GLOB EXPORTED_SOURCES ExecuTorch/Exported/*.m ExecuTorch/Exported/*.mm) target_sources(extension_apple PRIVATE ${EXPORTED_SOURCES}) From 4b85ee2b0497f1e3bf7aa7fd62b1ea7818db8597 Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Thu, 27 Feb 2025 09:59:09 -0800 Subject: [PATCH 133/584] Sort .gitignore and add .venv (#8777) * Sort .gitignore and add .venv * Update .gitignore --- .gitignore | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 7327aa2d1cb..093bf889b7d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,41 +1,32 @@ +# System files +.DS_Store + +# Python environment and cache .hypothesis -buck-out/ .mypy_cache/ +.venv/ +__pycache__/ + +# Build and tool-generated files +buck-out/ buck2-bin/ -cmake-out* -.DS_Store cmake-android-out/ -cmake-out-android/ cmake-ios-out/ +cmake-out* +cmake-out-android/ ethos-u-scratch/ executorch.egg-info pip-out/ -__pycache__/ # Any exported models and profiling outputs -*.pte -*.model -!test_tiktoken_tokenizer.model *.bin +*.model +*.pte !test_bpe_tokenizer.bin +!test_tiktoken_tokenizer.model # Editor temporaries -*.swa -*.swb -*.swc -*.swd -*.swe -*.swf -*.swg -*.swh -*.swi -*.swj -*.swk -*.swl -*.swm -*.swn -*.swo -*.swp +*.idea +*.sw[a-z] *~ .~lock.* -*.idea From aa3ee7e3a72db9b8bce0d2a83b1a28e4e118e3e8 Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Thu, 27 Feb 2025 10:35:19 -0800 Subject: [PATCH 134/584] [ExecuTorch] Arm Ethos: Disable pyre (#8746) Disabling Pyre. Relying on OSS mypy enabled in #7776. 
Differential Revision: [D70146769](https://our.internmc.facebook.com/intern/diff/D70146769/) ghstack-source-id: 268178737 Pull Request resolved: https://github.com/pytorch/executorch/pull/8664 --------- Co-authored-by: Digant Desai --- backends/arm/TARGETS | 18 ------------------ backends/arm/_passes/TARGETS | 1 - backends/arm/operator_support/TARGETS | 1 - backends/arm/operators/TARGETS | 3 --- backends/arm/test/ops/test_linear.py | 14 ++++++++++---- 5 files changed, 10 insertions(+), 27 deletions(-) diff --git a/backends/arm/TARGETS b/backends/arm/TARGETS index f4ab883362e..63140dc7b9f 100644 --- a/backends/arm/TARGETS +++ b/backends/arm/TARGETS @@ -1,6 +1,5 @@ # @noautodeps load("@fbcode_macros//build_defs:python_library.bzl", "python_library") - python_library( name = "arm_partitioner", srcs = [ @@ -9,7 +8,6 @@ python_library( "tosa_backend.py", "tosa_partitioner.py", ], - typing = True, deps = [ ":arm_backend", "//executorch/backends/arm/operator_support:operator_support", @@ -17,13 +15,11 @@ python_library( "//executorch/exir:lib", ], ) - python_library( name = "arm_backend", srcs = [ "arm_backend.py", ], - typing = True, deps = [ "fbsource//third-party/pypi/flatbuffers:flatbuffers", "fbsource//third-party/pypi/ml-dtypes:ml-dtypes", @@ -36,11 +32,9 @@ python_library( "//executorch/backends/arm/_passes:passes", ], ) - python_library( name = "process_node", srcs = ["process_node.py"], - typing = True, deps = [ "fbsource//third-party/serialization_lib/python/tosa:tosa", "//executorch/backends/arm/operators:node_visitor", @@ -50,36 +44,30 @@ python_library( "//executorch/exir:lib", ], ) - python_library( name = "arm_vela", srcs = [ "arm_vela.py", ], - typing = True, deps = [ "fbsource//third-party/pypi/ethos-u-vela:ethos-u-vela", ], ) - python_library( name = "tosa_mapping", srcs = [ "tosa_mapping.py", ], - typing = True, deps = [ "fbsource//third-party/serialization_lib/python/serializer:serializer", "//caffe2:torch", ], ) - python_library( name = "tosa_quant_utils", srcs = [ "tosa_quant_utils.py", ], - typing = True, deps = [ "fbsource//third-party/pypi/numpy:numpy", "fbsource//third-party/serialization_lib/python/serializer:serializer", @@ -88,38 +76,32 @@ python_library( "//executorch/exir/dialects:lib", ], ) - python_library( name = "tosa_specification", srcs = [ "tosa_specification.py", ], - typing = True, deps = [ "fbsource//third-party/pypi/packaging:packaging", "//executorch/exir/backend:compile_spec_schema", ], ) - python_library( name = "tosa_utils", srcs = [ "tosa_utils.py", ], - typing = True, deps = [ "fbsource//third-party/serialization_lib/python/serializer:serializer", ":tosa_quant_utils", "//executorch/backends/arm/operators:node_visitor", ], ) - python_library( name = "arm_model_evaluator", srcs = [ "util/arm_model_evaluator.py", ], - typing = True, deps = [ "//caffe2:torch", ] diff --git a/backends/arm/_passes/TARGETS b/backends/arm/_passes/TARGETS index 843d6b159dc..f8bf9c0d208 100644 --- a/backends/arm/_passes/TARGETS +++ b/backends/arm/_passes/TARGETS @@ -3,7 +3,6 @@ load("@fbcode_macros//build_defs:python_library.bzl", "python_library") python_library( name = "passes", srcs = glob(["*.py"]), - typing = True, deps = [ "//executorch/backends/arm:tosa_quant_utils", "//executorch/backends/arm:tosa_utils", diff --git a/backends/arm/operator_support/TARGETS b/backends/arm/operator_support/TARGETS index 0de9f060bf5..c0c5af7487b 100644 --- a/backends/arm/operator_support/TARGETS +++ b/backends/arm/operator_support/TARGETS @@ -3,7 +3,6 @@ 
load("@fbcode_macros//build_defs:python_library.bzl", "python_library") python_library( name = "operator_support", srcs = glob(["*.py"]), - typing = True, deps = [ "//executorch/backends/arm/_passes:passes", "//executorch/backends/arm:tosa_specification", diff --git a/backends/arm/operators/TARGETS b/backends/arm/operators/TARGETS index cb08adb0354..b37823b60c2 100644 --- a/backends/arm/operators/TARGETS +++ b/backends/arm/operators/TARGETS @@ -4,7 +4,6 @@ load("@fbcode_macros//build_defs:python_library.bzl", "python_library") python_library( name = "node_visitor", srcs = ["node_visitor.py"], - typing = True, deps = [ "//executorch/backends/arm:tosa_mapping", "//executorch/backends/arm:tosa_specification", @@ -14,7 +13,6 @@ python_library( python_library( name = "ops", srcs = glob(["op_*.py", "ops_*.py"]), - typing = True, deps = [ "fbsource//third-party/serialization_lib/python/tosa:tosa", ":node_visitor", @@ -29,7 +27,6 @@ python_library( python_library( name = "lib", srcs = ["__init__.py"], - typing = True, deps = [ ":node_visitor", ":ops", diff --git a/backends/arm/test/ops/test_linear.py b/backends/arm/test/ops/test_linear.py index 3bdec0c694a..33bf9932b5a 100644 --- a/backends/arm/test/ops/test_linear.py +++ b/backends/arm/test/ops/test_linear.py @@ -127,7 +127,7 @@ def forward(self, x): def _test_linear_tosa_MI_pipeline( self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] ): - ( + tester = ( ArmTester( module, example_inputs=test_data, @@ -141,13 +141,14 @@ def _test_linear_tosa_MI_pipeline( .to_edge_transform_and_lower() .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) ) + if conftest.is_option_enabled("tosa_ref_model"): + tester.run_method_and_compare_outputs(inputs=test_data) def _test_linear_tosa_BI_pipeline( self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] ): - ( + tester = ( ArmTester( module, example_inputs=test_data, @@ -162,8 +163,9 @@ def _test_linear_tosa_BI_pipeline( .to_edge_transform_and_lower() .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) .to_executorch() - .run_method_and_compare_outputs(inputs=test_data, qtol=1) ) + if conftest.is_option_enabled("tosa_ref_model"): + tester.run_method_and_compare_outputs(inputs=test_data, qtol=1) def _test_linear_tosa_ethosu_BI_pipeline( self, @@ -186,9 +188,11 @@ def _test_linear_tosa_ethosu_BI_pipeline( .to_executorch() .serialize() ) + # TODO: Add FVP testing support. 
return tester @parameterized.expand(test_data_suite_rank1 + test_data_suite_rank4) + @pytest.mark.tosa_ref_model def test_linear_tosa_MI( self, test_name: str, @@ -208,6 +212,7 @@ def test_linear_tosa_MI( ) @parameterized.expand(test_data_suite_rank1 + test_data_suite_rank4) + @pytest.mark.tosa_ref_model def test_linear_tosa_BI( self, test_name: str, @@ -249,6 +254,7 @@ def test_linear_tosa_u55_BI( tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) @parameterized.expand(test_data_suite_rank1 + test_data_suite_rank4) + @pytest.mark.corstone_fvp def test_linear_tosa_u85_BI( self, test_name: str, From 0ab3499a9e7fd19b77a119084fe888430c20c36a Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Thu, 27 Feb 2025 10:58:09 -0800 Subject: [PATCH 135/584] Add Xcode related artifacts to .gitignore (#8779) --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 093bf889b7d..849a6c59fb0 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,8 @@ pip-out/ *.sw[a-z] *~ .~lock.* + +# Xcode +xcuserdata/ +*.xcworkspace/ +*.xcframework/ From 1d0e0ee0c2ab138f23cb9e087c17363045d0e0ee Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Thu, 27 Feb 2025 11:38:54 -0800 Subject: [PATCH 136/584] training module takes .ptd Differential Revision: D69547105 Pull Request resolved: https://github.com/pytorch/executorch/pull/8739 --- extension/flat_tensor/test/CMakeLists.txt | 6 +- extension/training/module/test/targets.bzl | 7 +- .../module/test/training_module_test.cpp | 48 +++++++++++ extension/training/module/training_module.cpp | 81 ++++++++++--------- extension/training/module/training_module.h | 14 +++- runtime/executor/tensor_parser_exec_aten.cpp | 37 ++++----- runtime/executor/test/CMakeLists.txt | 6 +- test/models/export_program.py | 5 ++ test/models/targets.bzl | 12 ++- 9 files changed, 147 insertions(+), 69 deletions(-) diff --git a/extension/flat_tensor/test/CMakeLists.txt b/extension/flat_tensor/test/CMakeLists.txt index 81fc7d63c5b..57c62d75e81 100644 --- a/extension/flat_tensor/test/CMakeLists.txt +++ b/extension/flat_tensor/test/CMakeLists.txt @@ -20,7 +20,7 @@ include(${EXECUTORCH_ROOT}/build/Test.cmake) add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/ModuleLinearProgram.pte" - "${CMAKE_CURRENT_BINARY_DIR}/_default_external_constant.ptd" + "${CMAKE_CURRENT_BINARY_DIR}/ModuleLinearProgram.ptd" COMMAND python -m test.models.export_program --modules "ModuleLinear" --external-constants --outdir "${CMAKE_CURRENT_BINARY_DIR}" 2> /dev/null @@ -30,12 +30,12 @@ add_custom_command( add_custom_target( extension_flat_tensor_test_resources DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/ModuleLinearProgram.pte" - "${CMAKE_CURRENT_BINARY_DIR}/_default_external_constant.ptd" + "${CMAKE_CURRENT_BINARY_DIR}/ModuleLinearProgram.ptd" ) set(test_env "ET_MODULE_LINEAR_PROGRAM_PATH=${CMAKE_CURRENT_BINARY_DIR}/ModuleLinearProgram.pte" - "ET_MODULE_LINEAR_DATA_PATH=${CMAKE_CURRENT_BINARY_DIR}/_default_external_constant.ptd" + "ET_MODULE_LINEAR_DATA_PATH=${CMAKE_CURRENT_BINARY_DIR}/ModuleLinearProgram.ptd" ) set(_test_srcs flat_tensor_data_map_test.cpp flat_tensor_header_test.cpp) diff --git a/extension/training/module/test/targets.bzl b/extension/training/module/test/targets.bzl index 3b0ea9e31ef..17e8d1fe6ef 100644 --- a/extension/training/module/test/targets.bzl +++ b/extension/training/module/test/targets.bzl @@ -16,9 +16,11 @@ def define_common_targets(is_fbcode = False): # an fbcode target path because the authoring/export tools # intentionally don't work in xplat 
(since they're host-only tools). "ET_MODULE_ADD_PATH": "$(location fbcode//executorch/test/models:exported_programs[ModuleAdd.pte])", - "ET_MODULE_SIMPLE_TRAIN_PATH": "$(location fbcode//executorch/test/models:exported_programs[ModuleSimpleTrain.pte])", - "ET_MODULE_LINEAR_PROGRAM_PATH": "$(location fbcode//executorch/test/models:exported_program_and_data[ModuleLinear.pte])", "ET_MODULE_LINEAR_DATA_PATH": "$(location fbcode//executorch/test/models:exported_program_and_data[ModuleLinear.ptd])", + "ET_MODULE_LINEAR_PROGRAM_PATH": "$(location fbcode//executorch/test/models:exported_program_and_data[ModuleLinear.pte])", + "ET_MODULE_TRAIN_DATA_PATH": "$(location fbcode//executorch/test/models:exported_program_and_data[ModuleSimpleTrain.ptd])", + "ET_MODULE_TRAIN_PROGRAM_PATH": "$(location fbcode//executorch/test/models:exported_program_and_data[ModuleSimpleTrainProgram.pte])", + "ET_MODULE_SIMPLE_TRAIN_PATH": "$(location fbcode//executorch/test/models:exported_programs[ModuleSimpleTrain.pte])", } runtime.cxx_test( @@ -29,6 +31,7 @@ def define_common_targets(is_fbcode = False): deps = [ "//executorch/extension/training/module:training_module", "//executorch/extension/data_loader:file_data_loader", + "//executorch/extension/flat_tensor:flat_tensor_data_map", "//executorch/runtime/core/exec_aten/testing_util:tensor_util", "//executorch/kernels/portable:generated_lib", ], diff --git a/extension/training/module/test/training_module_test.cpp b/extension/training/module/test/training_module_test.cpp index ccd1c995554..3ba46c6f653 100644 --- a/extension/training/module/test/training_module_test.cpp +++ b/extension/training/module/test/training_module_test.cpp @@ -7,6 +7,7 @@ */ #include +#include #include #include @@ -18,9 +19,17 @@ using namespace ::testing; using executorch::aten::ScalarType; using executorch::aten::Tensor; +using executorch::extension::FlatTensorDataMap; +using executorch::extension::FlatTensorHeader; +using executorch::runtime::DataLoader; +using executorch::runtime::Error; +using executorch::runtime::FreeableBuffer; +using executorch::runtime::Result; +using executorch::runtime::TensorLayout; using torch::executor::Error; using torch::executor::Span; using torch::executor::testing::TensorFactory; +using torch::executor::util::FileDataLoader; class TrainingModuleTest : public ::testing::Test { protected: @@ -105,3 +114,42 @@ TEST_F(TrainingModuleTest, NonTrainingModuleTest) { auto res = mod.execute_forward_backward("forward", inputs); ASSERT_EQ(res.error(), Error::InvalidArgument); } + +TEST_F(TrainingModuleTest, SeperateDataTest) { + // Load data map. 
+ // The eager linear model is defined at: + // //executorch/test/models/linear_model.py + const char* ptd_path = std::getenv("ET_MODULE_TRAIN_DATA_PATH"); + Result data_map_loader_res = FileDataLoader::from(ptd_path); + ASSERT_EQ(data_map_loader_res.error(), Error::Ok); + + auto data_map_loader = + std::make_unique( + std::move(data_map_loader_res.get())); + + const char* pte_path = std::getenv("ET_MODULE_TRAIN_PROGRAM_PATH"); + Result pte_loader_res = FileDataLoader::from(pte_path); + ASSERT_EQ(pte_loader_res.error(), Error::Ok); + + auto pte_loader = std::make_unique( + std::move(pte_loader_res.get())); + + auto mod = executorch::extension::training::TrainingModule( + std::move(pte_loader), + nullptr, + nullptr, + nullptr, + std::move(data_map_loader)); + + TensorFactory tf; + Tensor input = tf.make({3}, {1.0, 1.0, 1.0}); + Tensor label = tf.make({3}, {1.0, 0.0, 0.0}); + + std::vector inputs; + inputs.push_back(input); + inputs.push_back(label); + + auto res = mod.execute_forward_backward("forward", inputs); + ASSERT_EQ(res.error(), Error::Ok); + ASSERT_EQ(res.get().size(), 1); +} diff --git a/extension/training/module/training_module.cpp b/extension/training/module/training_module.cpp index 52d293c69ef..d119738715e 100644 --- a/extension/training/module/training_module.cpp +++ b/extension/training/module/training_module.cpp @@ -43,7 +43,6 @@ TrainingModule::execute_forward_backward( uint64_t param_start = param_res.get()[0].toInt(); // Execute the forward and backward pass. - auto outputs = torch::executor::Module::execute(method_name, input); if (!outputs.ok()) { return outputs.error(); @@ -56,19 +55,23 @@ TrainingModule::execute_forward_backward( user_outputs.push_back(outputs.get().at(i)); } - // Extract and store the gradients. + // Extract and store the gradients and params if this is the first time seeing + // this method. if (method_named_gradients_.find(method_name) == method_named_gradients_.end()) { + // Fully qualified names + std::vector fqn_list; method_named_gradients_.insert({method_name, {}}); auto& gradients_map = method_named_gradients_.at(method_name); - // Get names. + + // Get names if we havent seen this method before. const std::string fqn_method_name = fqn_method_prefix + method_name; auto fqn_res = executorch::extension::Module::execute(fqn_method_name); if (!fqn_res.ok()) { return fqn_res.error(); } - const auto& fqn_list = fqn_res.get(); + fqn_list = fqn_res.get(); // Only have to initialize the dict once because the tensors in the dict and // the tensors in the method alias the same TensorImpl, so updating one will @@ -87,43 +90,49 @@ TrainingModule::execute_forward_backward( runtime::Result< const std::map> TrainingModule::named_parameters(const std::string& method_name) { - std::map - named_parameters; - const std::string fqn_method_name = fqn_method_prefix + method_name; - const std::string parameters_method_name = - parameters_method_prefix + method_name; + // If we haven't seen this method before, populate the dict. + if (method_named_parameters_.find(method_name) == + method_named_parameters_.end()) { + const std::string fqn_method_name = fqn_method_prefix + method_name; + const std::string parameters_method_name = + parameters_method_prefix + method_name; - // get names. - auto fqn_res = executorch::extension::Module::execute(fqn_method_name); - if (!fqn_res.ok()) { - return fqn_res.error(); - } - const auto& fqn_list = fqn_res.get(); + method_named_parameters_.insert({method_name, {}}); - // get params start. 
- auto param_res = - executorch::extension::Module::execute(parameters_method_name); - if (!param_res.ok()) { - return param_res.error(); - } + // get names. + auto fqn_res = executorch::extension::Module::execute(fqn_method_name); + if (!fqn_res.ok()) { + return fqn_res.error(); + } + const auto& fqn_list = fqn_res.get(); - uint64_t param_start = param_res.get()[0].toInt(); + // get params start. + auto param_res = + executorch::extension::Module::execute(parameters_method_name); + if (!param_res.ok()) { + return param_res.error(); + } - auto e = executorch::extension::Module::load_method(method_name); - if (e != runtime::Error::Ok) { - return e; - } - auto& method = methods_.at(method_name).method; - - // create dict - size_t name_index = 0; - for (size_t param_index = param_start; param_index < method->outputs_size(); - ++param_index, ++name_index) { - executorch::aten::string_view fqn = fqn_list.at(name_index).toString(); - executorch::aten::Tensor param = method->get_output(param_index).toTensor(); - named_parameters.insert({fqn, param}); + uint64_t param_start = param_res.get()[0].toInt(); + + // Load the method if it is not already loaded. + auto e = executorch::extension::Module::load_method(method_name); + if (e != runtime::Error::Ok) { + return e; + } + auto& method = methods_.at(method_name).method; + + // populate dict + size_t name_index = 0; + for (size_t param_index = param_start; param_index < method->outputs_size(); + ++param_index, ++name_index) { + executorch::aten::string_view fqn = fqn_list.at(name_index).toString(); + executorch::aten::Tensor param = + method->get_output(param_index).toTensor(); + method_named_parameters_.at(method_name).insert({fqn, param}); + } } - return named_parameters; + return method_named_parameters_.at(method_name); } runtime::Result< diff --git a/extension/training/module/training_module.h b/extension/training/module/training_module.h index 9e7aa49cacf..7bf81623c04 100644 --- a/extension/training/module/training_module.h +++ b/extension/training/module/training_module.h @@ -33,13 +33,16 @@ class ET_EXPERIMENTAL TrainingModule final std::unique_ptr data_loader, std::unique_ptr memory_allocator = nullptr, std::unique_ptr temp_allocator = nullptr, - std::unique_ptr event_tracer = nullptr) + std::unique_ptr event_tracer = nullptr, + std::unique_ptr data_map_data_loader = nullptr) : executorch::extension::Module( std::move(data_loader), std::move(memory_allocator), std::move(temp_allocator), - std::move(event_tracer)), - method_named_gradients_({}) {} + std::move(event_tracer), + std::move(data_map_data_loader)), + method_named_gradients_({}), + method_named_parameters_({}) {} explicit TrainingModule(const Module&) = delete; TrainingModule& operator=(const Module&) = delete; @@ -97,6 +100,11 @@ class ET_EXPERIMENTAL TrainingModule final std::string, std::map> method_named_gradients_; + + std::unordered_map< + std::string, + std::map> + method_named_parameters_; }; } // namespace training diff --git a/runtime/executor/tensor_parser_exec_aten.cpp b/runtime/executor/tensor_parser_exec_aten.cpp index 66202acabc3..de809ee09cc 100644 --- a/runtime/executor/tensor_parser_exec_aten.cpp +++ b/runtime/executor/tensor_parser_exec_aten.cpp @@ -169,24 +169,8 @@ ET_NODISCARD Result getTensorDataPtr( const executorch_flatbuffer::AllocationDetails* allocation_info = s_tensor->allocation_info(); - // Memory Planned, with initial state - if (data_buffer_idx > 0 && allocation_info != nullptr) { - auto planned_ptr = getMemPlannedPtr(allocation_info, nbytes, 
allocator); - if (!planned_ptr.ok()) { - return planned_ptr.error(); - } - auto err = TensorParser::load_mutable_subsegment_into( - program, 0, s_tensor->data_buffer_idx(), nbytes, planned_ptr.get()); - - if (err != Error::Ok) { - return err; - } - return planned_ptr; - } - // External tensors. - else if ( - s_tensor->extra_tensor_info() != nullptr && + if (s_tensor->extra_tensor_info() != nullptr && s_tensor->extra_tensor_info()->location() == executorch_flatbuffer::TensorDataLocation::EXTERNAL) { // Check that fqn is not null. @@ -232,10 +216,9 @@ ET_NODISCARD Result getTensorDataPtr( return planned_ptr; } - } - // Constant, stored in PTE file. - else if (data_buffer_idx > 0 && allocation_info == nullptr) { + // Constant, stored in PTE file. + } else if (data_buffer_idx > 0 && allocation_info == nullptr) { auto const_data = program->get_constant_buffer_data(data_buffer_idx, nbytes); if (!const_data.ok()) { @@ -246,6 +229,20 @@ ET_NODISCARD Result getTensorDataPtr( // guarantee that this data is never modified. return const_cast(const_data.get()); + // Memory Planned, with initial state + } else if (data_buffer_idx > 0 && allocation_info != nullptr) { + auto planned_ptr = getMemPlannedPtr(allocation_info, nbytes, allocator); + if (!planned_ptr.ok()) { + return planned_ptr.error(); + } + auto err = TensorParser::load_mutable_subsegment_into( + program, 0, s_tensor->data_buffer_idx(), nbytes, planned_ptr.get()); + + if (err != Error::Ok) { + return err; + } + return planned_ptr; + // Memory planned, no initial state } else if (data_buffer_idx == 0 && allocation_info != nullptr) { return getMemPlannedPtr(allocation_info, nbytes, allocator); diff --git a/runtime/executor/test/CMakeLists.txt b/runtime/executor/test/CMakeLists.txt index c9ac26ba673..49e5af76620 100644 --- a/runtime/executor/test/CMakeLists.txt +++ b/runtime/executor/test/CMakeLists.txt @@ -24,7 +24,7 @@ add_custom_command( "${CMAKE_CURRENT_BINARY_DIR}/ModuleIndex.pte" "${CMAKE_CURRENT_BINARY_DIR}/ModuleLinear.pte" "${CMAKE_CURRENT_BINARY_DIR}/ModuleLinearProgram.pte" - "${CMAKE_CURRENT_BINARY_DIR}/_default_external_constant.ptd" + "${CMAKE_CURRENT_BINARY_DIR}/ModuleLinearProgram.ptd" "${CMAKE_CURRENT_BINARY_DIR}/ModuleMultipleEntry.pte" "${CMAKE_CURRENT_BINARY_DIR}/ModuleSimpleTrain.pte" COMMAND @@ -48,7 +48,7 @@ add_custom_target( "${CMAKE_CURRENT_BINARY_DIR}/ModuleIndex.pte" "${CMAKE_CURRENT_BINARY_DIR}/ModuleLinear.pte" "${CMAKE_CURRENT_BINARY_DIR}/ModuleLinearProgram.pte" - "${CMAKE_CURRENT_BINARY_DIR}/_default_external_constant.ptd" + "${CMAKE_CURRENT_BINARY_DIR}/ModuleLinearProgram.ptd" "${CMAKE_CURRENT_BINARY_DIR}/ModuleMultipleEntry.pte" "${CMAKE_CURRENT_BINARY_DIR}/ModuleSimpleTrain.pte" ) @@ -61,7 +61,7 @@ set(test_env "ET_MODULE_INDEX_PATH=${CMAKE_CURRENT_BINARY_DIR}/ModuleIndex.pte" "ET_MODULE_LINEAR_PATH=${CMAKE_CURRENT_BINARY_DIR}/ModuleLinear.pte" "ET_MODULE_LINEAR_PROGRAM_PATH=${CMAKE_CURRENT_BINARY_DIR}/ModuleLinearProgram.pte" - "ET_MODULE_LINEAR_DATA_PATH=${CMAKE_CURRENT_BINARY_DIR}/_default_external_constant.ptd" + "ET_MODULE_LINEAR_DATA_PATH=${CMAKE_CURRENT_BINARY_DIR}/ModuleLinearProgram.ptd" "ET_MODULE_MULTI_ENTRY_PATH=${CMAKE_CURRENT_BINARY_DIR}/ModuleMultipleEntry.pte" "ET_MODULE_SIMPLE_TRAIN_PATH=${CMAKE_CURRENT_BINARY_DIR}/ModuleSimpleTrain.pte" ) diff --git a/test/models/export_program.py b/test/models/export_program.py index ada80ff342f..ccf8a965eb2 100644 --- a/test/models/export_program.py +++ b/test/models/export_program.py @@ -276,6 +276,11 @@ def main() -> None: prog.write_to_file(fp) 
print(f"Exported {module_name} and wrote program data to {outfile}") + if args.external_constants: + # current infra doesnt easily allow renaming this file, so just hackily do it here. + prog._tensor_data[f"{module_name}"] = prog._tensor_data.pop( + "_default_external_constant" + ) prog.write_tensor_data_to_file(args.outdir) diff --git a/test/models/targets.bzl b/test/models/targets.bzl index bb04c6bc5fa..6d5b6753f3f 100644 --- a/test/models/targets.bzl +++ b/test/models/targets.bzl @@ -90,13 +90,21 @@ def define_common_targets(): # case, and typically shouldn't be done. _is_external_target = True, ) + + # Class names of nn.Modules for :exported_programs to export. + MODULES_AND_DATA_TO_EXPORT = [ + "ModuleLinear", + "ModuleSimpleTrain", + ] runtime.genrule( name = "exported_program_and_data", - cmd = "$(exe :export_program) --modules ModuleLinear --external-constants --outdir $OUT", + cmd = "$(exe :export_program) --modules " + ",".join(MODULES_AND_DATA_TO_EXPORT) + " --external-constants --outdir $OUT", outs = { "ModuleLinear.pte": ["ModuleLinearProgram.pte"], - "ModuleLinear.ptd": ["_default_external_constant.ptd"], + "ModuleLinear.ptd": ["ModuleLinearProgram.ptd"], + "ModuleSimpleTrainProgram.pte": ["ModuleSimpleTrainProgram.pte"], + "ModuleSimpleTrain.ptd": ["ModuleSimpleTrainProgram.ptd"], }, default_outs = ["."], visibility = [ From 88a06ef5a863dfe76a466545962ffc24e828da74 Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Thu, 27 Feb 2025 11:52:14 -0800 Subject: [PATCH 137/584] Fix error in export example script (#8783) --- docs/source/using-executorch-export.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/using-executorch-export.md b/docs/source/using-executorch-export.md index 62a52edf839..9bcfedc256a 100644 --- a/docs/source/using-executorch-export.md +++ b/docs/source/using-executorch-export.md @@ -58,7 +58,7 @@ class Model(torch.nn.Module): torch.nn.ReLU(), torch.nn.Conv2d(8, 16, 3), torch.nn.ReLU(), - torch.nn.AdaptiveAvgPool2d([[1,1]]) + torch.nn.AdaptiveAvgPool2d((1,1)) ) self.linear = torch.nn.Linear(16, 10) @@ -175,4 +175,4 @@ For advanced use cases, see the following: - [Quantization Overview](quantization-overview.md) for information on quantizing models to reduce inference time and memory footprint. - [Memory Planning](compiler-memory-planning.md) for information on controlling memory placement and planning. - [Custom Compiler Passes](compiler-custom-compiler-passes.md) for information on writing custom compiler passes. -- [Export IR Specification](ir-exir.md) for information on the intermediate representation generated by export. \ No newline at end of file +- [Export IR Specification](ir-exir.md) for information on the intermediate representation generated by export. 
From 316099fb26949732a97be0c5a91da81a2d90b8be Mon Sep 17 00:00:00 2001 From: Jack <32371937+jackzhxng@users.noreply.github.com> Date: Thu, 27 Feb 2025 12:16:13 -0800 Subject: [PATCH 138/584] Enable kwarg inputs for pt2e quantize Differential Revision: D70206003 Pull Request resolved: https://github.com/pytorch/executorch/pull/7436 --- extension/llm/export/builder.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/extension/llm/export/builder.py b/extension/llm/export/builder.py index 3e987489693..47ad30e9390 100644 --- a/extension/llm/export/builder.py +++ b/extension/llm/export/builder.py @@ -360,7 +360,10 @@ def pt2e_quantize(self, quantizers: Optional[List[Quantizer]]) -> "LLMEdgeManage logging.info( "No calibration provided, using dummy input to calibrate..." ) - m(*self.example_inputs) + if self.example_kwarg_inputs: + m(*self.example_inputs, **self.example_kwarg_inputs) + else: + m(*self.example_inputs) m = convert_pt2e(m) DuplicateDynamicQuantChainPass()(m) self.pre_autograd_graph_module = m From 954246c47158d054d10a209e9d60e6881c27fa7d Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Thu, 27 Feb 2025 14:32:10 -0800 Subject: [PATCH 139/584] Update create_frameworks.sh (#8793) --- build/create_frameworks.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build/create_frameworks.sh b/build/create_frameworks.sh index a55c4aed1e7..3ed2c46face 100755 --- a/build/create_frameworks.sh +++ b/build/create_frameworks.sh @@ -76,8 +76,8 @@ create_xcframework() { fi local dir_suffix - dir_suffix=$(echo "$dir" | tr '[:upper:]' '[:lower:]' | sed 's/\//-/g') - local merged_lib="${output}/lib${target_library_name}-${dir_suffix}.a" + dir_suffix=$(echo "$dir" | tr '[:upper:]' '[:lower:]' | sed 's/\//_/g') + local merged_lib="${output}/lib${target_library_name}_${dir_suffix}.a" # Remove the existing .a file if it exists. if [ -f "${merged_lib}" ]; then From 3261d05bf8d2ab92bbfe6dc5c2012871fd2f1685 Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Thu, 27 Feb 2025 15:04:05 -0800 Subject: [PATCH 140/584] =?UTF-8?q?Allow=20building=20Apple=20frameworks?= =?UTF-8?q?=20for=20multiple=20modes=20with=20one=20script=20in=E2=80=A6?= =?UTF-8?q?=20(#8791)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Allow building Apple frameworks for multiple modes with one script invocation * Update build_apple_frameworks.sh --- build/build_apple_frameworks.sh | 104 ++++++++++++++++++++------------ 1 file changed, 65 insertions(+), 39 deletions(-) diff --git a/build/build_apple_frameworks.sh b/build/build_apple_frameworks.sh index ab2f45e41e2..e6a62f8515a 100755 --- a/build/build_apple_frameworks.sh +++ b/build/build_apple_frameworks.sh @@ -9,7 +9,7 @@ set -euo pipefail SOURCE_ROOT_DIR="" OUTPUT="cmake-out" -MODE="Release" +MODES=() TOOLCHAIN="" PYTHON=$(which python3) FLATC=$(which flatc) @@ -77,20 +77,21 @@ usage() { echo echo "Options:" echo " --output=DIR Output directory. Default: 'cmake-out'" - echo " --Debug Use Debug build mode. Default: Uses Release build mode." - echo " --toolchain=FILE Cmake toolchain file. Default: '\$SOURCE_ROOT_DIR/third-party/ios-cmake/ios.toolchain.cmake'" - echo " --python=FILE Python executable path. Default: Path of python3 found in the current \$PATH" - echo " --flatc=FILE FlatBuffers Compiler executable path. Default: Path of flatc found in the current \$PATH" - echo " --coreml Include this flag to build the Core ML backend." 
- echo " --custom Include this flag to build the Custom kernels." - echo " --mps Include this flag to build the Metal Performance Shaders backend." - echo " --optimized Include this flag to build the Optimized kernels." - echo " --portable Include this flag to build the Portable kernels." - echo " --quantized Include this flag to build the Quantized kernels." - echo " --xnnpack Include this flag to build the XNNPACK backend." + echo " --Debug Build Debug version." + echo " --Release Build Release version." + echo " --toolchain=FILE CMake toolchain file. Default: '\$SOURCE_ROOT_DIR/third-party/ios-cmake/ios.toolchain.cmake'" + echo " --python=FILE Python executable path. Default: Path of python3 in \$PATH" + echo " --flatc=FILE FlatBuffers Compiler executable path. Default: Path of flatc in \$PATH" + echo " --coreml Build the Core ML backend." + echo " --custom Build the Custom kernels." + echo " --mps Build the Metal Performance Shaders backend." + echo " --optimized Build the Optimized kernels." + echo " --portable Build the Portable kernels." + echo " --quantized Build the Quantized kernels." + echo " --xnnpack Build the XNNPACK backend." echo echo "Example:" - echo " $0 /path/to/source/root --output=cmake-out --toolchain=/path/to/cmake/toolchain --python=/path/to/python3 --coreml --mps --xnnpack" + echo " $0 /path/to/source/root --output=cmake-out --toolchain=/path/to/toolchain --python=/path/to/python3 --coreml --mps --xnnpack" exit 0 } @@ -98,7 +99,16 @@ for arg in "$@"; do case $arg in -h|--help) usage ;; --output=*) OUTPUT="${arg#*=}" ;; - --Debug) MODE="Debug" ;; + --Release) + if [[ ! " ${MODES[*]:-} " =~ \bRelease\b ]]; then + MODES+=("Release") + fi + ;; + --Debug) + if [[ ! " ${MODES[*]:-} " =~ \bDebug\b ]]; then + MODES+=("Debug") + fi + ;; --toolchain=*) TOOLCHAIN="${arg#*=}" ;; --python=*) PYTHON="${arg#*=}" ;; --flatc=*) FLATC="${arg#*=}" ;; @@ -120,6 +130,10 @@ for arg in "$@"; do esac done +if [ ${#MODES[@]} -eq 0 ]; then + MODES=("Release") +fi + if [[ -z "$SOURCE_ROOT_DIR" ]]; then SOURCE_ROOT_DIR=$(pwd) fi @@ -146,10 +160,11 @@ cmake_build() { local platform=$1 local platform_flag=$2 local platform_target=$3 - echo "Building for $platform with flag $platform_flag" - mkdir "$platform" && cd "$platform" || exit 1 + local mode=$4 + echo "Building for $platform ($mode) with flag $platform_flag" + mkdir -p "$platform" && cd "$platform" || exit 1 cmake "$SOURCE_ROOT_DIR" -G Xcode \ - -DCMAKE_BUILD_TYPE="$MODE" \ + -DCMAKE_BUILD_TYPE="$mode" \ -DCMAKE_TOOLCHAIN_FILE="$TOOLCHAIN" \ -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD="c++17" \ -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY="libc++" \ @@ -173,13 +188,15 @@ cmake_build() { ${platform_target:+-DDEPLOYMENT_TARGET=$platform_target} \ --log-level=VERBOSE cmake --build . \ - --config $MODE \ + --config "$mode" \ --verbose cd .. } for index in ${!PLATFORMS[*]}; do - cmake_build "${PLATFORMS[$index]}" "${PLATFORM_FLAGS[$index]}" "${PLATFORM_TARGET[$index]}" + for mode in "${MODES[@]}"; do + cmake_build "${PLATFORMS[$index]}" "${PLATFORM_FLAGS[$index]}" "${PLATFORM_TARGET[$index]}" "$mode" + done done echo "Exporting headers" @@ -206,42 +223,51 @@ check_command "$BUCK2" # So, just patch our generated framework to do that. 
sed -i '' '1i\ #define C10_USING_CUSTOM_GENERATED_MACROS -' $HEADERS_PATH/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h -sed -i '' '1i\ -#define C10_USING_CUSTOM_GENERATED_MACROS -' $HEADERS_PATH/executorch/runtime/core/portable_type/c10/c10/macros/Export.h -cp -r $HEADERS_PATH/executorch/runtime/core/portable_type/c10/c10 "$HEADERS_PATH/" +' \ +"$HEADERS_PATH/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h" \ +"$HEADERS_PATH/executorch/runtime/core/portable_type/c10/c10/macros/Export.h" +cp -r $HEADERS_PATH/executorch/runtime/core/portable_type/c10/c10 "$HEADERS_PATH/" cp "$SOURCE_ROOT_DIR/extension/apple/ExecuTorch/Exported/"*.h "$HEADERS_PATH/executorch" cp "$SOURCE_ROOT_DIR/extension/apple/ExecuTorch/Exported/"*.modulemap "$HEADERS_PATH" echo "Creating frameworks" -for platform in "${PLATFORMS[@]}"; do - echo "Directory: $platform/$MODE" - FRAMEWORK_FLAGS+=("--directory=$platform/$MODE") -done - append_framework_flag() { local flag="$1" local framework="$2" + local mode="${3:-}" if [[ $flag == ON ]]; then + if [[ -n "$mode" && "$mode" != "Release" ]]; then + local name spec + name=$(echo "$framework" | cut -d: -f1) + spec=$(echo "$framework" | cut -d: -f2-) + framework="${name}_$(echo "$mode" | tr '[:upper:]' '[:lower:]'):${spec}" + fi echo "Framework: $framework" FRAMEWORK_FLAGS+=("--framework=$framework") fi } -append_framework_flag "ON" "$FRAMEWORK_EXECUTORCH" -append_framework_flag "$COREML" "$FRAMEWORK_BACKEND_COREML" -append_framework_flag "$MPS" "$FRAMEWORK_BACKEND_MPS" -append_framework_flag "$XNNPACK" "$FRAMEWORK_BACKEND_XNNPACK" -append_framework_flag "$CUSTOM" "$FRAMEWORK_KERNELS_CUSTOM" -append_framework_flag "$OPTIMIZED" "$FRAMEWORK_KERNELS_OPTIMIZED" -append_framework_flag "$PORTABLE" "$FRAMEWORK_KERNELS_PORTABLE" -append_framework_flag "$QUANTIZED" "$FRAMEWORK_KERNELS_QUANTIZED" - -"$SOURCE_ROOT_DIR"/build/create_frameworks.sh "${FRAMEWORK_FLAGS[@]}" +for mode in "${MODES[@]}"; do + FRAMEWORK_FLAGS=() + for platform in "${PLATFORMS[@]}"; do + echo "Directory: $platform/$mode" + FRAMEWORK_FLAGS+=("--directory=$platform/$mode") + done + + append_framework_flag "ON" "$FRAMEWORK_EXECUTORCH" "$mode" + append_framework_flag "$COREML" "$FRAMEWORK_BACKEND_COREML" "$mode" + append_framework_flag "$MPS" "$FRAMEWORK_BACKEND_MPS" "$mode" + append_framework_flag "$XNNPACK" "$FRAMEWORK_BACKEND_XNNPACK" "$mode" + append_framework_flag "$CUSTOM" "$FRAMEWORK_KERNELS_CUSTOM" "$mode" + append_framework_flag "$OPTIMIZED" "$FRAMEWORK_KERNELS_OPTIMIZED" "$mode" + append_framework_flag "$PORTABLE" "$FRAMEWORK_KERNELS_PORTABLE" "$mode" + append_framework_flag "$QUANTIZED" "$FRAMEWORK_KERNELS_QUANTIZED" "$mode" + + "$SOURCE_ROOT_DIR"/build/create_frameworks.sh "${FRAMEWORK_FLAGS[@]}" +done echo "Cleaning up" From 25164789c6a65207ce597688df67439e6da669ea Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Thu, 27 Feb 2025 15:05:18 -0800 Subject: [PATCH 141/584] Add swiftpm manifest for locally built frameworks (#8795) * Add swiftpm manifest for locally built frameworks * Update Package.swift --- .Package.swift/backend_coreml/dummy.swift | 0 .../backend_coreml_debug/dummy.swift | 0 .Package.swift/backend_mps/dummy.swift | 0 .Package.swift/backend_mps_debug/dummy.swift | 0 .Package.swift/backend_xnnpack/dummy.swift | 0 .../backend_xnnpack_debug/dummy.swift | 0 .Package.swift/executorch/dummy.swift | 0 .Package.swift/executorch_debug/dummy.swift | 0 .Package.swift/kernels_custom/dummy.swift | 0 .../kernels_custom_debug/dummy.swift | 0 
.Package.swift/kernels_optimized/dummy.swift | 0 .../kernels_optimized_debug/dummy.swift | 0 .Package.swift/kernels_portable/dummy.swift | 0 .../kernels_portable_debug/dummy.swift | 0 .Package.swift/kernels_quantized/dummy.swift | 0 .../kernels_quantized_debug/dummy.swift | 0 Package.swift | 86 +++++++++++++++++++ 17 files changed, 86 insertions(+) create mode 100644 .Package.swift/backend_coreml/dummy.swift create mode 100644 .Package.swift/backend_coreml_debug/dummy.swift create mode 100644 .Package.swift/backend_mps/dummy.swift create mode 100644 .Package.swift/backend_mps_debug/dummy.swift create mode 100644 .Package.swift/backend_xnnpack/dummy.swift create mode 100644 .Package.swift/backend_xnnpack_debug/dummy.swift create mode 100644 .Package.swift/executorch/dummy.swift create mode 100644 .Package.swift/executorch_debug/dummy.swift create mode 100644 .Package.swift/kernels_custom/dummy.swift create mode 100644 .Package.swift/kernels_custom_debug/dummy.swift create mode 100644 .Package.swift/kernels_optimized/dummy.swift create mode 100644 .Package.swift/kernels_optimized_debug/dummy.swift create mode 100644 .Package.swift/kernels_portable/dummy.swift create mode 100644 .Package.swift/kernels_portable_debug/dummy.swift create mode 100644 .Package.swift/kernels_quantized/dummy.swift create mode 100644 .Package.swift/kernels_quantized_debug/dummy.swift create mode 100644 Package.swift diff --git a/.Package.swift/backend_coreml/dummy.swift b/.Package.swift/backend_coreml/dummy.swift new file mode 100644 index 00000000000..e69de29bb2d diff --git a/.Package.swift/backend_coreml_debug/dummy.swift b/.Package.swift/backend_coreml_debug/dummy.swift new file mode 100644 index 00000000000..e69de29bb2d diff --git a/.Package.swift/backend_mps/dummy.swift b/.Package.swift/backend_mps/dummy.swift new file mode 100644 index 00000000000..e69de29bb2d diff --git a/.Package.swift/backend_mps_debug/dummy.swift b/.Package.swift/backend_mps_debug/dummy.swift new file mode 100644 index 00000000000..e69de29bb2d diff --git a/.Package.swift/backend_xnnpack/dummy.swift b/.Package.swift/backend_xnnpack/dummy.swift new file mode 100644 index 00000000000..e69de29bb2d diff --git a/.Package.swift/backend_xnnpack_debug/dummy.swift b/.Package.swift/backend_xnnpack_debug/dummy.swift new file mode 100644 index 00000000000..e69de29bb2d diff --git a/.Package.swift/executorch/dummy.swift b/.Package.swift/executorch/dummy.swift new file mode 100644 index 00000000000..e69de29bb2d diff --git a/.Package.swift/executorch_debug/dummy.swift b/.Package.swift/executorch_debug/dummy.swift new file mode 100644 index 00000000000..e69de29bb2d diff --git a/.Package.swift/kernels_custom/dummy.swift b/.Package.swift/kernels_custom/dummy.swift new file mode 100644 index 00000000000..e69de29bb2d diff --git a/.Package.swift/kernels_custom_debug/dummy.swift b/.Package.swift/kernels_custom_debug/dummy.swift new file mode 100644 index 00000000000..e69de29bb2d diff --git a/.Package.swift/kernels_optimized/dummy.swift b/.Package.swift/kernels_optimized/dummy.swift new file mode 100644 index 00000000000..e69de29bb2d diff --git a/.Package.swift/kernels_optimized_debug/dummy.swift b/.Package.swift/kernels_optimized_debug/dummy.swift new file mode 100644 index 00000000000..e69de29bb2d diff --git a/.Package.swift/kernels_portable/dummy.swift b/.Package.swift/kernels_portable/dummy.swift new file mode 100644 index 00000000000..e69de29bb2d diff --git a/.Package.swift/kernels_portable_debug/dummy.swift b/.Package.swift/kernels_portable_debug/dummy.swift 
new file mode 100644 index 00000000000..e69de29bb2d diff --git a/.Package.swift/kernels_quantized/dummy.swift b/.Package.swift/kernels_quantized/dummy.swift new file mode 100644 index 00000000000..e69de29bb2d diff --git a/.Package.swift/kernels_quantized_debug/dummy.swift b/.Package.swift/kernels_quantized_debug/dummy.swift new file mode 100644 index 00000000000..e69de29bb2d diff --git a/Package.swift b/Package.swift new file mode 100644 index 00000000000..94acfc4cd7b --- /dev/null +++ b/Package.swift @@ -0,0 +1,86 @@ +// swift-tools-version:5.9 +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +// NOTE: This package manifest is for frameworks built locally with CMake. +// It defines dependencies and linker settings for Executorch components. +// +// To use prebuilt binaries instead, switch to one of the "swiftpm" branches, +// which fetch the precompiled `.xcframeworks`. +// +// For details on building frameworks locally or using prebuilt binaries, +// see the documentation: +// https://pytorch.org/executorch/main/using-executorch-ios.html + +import PackageDescription + +let debug = "_debug" +let deliverables = [ + "backend_coreml": [ + "frameworks": [ + "Accelerate", + "CoreML", + ], + "libraries": [ + "sqlite3", + ], + ], + "backend_mps": [ + "frameworks": [ + "Metal", + "MetalPerformanceShaders", + "MetalPerformanceShadersGraph", + ], + ], + "backend_xnnpack": [:], + "executorch": [:], + "kernels_custom": [:], + "kernels_optimized": [:], + "kernels_portable": [:], + "kernels_quantized": [:], +].reduce(into: [String: [String: Any]]()) { + $0[$1.key] = $1.value + $0[$1.key + debug] = $1.value +}.reduce(into: [String: [String: Any]]()) { + var newValue = $1.value + if $1.key.hasSuffix(debug) { + $1.value.forEach { key, value in + if key.hasSuffix(debug) { + newValue[String(key.dropLast(debug.count))] = value + } + } + } + $0[$1.key] = newValue.filter { key, _ in !key.hasSuffix(debug) } +} + +let package = Package( + name: "executorch", + platforms: [ + .iOS(.v17), + .macOS(.v10_15), + ], + products: deliverables.keys.map { key in + .library(name: key, targets: ["\(key)_dependencies"]) + }.sorted { $0.name < $1.name }, + targets: deliverables.flatMap { key, value -> [Target] in + [ + .binaryTarget( + name: key, + path: "cmake-out/\(key).xcframework" + ), + .target( + name: "\(key)_dependencies", + dependencies: [.target(name: key)], + path: ".Package.swift/\(key)", + linkerSettings: + (value["frameworks"] as? [String] ?? []).map { .linkedFramework($0) } + + (value["libraries"] as? [String] ?? 
[]).map { .linkedLibrary($0) } + ), + ] + } +) From 7b3921744897d436c0872c92bc7bc0752023c5df Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Thu, 27 Feb 2025 15:15:15 -0800 Subject: [PATCH 142/584] =?UTF-8?q?Cache=20Buck=20targets=20when=20travers?= =?UTF-8?q?ing=20the=20tree=20of=20deps=20looking=20for=20the=20h=E2=80=A6?= =?UTF-8?q?=20(#8796)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cache Buck targets when traversing the tree of deps looking for the headers --- build/print_exported_headers.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/build/print_exported_headers.py b/build/print_exported_headers.py index b24100c7a94..dc0102ff00b 100755 --- a/build/print_exported_headers.py +++ b/build/print_exported_headers.py @@ -38,8 +38,15 @@ def query(buck2: str, target: str, attribute: str) -> str: raise SystemExit("Error: " + str(e)) +# Cache to store results for exported headers per target. +_exported_headers_cache = {} + + def exported_headers(buck2: str, target: str) -> Set[str]: - """Get all exported headers of a target and its dependencies.""" + """Get all exported headers of a target and its dependencies, with caching.""" + if target in _exported_headers_cache: + return _exported_headers_cache[target] + deps = query(buck2, target, "exported_deps") headers = set(query(buck2, target, "exported_headers")) headers.update( @@ -48,13 +55,14 @@ def exported_headers(buck2: str, target: str) -> Set[str]: for header in exported_headers(buck2, dep.split()[0]) if header.endswith(".h") ) + _exported_headers_cache[target] = headers return headers def expand_target(buck2: str, target: str) -> List[str]: """Expand a target into a list of targets if applicable.""" output = run([buck2, "cquery", target]) - # Buck's output format is " ()", we take only the target part. + # Buck's output format is " ()", so we take only the target part. targets = [line.split(" ")[0] for line in output.strip().split("\n")] return targets From 623a9657fb609f2e144f5999454e19de8891007b Mon Sep 17 00:00:00 2001 From: Hansong <107070759+kirklandsign@users.noreply.github.com> Date: Thu, 27 Feb 2025 15:18:21 -0800 Subject: [PATCH 143/584] [Android] Add a list of artifact history Show the history of AAR files --- docs/source/using-executorch-android.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/source/using-executorch-android.md b/docs/source/using-executorch-android.md index 1173674f26e..e41cd3e2bb7 100644 --- a/docs/source/using-executorch-android.md +++ b/docs/source/using-executorch-android.md @@ -13,8 +13,18 @@ We provide a prebuilt Android library (AAR), `executorch.aar` for both generic ( - Comes with two ABI variants, arm64-v8a and x86\_64. 
## Downloading AAR -[executorch.aar](https://ossci-android.s3.amazonaws.com/executorch/release/executorch-241002/executorch.aar) -[executorch.aar.sha256sums](https://ossci-android.s3.amazonaws.com/executorch/release/executorch-241002/executorch.aar.sha256sums) + +### Released versions (recommended) + +| Version | AAR | SHASUMS | +| ------- | --- | ------- | +| [v0.5.0](https://github.com/pytorch/executorch/releases/tag/v0.5.0) | [executorch.aar](https://ossci-android.s3.amazonaws.com/executorch/release/v0.5.0-rc3/executorch.aar) | [executorch.aar.sha256sums](https://ossci-android.s3.amazonaws.com/executorch/release/v0.5.0-rc3/executorch.aar) | + +### Snapshots from main branch + +| Date | AAR | SHASUMS | +| ------- | --- | ------- | +| 2025-02-27 | [executorch.aar](https://ossci-android.s3.amazonaws.com/executorch/release/executorch-20250227/executorch.aar) | [executorch.aar.sha256sums](https://ossci-android.s3.amazonaws.com/executorch/release/executorch-20250227/executorch.aar.sha256sums) | ## Using prebuilt libraries From 2d0cf64f11aec5d802e94a84861551e248825d49 Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Thu, 27 Feb 2025 15:21:04 -0800 Subject: [PATCH 144/584] Ignore the artifacts in .swiftpm/ (#8798) --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 849a6c59fb0..67dd6be3342 100644 --- a/.gitignore +++ b/.gitignore @@ -33,5 +33,6 @@ pip-out/ # Xcode xcuserdata/ +.swiftpm/ *.xcworkspace/ *.xcframework/ From 5651a87dd094d4dbbfe31517d540da177ac50a26 Mon Sep 17 00:00:00 2001 From: JP <46308822+zonglinpeng@users.noreply.github.com> Date: Thu, 27 Feb 2025 15:22:48 -0800 Subject: [PATCH 145/584] enable fp32 cat slice Differential Revision: D70216001 Pull Request resolved: https://github.com/pytorch/executorch/pull/8759 --- backends/cadence/fusion_g3/operators/op_cat.cpp | 3 ++- .../cadence/fusion_g3/operators/op_slice_copy.cpp | 5 +++-- backends/cadence/fusion_g3/operators/xt_utils.h | 2 ++ backends/cadence/utils/facto_util.py | 11 ++++++++++- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/backends/cadence/fusion_g3/operators/op_cat.cpp b/backends/cadence/fusion_g3/operators/op_cat.cpp index 84224b37b04..7f8e1ee8710 100644 --- a/backends/cadence/fusion_g3/operators/op_cat.cpp +++ b/backends/cadence/fusion_g3/operators/op_cat.cpp @@ -115,7 +115,8 @@ Tensor& cat_out( (out.scalar_type() == ScalarType::Char) || (out.scalar_type() == ScalarType::UInt32) || (out.scalar_type() == ScalarType::UInt16) || - (out.scalar_type() == ScalarType::Byte)) { + (out.scalar_type() == ScalarType::Byte) || + (out.scalar_type() == ScalarType::Float)) { XT_KERNEL_CHECK( ctx, out, diff --git a/backends/cadence/fusion_g3/operators/op_slice_copy.cpp b/backends/cadence/fusion_g3/operators/op_slice_copy.cpp index 9158eecf133..249da9144a9 100644 --- a/backends/cadence/fusion_g3/operators/op_slice_copy.cpp +++ b/backends/cadence/fusion_g3/operators/op_slice_copy.cpp @@ -101,7 +101,8 @@ Tensor& slice_copy_Tensor_out( (out.scalar_type() == ScalarType::Char) || (out.scalar_type() == ScalarType::UInt32) || (out.scalar_type() == ScalarType::UInt16) || - (out.scalar_type() == ScalarType::Byte))) { + (out.scalar_type() == ScalarType::Byte) || + (out.scalar_type() == ScalarType::Float))) { XT_KERNEL_CHECK( ctx, out, @@ -132,4 +133,4 @@ Tensor& slice_copy_Tensor_out( } // namespace native } // namespace G3 } // namespace impl -} // namespace cadence \ No newline at end of file +} // namespace cadence diff --git a/backends/cadence/fusion_g3/operators/xt_utils.h 
b/backends/cadence/fusion_g3/operators/xt_utils.h index 443d68d0609..14b9b6f4981 100644 --- a/backends/cadence/fusion_g3/operators/xt_utils.h +++ b/backends/cadence/fusion_g3/operators/xt_utils.h @@ -19,6 +19,8 @@ inline int get_element_size(ScalarType dtype) { return sizeof(short); } else if ((dtype == ScalarType::Char) || (dtype == ScalarType::Byte)) { return sizeof(char); + } else if (dtype == ScalarType::Float) { + return sizeof(float); } return 0; } diff --git a/backends/cadence/utils/facto_util.py b/backends/cadence/utils/facto_util.py index 2fa0787a08e..74f846aa706 100644 --- a/backends/cadence/utils/facto_util.py +++ b/backends/cadence/utils/facto_util.py @@ -18,6 +18,7 @@ # seed to generate identical cases every run to reproduce from bisect random_manager.seed(1729) +MAX_CASES = 50 def apply_tensor_contraints(op_name: str, tensor_constraints: list[object]) -> None: @@ -46,6 +47,14 @@ def apply_tensor_contraints(op_name: str, tensor_constraints: list[object]) -> N cp.Value.Le(lambda deps, dtype, struct: 2**2), ] ) + case "slice_copy.Tensor": + tensor_constraints.extend( + [ + cp.Rank.Le(lambda deps: 2), + cp.Value.Ge(lambda deps, dtype, struct: 1), + cp.Value.Le(lambda deps, dtype, struct: 2), + ] + ) case _: tensor_constraints.extend( [ @@ -124,4 +133,4 @@ def facto_testcase_gen(op_name: str) -> List[Tuple[List[str], OrderedDict[str, s return [ (posargs, inkwargs) for posargs, inkwargs, _ in ArgumentTupleGenerator(spec).gen() - ] + ][:MAX_CASES] From a7570a7dac9485ca4822ffd5cd6505c747185269 Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Thu, 27 Feb 2025 15:31:00 -0800 Subject: [PATCH 146/584] Update docs on building Apple frameworks (#8800) --- docs/source/using-executorch-ios.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/source/using-executorch-ios.md b/docs/source/using-executorch-ios.md index 029914eb498..b4c27ebbed0 100644 --- a/docs/source/using-executorch-ios.md +++ b/docs/source/using-executorch-ios.md @@ -132,14 +132,12 @@ sudo /Applications/CMake.app/Contents/bin/cmake-gui --install ./build/build_apple_frameworks.sh --help ``` -For example, the following invocation will build the ExecuTorch Runtime and all currently available kernels and backends for the Apple platform: +For example, the following command will build the ExecuTorch Runtime along with all available kernels and backends for the Apple platform in both Release and Debug modes: ```bash -./build/build_apple_frameworks.sh --coreml --mps --xnnpack --custom --optimized --portable --quantized +./build/build_apple_frameworks.sh --Release --Debug --coreml --mps --xnnpack --custom --optimized --portable --quantized ``` -Append a `--Debug` flag to the above command to build the binaries with debug symbols if needed. - After the build finishes successfully, the resulting frameworks can be found in the `cmake-out` directory. Copy them to your project and link them against your targets. 
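Since `build_apple_frameworks.sh` now accepts `--Release` and `--Debug` in a single invocation (and defaults to Release when neither is passed), a lighter local build that only produces the Release frameworks consumed by the local `Package.swift` could look like the sketch below; the particular subset of backends and kernels chosen here is just an example, not a required configuration.

```bash
# Sketch: Release-only build of a subset of frameworks for local Swift Package use.
# Run from the repository root; outputs land in cmake-out/ as .xcframework bundles.
./build/build_apple_frameworks.sh --Release --xnnpack --optimized --portable --quantized

# The local Package.swift introduced earlier resolves binary targets at these paths:
ls cmake-out/executorch.xcframework cmake-out/backend_xnnpack.xcframework

# Passing --Debug as well would additionally emit *_debug.xcframework variants,
# matching the *_debug products declared in the manifest.
```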
From 9a069c8da24320ab3474abd79876ad612a38cf23 Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Thu, 27 Feb 2025 15:31:11 -0800 Subject: [PATCH 147/584] =?UTF-8?q?Update=20Apple=20workflow=20to=20levera?= =?UTF-8?q?ge=20combined=20release/debug=20build=20for=20fr=E2=80=A6=20(#8?= =?UTF-8?q?801)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update Apple workflow to leverage combined release/debug build for frameworks * Update apple.yml --- .github/workflows/apple.yml | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/.github/workflows/apple.yml b/.github/workflows/apple.yml index 8349ddb4192..6929e12fa6d 100644 --- a/.github/workflows/apple.yml +++ b/.github/workflows/apple.yml @@ -170,24 +170,14 @@ jobs: PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ backends/apple/mps/install_requirements.sh - # Build Release iOS Frameworks + # Build iOS Frameworks PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - build/build_apple_frameworks.sh --coreml --custom --mps --optimized --portable --quantized --xnnpack + build/build_apple_frameworks.sh --Release --Debug --coreml --custom --mps --optimized --portable --quantized --xnnpack - # Bundle Release iOS Frameworks + # Bundle iOS Frameworks for FRAMEWORK in "${FRAMEWORKS[@]}"; do ( cd cmake-out && \ zip -r "${RUNNER_TEMP}/artifacts/${FRAMEWORK}-${VERSION}.zip" "${FRAMEWORK}.xcframework" - ) done - - # Build Debug iOS Frameworks - PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - build/build_apple_frameworks.sh --coreml --custom --mps --optimized --portable --quantized --xnnpack --Debug - - # Bundle Debug iOS Frameworks - for FRAMEWORK in "${FRAMEWORKS[@]}"; do ( - cd cmake-out && \ - mv "${FRAMEWORK}.xcframework" "${FRAMEWORK}_debug.xcframework" && \ zip -r "${RUNNER_TEMP}/artifacts/${FRAMEWORK}_debug-${VERSION}.zip" "${FRAMEWORK}_debug.xcframework" ) done From f439d7878dc3e3034578823002f9eef6c16fdec2 Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Thu, 27 Feb 2025 15:31:22 -0800 Subject: [PATCH 148/584] Revert "Update create_frameworks.sh" (#8799) Revert "Update create_frameworks.sh (#8793)" This reverts commit 954246c47158d054d10a209e9d60e6881c27fa7d. --- build/create_frameworks.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build/create_frameworks.sh b/build/create_frameworks.sh index 3ed2c46face..a55c4aed1e7 100755 --- a/build/create_frameworks.sh +++ b/build/create_frameworks.sh @@ -76,8 +76,8 @@ create_xcframework() { fi local dir_suffix - dir_suffix=$(echo "$dir" | tr '[:upper:]' '[:lower:]' | sed 's/\//_/g') - local merged_lib="${output}/lib${target_library_name}_${dir_suffix}.a" + dir_suffix=$(echo "$dir" | tr '[:upper:]' '[:lower:]' | sed 's/\//-/g') + local merged_lib="${output}/lib${target_library_name}-${dir_suffix}.a" # Remove the existing .a file if it exists. 
if [ -f "${merged_lib}" ]; then From ebcd90ec99baaa540386e6e95463c32137d43e00 Mon Sep 17 00:00:00 2001 From: Gasoonjia Date: Thu, 27 Feb 2025 15:55:57 -0800 Subject: [PATCH 149/584] make ETDumpGen use bufferdatasink Differential Revision: D69647096 Pull Request resolved: https://github.com/pytorch/executorch/pull/8741 --- devtools/CMakeLists.txt | 2 + devtools/etdump/buffer_data_sink.cpp | 5 + devtools/etdump/buffer_data_sink.h | 20 ++ devtools/etdump/etdump_flatcc.cpp | 73 +++-- devtools/etdump/etdump_flatcc.h | 16 +- devtools/etdump/targets.bzl | 4 +- devtools/etdump/tests/etdump_test.cpp | 453 +++++++++++++++----------- devtools/etdump/utils.h | 3 +- 8 files changed, 348 insertions(+), 228 deletions(-) diff --git a/devtools/CMakeLists.txt b/devtools/CMakeLists.txt index 58043067a54..aaee4d1d0b6 100644 --- a/devtools/CMakeLists.txt +++ b/devtools/CMakeLists.txt @@ -176,6 +176,8 @@ add_custom_command( add_library( etdump ${CMAKE_CURRENT_SOURCE_DIR}/etdump/etdump_flatcc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/etdump/emitter.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/etdump/buffer_data_sink.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/etdump/buffer_data_sink.h ) target_link_libraries( diff --git a/devtools/etdump/buffer_data_sink.cpp b/devtools/etdump/buffer_data_sink.cpp index 976d6dc31fb..8a366339783 100644 --- a/devtools/etdump/buffer_data_sink.cpp +++ b/devtools/etdump/buffer_data_sink.cpp @@ -27,6 +27,11 @@ Result BufferDataSink::create( return BufferDataSink(buffer, alignment); } +Result +BufferDataSink::create(void* ptr, size_t size, size_t alignment) noexcept { + return BufferDataSink::create({(uint8_t*)ptr, size}, alignment); +} + Result BufferDataSink::write(const void* ptr, size_t length) { if (length == 0) { return offset_; diff --git a/devtools/etdump/buffer_data_sink.h b/devtools/etdump/buffer_data_sink.h index 9639f2e072c..c5bbcf6e435 100644 --- a/devtools/etdump/buffer_data_sink.h +++ b/devtools/etdump/buffer_data_sink.h @@ -39,6 +39,26 @@ class BufferDataSink : public DataSinkBase { ::executorch::runtime::Span buffer, size_t alignment = 64) noexcept; + /** + * Creates a BufferDataSink with a given span buffer. + * + * @param[in] ptr A pointer to the data blob where data will be stored. + * @param[in] size The size of the data blob in bytes. + * @param[in] alignment The alignment requirement for the buffer. It must be + * a power of two and greater than zero. Default is 64. + * @return A Result object containing either: + * - A BufferDataSink object if succees, or + * - An error code indicating the failure reason, if any issue + * occurs during the creation process. + */ + static ::executorch::runtime::Result + create(void* ptr, size_t size, size_t alignment = 64) noexcept; + + /** + * Creates a empty BufferDataSink; + */ + BufferDataSink() = default; + // Uncopiable and unassignable to avoid double assignment and free of the // internal buffer. 
BufferDataSink(const BufferDataSink&) = delete; diff --git a/devtools/etdump/etdump_flatcc.cpp b/devtools/etdump/etdump_flatcc.cpp index ec52621a956..92c977f778b 100644 --- a/devtools/etdump/etdump_flatcc.cpp +++ b/devtools/etdump/etdump_flatcc.cpp @@ -10,6 +10,7 @@ #include +#include #include #include #include @@ -29,6 +30,7 @@ using ::executorch::runtime::DelegateDebugIdType; using ::executorch::runtime::EValue; using ::executorch::runtime::EventTracerEntry; using ::executorch::runtime::LoggedEValueType; +using ::executorch::runtime::Result; using ::executorch::runtime::Span; using ::executorch::runtime::Tag; @@ -142,6 +144,7 @@ ETDumpGen::~ETDumpGen() { void ETDumpGen::reset() { state_ = State::Init; num_blocks_ = 0; + data_sink_ = nullptr; flatcc_builder_reset(builder_); flatbuffers_buffer_start(builder_, etdump_ETDump_file_identifier); etdump_ETDump_start_as_root_with_size(builder_); @@ -347,10 +350,10 @@ void ETDumpGen::log_intermediate_output_delegate_helper( ET_CHECK_MSG( (name == nullptr) ^ (delegate_debug_index == -1), "Only name or delegate_debug_index can be valid. Check DelegateMappingBuilder documentation for more details."); - if (debug_buffer_.empty()) { - ET_CHECK_MSG(0, "Must pre-set debug buffer with set_debug_buffer()\n"); - return; - } + + ET_CHECK_MSG( + data_sink_, + "Must pre-set data sink before logging evalue with set_data_sink() or set_debug_buffer()\n"); check_ready_to_add_events(); int64_t string_id = name != nullptr ? create_string_entry(name) : -1; @@ -367,7 +370,7 @@ void ETDumpGen::log_intermediate_output_delegate_helper( // Check the type of `output` then call the corresponding logging functions if constexpr (std::is_same::value) { - long offset = copy_tensor_to_debug_buffer(output); + long offset = write_tensor_or_raise_error(output); etdump_Tensor_ref_t tensor_ref = add_tensor_entry(builder_, output, offset); etdump_Value_start(builder_); @@ -377,7 +380,7 @@ void ETDumpGen::log_intermediate_output_delegate_helper( } else if constexpr (std::is_same>::value) { etdump_Tensor_vec_start(builder_); for (size_t i = 0; i < output.size(); ++i) { - long offset = copy_tensor_to_debug_buffer(output[i]); + long offset = write_tensor_or_raise_error(output[i]); etdump_Tensor_vec_push( builder_, add_tensor_entry(builder_, output[i], offset)); } @@ -497,27 +500,22 @@ ETDumpResult ETDumpGen::get_etdump_data() { } void ETDumpGen::set_debug_buffer(Span buffer) { - debug_buffer_ = buffer; + Result bds_ret = BufferDataSink::create(buffer); + ET_CHECK_MSG( + bds_ret.ok(), + "Failed to write tensor with error 0x%" PRIx32, + static_cast(bds_ret.error())); + + buffer_data_sink_ = std::move(bds_ret.get()); + data_sink_ = &buffer_data_sink_; } -size_t ETDumpGen::copy_tensor_to_debug_buffer(executorch::aten::Tensor tensor) { - if (tensor.nbytes() == 0) { - return static_cast(-1); - } - uint8_t* offset_ptr = - internal::align_pointer(debug_buffer_.data() + debug_buffer_offset_, 64); - debug_buffer_offset_ = (offset_ptr - debug_buffer_.data()) + tensor.nbytes(); - ET_CHECK_MSG( - debug_buffer_offset_ <= debug_buffer_.size(), - "Ran out of space to store intermediate outputs."); - memcpy(offset_ptr, tensor.const_data_ptr(), tensor.nbytes()); - return (size_t)(offset_ptr - debug_buffer_.data()); +void ETDumpGen::set_data_sink(DataSinkBase* data_sink) { + data_sink_ = data_sink; } void ETDumpGen::log_evalue(const EValue& evalue, LoggedEValueType evalue_type) { - if (debug_buffer_.empty()) { - return; - } + ET_CHECK_MSG(data_sink_, "Must set data sink before logging evalue\n"); 
check_ready_to_add_events(); @@ -529,7 +527,7 @@ void ETDumpGen::log_evalue(const EValue& evalue, LoggedEValueType evalue_type) { switch (evalue.tag) { case Tag::Tensor: { executorch::aten::Tensor tensor = evalue.toTensor(); - long offset = copy_tensor_to_debug_buffer(tensor); + long offset = write_tensor_or_raise_error(tensor); etdump_Tensor_ref_t tensor_ref = add_tensor_entry(builder_, tensor, offset); @@ -551,7 +549,7 @@ void ETDumpGen::log_evalue(const EValue& evalue, LoggedEValueType evalue_type) { evalue.toTensorList(); etdump_Tensor_vec_start(builder_); for (size_t i = 0; i < tensors.size(); ++i) { - long offset = copy_tensor_to_debug_buffer(tensors[i]); + long offset = write_tensor_or_raise_error(tensors[i]); etdump_Tensor_vec_push( builder_, add_tensor_entry(builder_, tensors[i], offset)); } @@ -635,8 +633,31 @@ bool ETDumpGen::is_static_etdump() { return alloc_.data != nullptr; } -size_t ETDumpGen::get_debug_buffer_size() const { - return debug_buffer_.size(); +DataSinkBase* ETDumpGen::get_data_sink() { + return data_sink_; +} + +long ETDumpGen::write_tensor_or_raise_error(Tensor tensor) { + // Previously, the function copy_tensor_to_debug_buffer returned 0xFF..F when + // given an empty tensor, which is an invalid offset for most buffers. In our + // data sink, we will return the current debug_buffer_offset for better + // clarity. We are isolating the empty tensor case here using the old logic to + // avoid any backward compatibility issues while introducing the data sink. + // Once the data sink is fully implemented, we can remove this check and apply + // the new logic to all cases. + // TODO(gasoonjia): remove this check after datasink is fully rolled out. + if (tensor.nbytes() == 0) { + return static_cast(-1); + } + + ET_CHECK_MSG(data_sink_, "Must set data sink before writing data"); + Result ret = + data_sink_->write(tensor.const_data_ptr(), tensor.nbytes()); + ET_CHECK_MSG( + ret.ok(), + "Failed to write tensor with error 0x%" PRIx32, + static_cast(ret.error())); + return static_cast(ret.get()); } } // namespace etdump diff --git a/devtools/etdump/etdump_flatcc.h b/devtools/etdump/etdump_flatcc.h index d7781066533..a0457a91de8 100644 --- a/devtools/etdump/etdump_flatcc.h +++ b/devtools/etdump/etdump_flatcc.h @@ -9,7 +9,10 @@ #pragma once #include +#include +#include +#include #include #include #include @@ -141,9 +144,10 @@ class ETDumpGen : public ::executorch::runtime::EventTracer { ::executorch::runtime::DebugHandle delegate_debug_index, const double& output) override; void set_debug_buffer(::executorch::runtime::Span buffer); + void set_data_sink(DataSinkBase* data_sink); ETDumpResult get_etdump_data(); - size_t get_debug_buffer_size() const; size_t get_num_blocks(); + DataSinkBase* get_data_sink(); bool is_static_etdump(); void reset(); @@ -158,7 +162,6 @@ class ETDumpGen : public ::executorch::runtime::EventTracer { void check_ready_to_add_events(); int64_t create_string_entry(const char* name); - size_t copy_tensor_to_debug_buffer(executorch::aten::Tensor tensor); /** * Templated helper function used to log various types of intermediate output. 
@@ -170,10 +173,15 @@ class ETDumpGen : public ::executorch::runtime::EventTracer { ::executorch::runtime::DebugHandle delegate_debug_index, const T& output); + long write_tensor_or_raise_error(executorch::aten::Tensor tensor); + struct flatcc_builder* builder_; size_t num_blocks_ = 0; - ::executorch::runtime::Span debug_buffer_; - size_t debug_buffer_offset_ = 0; + DataSinkBase* data_sink_; + + // It is only for set_debug_buffer function. + BufferDataSink buffer_data_sink_; + int bundled_input_index_ = -1; State state_ = State::Init; struct internal::ETDumpStaticAllocator alloc_; diff --git a/devtools/etdump/targets.bzl b/devtools/etdump/targets.bzl index 1d762e983c8..afa012ed948 100644 --- a/devtools/etdump/targets.bzl +++ b/devtools/etdump/targets.bzl @@ -117,7 +117,7 @@ def define_common_targets(): runtime.cxx_library( name = "buffer_data_sink" + aten_suffix, - headers = [ + exported_headers = [ "buffer_data_sink.h", ], srcs = [ @@ -153,6 +153,8 @@ def define_common_targets(): exported_deps = [ ":etdump_schema_flatcc", ":utils", + ":data_sink_base" + aten_suffix, + ":buffer_data_sink" + aten_suffix, "//executorch/runtime/core:event_tracer" + aten_suffix, "//executorch/runtime/core/exec_aten/util:scalar_type_util" + aten_suffix, ], diff --git a/devtools/etdump/tests/etdump_test.cpp b/devtools/etdump/tests/etdump_test.cpp index 664a5ee1a0d..50456bade42 100644 --- a/devtools/etdump/tests/etdump_test.cpp +++ b/devtools/etdump/tests/etdump_test.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -18,7 +19,6 @@ #include #include #include -#include using ::executorch::aten::ScalarType; using ::executorch::aten::Tensor; @@ -35,6 +35,8 @@ using ::executorch::runtime::Span; using ::executorch::runtime::Tag; using ::executorch::runtime::testing::TensorFactory; +using ::executorch::etdump::BufferDataSink; + class ProfilerETDumpTest : public ::testing::Test { protected: void SetUp() override { @@ -175,54 +177,78 @@ TEST_F(ProfilerETDumpTest, AllocationEvents) { TEST_F(ProfilerETDumpTest, DebugEvent) { for (size_t i = 0; i < 2; i++) { - TensorFactory tf; - EValue evalue(tf.ones({3, 2})); + for (size_t j = 0; j < 2; j++) { + TensorFactory tf; + EValue evalue(tf.ones({3, 2})); - etdump_gen[i]->create_event_block("test_block"); + etdump_gen[i]->create_event_block("test_block"); - void* ptr = malloc(2048); - Span buffer((uint8_t*)ptr, 2048); + void* ptr = malloc(2048); + Span buffer((uint8_t*)ptr, 2048); - etdump_gen[i]->set_debug_buffer(buffer); - etdump_gen[i]->log_evalue(evalue); - etdump_gen[i]->log_evalue(evalue, LoggedEValueType::kProgramOutput); + auto buffer_data_sink = BufferDataSink::create(ptr, 2048); + + // using span to record debug data + if (j == 0) { + etdump_gen[i]->set_debug_buffer(buffer); + } + // using data sink to record debug data + else { + etdump_gen[i]->set_data_sink(&buffer_data_sink.get()); + } - EValue evalue_int((int64_t)5); - etdump_gen[i]->log_evalue(evalue_int); + etdump_gen[i]->log_evalue(evalue); + etdump_gen[i]->log_evalue(evalue, LoggedEValueType::kProgramOutput); - EValue evalue_double((double)1.5); - etdump_gen[i]->log_evalue(evalue_double); + EValue evalue_int((int64_t)5); + etdump_gen[i]->log_evalue(evalue_int); - EValue evalue_bool(true); - etdump_gen[i]->log_evalue(evalue_bool); + EValue evalue_double((double)1.5); + etdump_gen[i]->log_evalue(evalue_double); - etdump_gen[i]->log_evalue(evalue_bool); + EValue evalue_bool(true); + etdump_gen[i]->log_evalue(evalue_bool); - free(ptr); + etdump_gen[i]->log_evalue(evalue_bool); + + 
free(ptr); + } } } TEST_F(ProfilerETDumpTest, DebugEventTensorList) { for (size_t i = 0; i < 2; i++) { - TensorFactory tf; - executorch::aten::Tensor storage[2] = {tf.ones({3, 2}), tf.ones({3, 2})}; - EValue evalue_1(storage[0]); - EValue evalue_2(storage[1]); - EValue* values_p[2] = {&evalue_1, &evalue_2}; + for (size_t j = 0; j < 2; j++) { + TensorFactory tf; + executorch::aten::Tensor storage[2] = {tf.ones({3, 2}), tf.ones({3, 2})}; + EValue evalue_1(storage[0]); + EValue evalue_2(storage[1]); + EValue* values_p[2] = {&evalue_1, &evalue_2}; - BoxedEvalueList a_box(values_p, storage, 2); - EValue evalue(a_box); - evalue.tag = Tag::ListTensor; + BoxedEvalueList a_box(values_p, storage, 2); + EValue evalue(a_box); + evalue.tag = Tag::ListTensor; - etdump_gen[i]->create_event_block("test_block"); + etdump_gen[i]->create_event_block("test_block"); + + void* ptr = malloc(2048); + Span buffer((uint8_t*)ptr, 2048); - void* ptr = malloc(2048); - Span buffer((uint8_t*)ptr, 2048); + auto buffer_data_sink = BufferDataSink::create(ptr, 2048); - etdump_gen[i]->set_debug_buffer(buffer); - etdump_gen[i]->log_evalue(evalue); + // using span to record debug data + if (j == 0) { + etdump_gen[i]->set_debug_buffer(buffer); + } + // using data sink to record debug data + else { + etdump_gen[i]->set_data_sink(&buffer_data_sink.get()); + } - free(ptr); + etdump_gen[i]->log_evalue(evalue); + + free(ptr); + } } } @@ -231,61 +257,73 @@ TEST_F(ProfilerETDumpTest, VerifyLogging) { EValue evalue(tf.ones({3, 2})); for (size_t i = 0; i < 2; i++) { - etdump_gen[i]->create_event_block("test_block"); + for (size_t j = 0; j < 2; j++) { + etdump_gen[i]->create_event_block("test_block"); - void* ptr = malloc(2048); - Span buffer((uint8_t*)ptr, 2048); + void* ptr = malloc(2048); + Span buffer((uint8_t*)ptr, 2048); - etdump_gen[i]->set_debug_buffer(buffer); - etdump_gen[i]->log_evalue(evalue); - etdump_gen[i]->log_evalue(evalue, LoggedEValueType::kProgramOutput); + auto buffer_data_sink = BufferDataSink::create(ptr, 2048); - ETDumpResult result = etdump_gen[i]->get_etdump_data(); - ASSERT_TRUE(result.buf != nullptr); - ASSERT_TRUE(result.size != 0); + // using span to record debug data + if (j == 0) { + etdump_gen[i]->set_debug_buffer(buffer); + } + // using data sink to record debug data + else { + etdump_gen[i]->set_data_sink(&buffer_data_sink.get()); + } - size_t size = 0; - void* buf = flatbuffers_read_size_prefix(result.buf, &size); - etdump_ETDump_table_t etdump = etdump_ETDump_as_root_with_identifier( - buf, etdump_ETDump_file_identifier); + etdump_gen[i]->log_evalue(evalue); + etdump_gen[i]->log_evalue(evalue, LoggedEValueType::kProgramOutput); - etdump_RunData_vec_t run_data_vec = etdump_ETDump_run_data(etdump); - ASSERT_EQ(etdump_RunData_vec_len(run_data_vec), 1); - - etdump_Event_vec_t events = - etdump_RunData_events(etdump_RunData_vec_at(run_data_vec, 0)); - ASSERT_EQ(etdump_Event_vec_len(events), 2); - - etdump_Event_table_t event = etdump_Event_vec_at(events, 0); - - etdump_DebugEvent_table_t single_debug_event = - etdump_Event_debug_event(event); - etdump_Value_table_t value = - etdump_DebugEvent_debug_entry(single_debug_event); - ASSERT_EQ(etdump_Value_tensor_is_present(value), true); - ASSERT_EQ(etdump_Value_output_is_present(value), false); - - etdump_Tensor_table_t tensor = etdump_Value_tensor(value); - executorch_flatbuffer_ScalarType_enum_t scalar_enum = - etdump_Tensor_scalar_type(tensor); - ASSERT_EQ(scalar_enum, executorch_flatbuffer_ScalarType_FLOAT); - flatbuffers_int64_vec_t sizes = 
etdump_Tensor_sizes(tensor); - ASSERT_EQ(flatbuffers_int64_vec_len(sizes), 2); - ASSERT_EQ(flatbuffers_int64_vec_at(sizes, 0), 3); - ASSERT_EQ(flatbuffers_int64_vec_at(sizes, 1), 2); - - event = etdump_Event_vec_at(events, 1); - single_debug_event = etdump_Event_debug_event(event); - value = etdump_DebugEvent_debug_entry(single_debug_event); - ASSERT_EQ(etdump_Value_tensor_is_present(value), true); - ASSERT_EQ(etdump_Value_output_is_present(value), true); - etdump_Bool_table_t bool_val = etdump_Value_output_get(value); - bool bool_val_from_table = etdump_Bool_bool_val(bool_val); - ASSERT_EQ(bool_val_from_table, true); - - free(ptr); - if (!etdump_gen[i]->is_static_etdump()) { - free(result.buf); + ETDumpResult result = etdump_gen[i]->get_etdump_data(); + ASSERT_TRUE(result.buf != nullptr); + ASSERT_TRUE(result.size != 0); + + size_t size = 0; + void* buf = flatbuffers_read_size_prefix(result.buf, &size); + etdump_ETDump_table_t etdump = etdump_ETDump_as_root_with_identifier( + buf, etdump_ETDump_file_identifier); + + etdump_RunData_vec_t run_data_vec = etdump_ETDump_run_data(etdump); + ASSERT_EQ(etdump_RunData_vec_len(run_data_vec), 1); + + etdump_Event_vec_t events = + etdump_RunData_events(etdump_RunData_vec_at(run_data_vec, 0)); + ASSERT_EQ(etdump_Event_vec_len(events), 2); + + etdump_Event_table_t event = etdump_Event_vec_at(events, 0); + + etdump_DebugEvent_table_t single_debug_event = + etdump_Event_debug_event(event); + etdump_Value_table_t value = + etdump_DebugEvent_debug_entry(single_debug_event); + ASSERT_EQ(etdump_Value_tensor_is_present(value), true); + ASSERT_EQ(etdump_Value_output_is_present(value), false); + + etdump_Tensor_table_t tensor = etdump_Value_tensor(value); + executorch_flatbuffer_ScalarType_enum_t scalar_enum = + etdump_Tensor_scalar_type(tensor); + ASSERT_EQ(scalar_enum, executorch_flatbuffer_ScalarType_FLOAT); + flatbuffers_int64_vec_t sizes = etdump_Tensor_sizes(tensor); + ASSERT_EQ(flatbuffers_int64_vec_len(sizes), 2); + ASSERT_EQ(flatbuffers_int64_vec_at(sizes, 0), 3); + ASSERT_EQ(flatbuffers_int64_vec_at(sizes, 1), 2); + + event = etdump_Event_vec_at(events, 1); + single_debug_event = etdump_Event_debug_event(event); + value = etdump_DebugEvent_debug_entry(single_debug_event); + ASSERT_EQ(etdump_Value_tensor_is_present(value), true); + ASSERT_EQ(etdump_Value_output_is_present(value), true); + etdump_Bool_table_t bool_val = etdump_Value_output_get(value); + bool bool_val_from_table = etdump_Bool_bool_val(bool_val); + ASSERT_EQ(bool_val_from_table, true); + + free(ptr); + if (!etdump_gen[i]->is_static_etdump()) { + free(result.buf); + } } } } @@ -432,58 +470,70 @@ TEST_F(ProfilerETDumpTest, VerifyData) { TEST_F(ProfilerETDumpTest, LogDelegateIntermediateOutput) { for (size_t i = 0; i < 2; i++) { - void* ptr = malloc(2048); - Span buffer((uint8_t*)ptr, 2048); - - etdump_gen[i]->create_event_block("test_block"); - TensorFactory tf; - - ET_EXPECT_DEATH( - etdump_gen[i]->log_intermediate_output_delegate( - "test_event_tensor", - static_cast(-1), - tf.ones({3, 2})), - "Must pre-set debug buffer with set_debug_buffer()"); - etdump_gen[i]->set_debug_buffer(buffer); - - // Log a tensor - etdump_gen[i]->log_intermediate_output_delegate( - "test_event_tensor", - static_cast(-1), - tf.ones({3, 2})); - - // Log a tensor list - std::vector tensors = {tf.ones({5, 4}), tf.ones({7, 6})}; - etdump_gen[i]->log_intermediate_output_delegate( - "test_event_tensorlist", - static_cast(-1), - ArrayRef(tensors.data(), tensors.size())); + for (size_t j = 0; j < 2; j++) { + void* ptr 
= malloc(2048); + Span buffer((uint8_t*)ptr, 2048); - // Log an int - etdump_gen[i]->log_intermediate_output_delegate( - "test_event_tensorlist", - static_cast(-1), - 10); + auto buffer_data_sink = BufferDataSink::create(ptr, 2048); - // Log a double - etdump_gen[i]->log_intermediate_output_delegate( - "test_event_tensorlist", - static_cast(-1), - 20.75); + etdump_gen[i]->create_event_block("test_block"); + TensorFactory tf; + + // using span to record debug data + if (j == 0) { + // TODO(gasoonjia): add similar ET_EXPECT_DEATH on BufferDataSink branch + ET_EXPECT_DEATH( + etdump_gen[i]->log_intermediate_output_delegate( + "test_event_tensor", + static_cast(-1), + tf.ones({3, 2})), + "Must pre-set data sink before logging evalue with set_data_sink"); + etdump_gen[i]->set_debug_buffer(buffer); + } + // using data sink to record debug data + else { + etdump_gen[i]->set_data_sink(&buffer_data_sink.get()); + } - // Log a bool - etdump_gen[i]->log_intermediate_output_delegate( - "test_event_tensorlist", - static_cast(-1), - true); + // Log a tensor + etdump_gen[i]->log_intermediate_output_delegate( + "test_event_tensor", + static_cast(-1), + tf.ones({3, 2})); + + // Log a tensor list + std::vector tensors = {tf.ones({5, 4}), tf.ones({7, 6})}; + etdump_gen[i]->log_intermediate_output_delegate( + "test_event_tensorlist", + static_cast(-1), + ArrayRef(tensors.data(), tensors.size())); + + // Log an int + etdump_gen[i]->log_intermediate_output_delegate( + "test_event_tensorlist", + static_cast(-1), + 10); + + // Log a double + etdump_gen[i]->log_intermediate_output_delegate( + "test_event_tensorlist", + static_cast(-1), + 20.75); + + // Log a bool + etdump_gen[i]->log_intermediate_output_delegate( + "test_event_tensorlist", + static_cast(-1), + true); - ETDumpResult result = etdump_gen[i]->get_etdump_data(); - ASSERT_TRUE(result.buf != nullptr); - ASSERT_TRUE(result.size != 0); + ETDumpResult result = etdump_gen[i]->get_etdump_data(); + ASSERT_TRUE(result.buf != nullptr); + ASSERT_TRUE(result.size != 0); - free(ptr); - if (!etdump_gen[i]->is_static_etdump()) { - free(result.buf); + free(ptr); + if (!etdump_gen[i]->is_static_etdump()) { + free(result.buf); + } } } } @@ -493,81 +543,92 @@ TEST_F(ProfilerETDumpTest, VerifyDelegateIntermediateLogging) { EValue evalue(tf.ones({3, 2})); for (size_t i = 0; i < 2; i++) { - etdump_gen[i]->create_event_block("test_block"); - - void* ptr = malloc(2048); - Span buffer((uint8_t*)ptr, 2048); + for (size_t j = 0; j < 2; j++) { + etdump_gen[i]->create_event_block("test_block"); - etdump_gen[i]->set_debug_buffer(buffer); + void* ptr = malloc(2048); + Span buffer((uint8_t*)ptr, 2048); + ; + auto buffer_data_sink = BufferDataSink::create(ptr, 2048); - // Event 0 - etdump_gen[i]->log_intermediate_output_delegate( - nullptr, 257, tf.ones({3, 4})); - // Event 1 - etdump_gen[i]->log_intermediate_output_delegate( - nullptr, 258, tf.ones({5, 6})); + // using span to record debug data + if (j == 0) { + etdump_gen[i]->set_debug_buffer(buffer); + } + // using data sink to record debug data + else { + etdump_gen[i]->set_data_sink(&buffer_data_sink.get()); + } - ETDumpResult result = etdump_gen[i]->get_etdump_data(); - ASSERT_TRUE(result.buf != nullptr); - ASSERT_TRUE(result.size != 0); + // Event 0 + etdump_gen[i]->log_intermediate_output_delegate( + nullptr, 257, tf.ones({3, 4})); + // Event 1 + etdump_gen[i]->log_intermediate_output_delegate( + nullptr, 258, tf.ones({5, 6})); - size_t size = 0; - void* buf = flatbuffers_read_size_prefix(result.buf, &size); - 
etdump_ETDump_table_t etdump = etdump_ETDump_as_root_with_identifier( - buf, etdump_ETDump_file_identifier); + ETDumpResult result = etdump_gen[i]->get_etdump_data(); + ASSERT_TRUE(result.buf != nullptr); + ASSERT_TRUE(result.size != 0); - etdump_RunData_vec_t run_data_vec = etdump_ETDump_run_data(etdump); - ASSERT_EQ(etdump_RunData_vec_len(run_data_vec), 1); - - etdump_Event_vec_t events = - etdump_RunData_events(etdump_RunData_vec_at(run_data_vec, 0)); - ASSERT_EQ(etdump_Event_vec_len(events), 2); - - // Verify Event 0 - etdump_Event_table_t event_0 = etdump_Event_vec_at(events, 0); - - etdump_DebugEvent_table_t single_debug_event = - etdump_Event_debug_event(event_0); - etdump_Value_table_t value = - etdump_DebugEvent_debug_entry(single_debug_event); - ASSERT_EQ(etdump_Value_tensor_is_present(value), true); - - etdump_Tensor_table_t tensor = etdump_Value_tensor(value); - executorch_flatbuffer_ScalarType_enum_t scalar_enum = - etdump_Tensor_scalar_type(tensor); - ASSERT_EQ(scalar_enum, executorch_flatbuffer_ScalarType_FLOAT); - flatbuffers_int64_vec_t sizes = etdump_Tensor_sizes(tensor); - ASSERT_EQ(flatbuffers_int64_vec_len(sizes), 2); - ASSERT_EQ(flatbuffers_int64_vec_at(sizes, 0), 3); - ASSERT_EQ(flatbuffers_int64_vec_at(sizes, 1), 4); - - // Verify Event 1 - etdump_Event_table_t event_1 = etdump_Event_vec_at(events, 1); - - single_debug_event = etdump_Event_debug_event(event_1); - value = etdump_DebugEvent_debug_entry(single_debug_event); - - tensor = etdump_Value_tensor(value); - sizes = etdump_Tensor_sizes(tensor); - ASSERT_EQ(flatbuffers_int64_vec_len(sizes), 2); - ASSERT_EQ(flatbuffers_int64_vec_at(sizes, 0), 5); - ASSERT_EQ(flatbuffers_int64_vec_at(sizes, 1), 6); - - // Event 1 should have a empty delegate_debug_id_str - flatbuffers_string_t delegate_debug_id_name = - etdump_DebugEvent_delegate_debug_id_str( - etdump_Event_debug_event(event_1)); + size_t size = 0; + void* buf = flatbuffers_read_size_prefix(result.buf, &size); + etdump_ETDump_table_t etdump = etdump_ETDump_as_root_with_identifier( + buf, etdump_ETDump_file_identifier); - EXPECT_EQ(delegate_debug_id_name, nullptr); - // Check for the correct delegate_debug_id_int - EXPECT_EQ( - etdump_DebugEvent_delegate_debug_id_int( - etdump_Event_debug_event(event_1)), - 258); + etdump_RunData_vec_t run_data_vec = etdump_ETDump_run_data(etdump); + ASSERT_EQ(etdump_RunData_vec_len(run_data_vec), 1); + + etdump_Event_vec_t events = + etdump_RunData_events(etdump_RunData_vec_at(run_data_vec, 0)); + ASSERT_EQ(etdump_Event_vec_len(events), 2); + + // Verify Event 0 + etdump_Event_table_t event_0 = etdump_Event_vec_at(events, 0); + + etdump_DebugEvent_table_t single_debug_event = + etdump_Event_debug_event(event_0); + etdump_Value_table_t value = + etdump_DebugEvent_debug_entry(single_debug_event); + ASSERT_EQ(etdump_Value_tensor_is_present(value), true); + + etdump_Tensor_table_t tensor = etdump_Value_tensor(value); + executorch_flatbuffer_ScalarType_enum_t scalar_enum = + etdump_Tensor_scalar_type(tensor); + ASSERT_EQ(scalar_enum, executorch_flatbuffer_ScalarType_FLOAT); + flatbuffers_int64_vec_t sizes = etdump_Tensor_sizes(tensor); + ASSERT_EQ(flatbuffers_int64_vec_len(sizes), 2); + ASSERT_EQ(flatbuffers_int64_vec_at(sizes, 0), 3); + ASSERT_EQ(flatbuffers_int64_vec_at(sizes, 1), 4); + + // Verify Event 1 + etdump_Event_table_t event_1 = etdump_Event_vec_at(events, 1); + + single_debug_event = etdump_Event_debug_event(event_1); + value = etdump_DebugEvent_debug_entry(single_debug_event); + + tensor = etdump_Value_tensor(value); + 
sizes = etdump_Tensor_sizes(tensor); + ASSERT_EQ(flatbuffers_int64_vec_len(sizes), 2); + ASSERT_EQ(flatbuffers_int64_vec_at(sizes, 0), 5); + ASSERT_EQ(flatbuffers_int64_vec_at(sizes, 1), 6); + + // Event 1 should have a empty delegate_debug_id_str + flatbuffers_string_t delegate_debug_id_name = + etdump_DebugEvent_delegate_debug_id_str( + etdump_Event_debug_event(event_1)); + + EXPECT_EQ(delegate_debug_id_name, nullptr); + // Check for the correct delegate_debug_id_int + EXPECT_EQ( + etdump_DebugEvent_delegate_debug_id_int( + etdump_Event_debug_event(event_1)), + 258); - free(ptr); - if (!etdump_gen[i]->is_static_etdump()) { - free(result.buf); + free(ptr); + if (!etdump_gen[i]->is_static_etdump()) { + free(result.buf); + } } } } diff --git a/devtools/etdump/utils.h b/devtools/etdump/utils.h index 8f9a78a1f99..e595e644b4c 100644 --- a/devtools/etdump/utils.h +++ b/devtools/etdump/utils.h @@ -13,7 +13,8 @@ namespace internal { * Aligns a pointer to the next multiple of `alignment`. * * @param[in] ptr Pointer to align. - * @param[in] alignment Alignment to align to. Must be a power of 2. + * @param[in] alignment Alignment to align to. Must be a power of 2 and cannot + * be 0. * * @returns A pointer aligned to `alignment`. */ From 7535a54c401cf928a65d4ce50fcfbaf1512284dc Mon Sep 17 00:00:00 2001 From: Hansong <107070759+kirklandsign@users.noreply.github.com> Date: Thu, 27 Feb 2025 16:13:59 -0800 Subject: [PATCH 150/584] [Android docs] Minor update to existing sections (#8802) --- docs/source/using-executorch-android.md | 42 +++++++++++++++++++------ 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/docs/source/using-executorch-android.md b/docs/source/using-executorch-android.md index e41cd3e2bb7..e8579db1e6f 100644 --- a/docs/source/using-executorch-android.md +++ b/docs/source/using-executorch-android.md @@ -1,16 +1,26 @@ # Using ExecuTorch on Android -To use from Android, ExecuTorch provides Java API bindings and Android platform integration, available as a AAR file. The ExecuTorch C++ APIs can also be used from Android native. +To use from Android, ExecuTorch provides Java/Kotlin API bindings and Android platform integration, available as an AAR file. + +Note: This page covers Android app integration through the AAR library. The ExecuTorch C++ APIs can also be used from Android native, and the documentation can be found on [this page about cross compilation](https://pytorch.org/executorch/main/using-executorch-building-from-source.html#cross-compilation). ## Installation -We provide a prebuilt Android library (AAR), `executorch.aar` for both generic (image/audio processing) and LLAMA use case. +We package all ExecuTorch Android libraries into an Android library (AAR), `executorch.aar` for both generic (image/audio processing) and LLM (LLaMA) use case. In each release, we will upload the prebuilt AAR artifacts. Users can also build the AAR from source. + +### Contents of library -## Contents of library -- `executorch.aar` - - [Java library](https://github.com/pytorch/executorch/tree/main/extension/android/src/main/java/org/pytorch/executorch) - - JNI contains the JNI binding for the corresponding Java code, and ExecuTorch native library, including core ExecuTorch runtime libraries, XNNPACK backend, Portable kernels, Optimized kernels, Quantized kernels, and LLAMA-specific Custom ops library. - - Comes with two ABI variants, arm64-v8a and x86\_64. 
+The AAR artifact contains the Java library for users to integrate with their Java/Kotlin application code, as well as the corresponding JNI library (.so file), which is loaded by the Java code during initialization. + +- [Java library](https://github.com/pytorch/executorch/tree/main/extension/android/src/main/java/org/pytorch/executorch) +- JNI contains the JNI binding for the corresponding Java code, and ExecuTorch native library, including + - core ExecuTorch runtime libraries + - XNNPACK backend + - Portable kernels + - Optimized kernels + - Quantized kernels + - LLaMa-specific Custom ops library. +- Comes with two ABI variants, arm64-v8a and x86\_64. ## Downloading AAR @@ -28,12 +38,22 @@ We provide a prebuilt Android library (AAR), `executorch.aar` for both generic ( ## Using prebuilt libraries -To add the Java library to your app, simply download the AAR, and add it to your gradle build rule. +To add the Java library to your app: +1. Download the AAR. +2. Add it to your gradle build rule as a file path. + +The Java package requires `fbjni` and `soloader`, and currently requires users to explicitly declare the dependency. Therefore, two more `dependencies` in gradle rule is required: +``` +implementation("com.facebook.soloader:soloader:0.10.5") +implementation("com.facebook.fbjni:fbjni:0.5.1") +``` + +### Example usage -In your app working directory, such as example executorch/examples/demo-apps/android/LlamaDemo, +In your app working directory, such as executorch/examples/demo-apps/android/LlamaDemo, ``` mkdir -p app/libs -curl https://ossci-android.s3.amazonaws.com/executorch/release/executorch-241002/executorch.aar -o app/libs/executorch.aar +curl https://ossci-android.s3.amazonaws.com/executorch/release/v0.5.0-rc3/executorch.aar -o app/libs/executorch.aar ``` And include it in gradle: @@ -41,6 +61,8 @@ And include it in gradle: # app/build.grardle.kts dependencies { implementation(files("libs/executorch.aar")) + implementation("com.facebook.soloader:soloader:0.10.5") + implementation("com.facebook.fbjni:fbjni:0.5.1") } ``` From 17d4f04be816e7467fc2d411e2ebe6d4e0f478a3 Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Thu, 27 Feb 2025 16:39:38 -0800 Subject: [PATCH 151/584] Fix linter in print_exported_headers.py (#8808) --- build/print_exported_headers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/print_exported_headers.py b/build/print_exported_headers.py index dc0102ff00b..31f70fb9ba7 100755 --- a/build/print_exported_headers.py +++ b/build/print_exported_headers.py @@ -39,7 +39,7 @@ def query(buck2: str, target: str, attribute: str) -> str: # Cache to store results for exported headers per target. 
-_exported_headers_cache = {} +_exported_headers_cache: dict[str, Set[str]] = {} def exported_headers(buck2: str, target: str) -> Set[str]: From 3ffd24e0c02c10db5de3b55058a09da39d781571 Mon Sep 17 00:00:00 2001 From: mcremon-meta <134334895+mcremon-meta@users.noreply.github.com> Date: Thu, 27 Feb 2025 16:40:57 -0800 Subject: [PATCH 152/584] Enable quantized cat Differential Revision: D69499329 Pull Request resolved: https://github.com/pytorch/executorch/pull/8757 --- backends/cadence/aot/quantizer/fusion_pass.py | 26 ++++++++- backends/cadence/aot/quantizer/patterns.py | 58 ++++++++++++++++++- backends/cadence/aot/quantizer/quantizer.py | 29 +++++++++- 3 files changed, 106 insertions(+), 7 deletions(-) diff --git a/backends/cadence/aot/quantizer/fusion_pass.py b/backends/cadence/aot/quantizer/fusion_pass.py index 51d019f155e..a726f6c7fba 100644 --- a/backends/cadence/aot/quantizer/fusion_pass.py +++ b/backends/cadence/aot/quantizer/fusion_pass.py @@ -13,6 +13,7 @@ AddmmPattern, AddPattern, BmmPattern, + CatPattern, Conv1dPattern, Conv2dPattern, LayerNormPattern, @@ -246,6 +247,16 @@ def get_args_and_kwargs_matmul( return args, kwargs +def get_args_and_kwargs_cat( + inputs_inputs: List[fx.Node], other_inputs: List[fx.Node], op_node: fx.Node +) -> Tuple[Tuple[ArgsType], Dict[str, ArgsType]]: + args = tuple([inputs_inputs] + other_inputs) + dim = op_node.args[1] if len(op_node.args) > 1 else 0 + # pyre-fixme[6]: Incompatible parameter type + kwargs = {"dim": int(dim)} + return args, kwargs + + def get_args_and_kwargs_conv( graph_module: GraphModule, inputs_inputs: List[fx.Node], @@ -390,12 +401,17 @@ def call(self, graph_module: fx.GraphModule) -> PassResult: # noqa: C901 self.mark_fused(p.nodes) dequants_inputs = [] - for node, idx in anchors.inputs: + for node, idx, *_spec in anchors.inputs: + arg = ( + node.args[idx] + if isinstance(idx, int) + else node.args[idx[0]][idx[1]] + ) if ( - node.args[idx].target + arg.target == torch.ops.quantized_decomposed.dequantize_per_tensor.default ): - dequants_inputs.append(node.args[idx]) + dequants_inputs.append(arg) dequants_weights = [] for node, idx in anchors.weights: if ( @@ -434,6 +450,10 @@ def call(self, graph_module: fx.GraphModule) -> PassResult: # noqa: C901 dequants_inputs, quant_node, ) + elif isinstance(pattern, CatPattern): + args, kwargs = get_args_and_kwargs_cat( + inputs_inputs, other_inputs, op_node + ) elif isinstance(pattern, (Conv1dPattern, Conv2dPattern)): args, kwargs = get_args_and_kwargs_conv( graph_module, diff --git a/backends/cadence/aot/quantizer/patterns.py b/backends/cadence/aot/quantizer/patterns.py index 0e907812b10..66f6772d942 100644 --- a/backends/cadence/aot/quantizer/patterns.py +++ b/backends/cadence/aot/quantizer/patterns.py @@ -33,7 +33,17 @@ class PartitionAnchors: is used for other types of input values as well as handling default parameters. 
""" - inputs: List[Tuple[fx.Node, int]] = field(default_factory=list) + # Inputs can share quantization parameters + inputs: List[ + Union[ + Tuple[fx.Node, Union[int, Tuple[int, int]]], + Tuple[ + fx.Node, + Union[int, Tuple[int, int]], + SharedQuantizationSpec, + ], + ] + ] = field(default_factory=list) weights: List[Tuple[fx.Node, int]] = field(default_factory=list) biases: List[ Union[Tuple[fx.Node, int], Tuple[fx.Node, int, DerivedQuantizationSpec]] @@ -155,6 +165,52 @@ def replacement_op(self) -> OpOverload: return torch.ops.cadence.quantized_matmul.default +class CatPattern(QuantizationPattern): + def partition_types(self) -> List[OpOverload]: + return [torch.ops.aten.cat.default] + + def get_anchors( + self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule] + ) -> PartitionAnchors: + # pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge... + cat_node = fused_partition[0].nodes[-1] + + # Create args. The first argument does not have quant spec and + # will inherit from the overall quant spec. All subsequent args + # will share that spec. + # Note that outpus also share that spec. + args: List[ + Union[ + Tuple[fx.Node, Union[int, Tuple[int, int]]], + Tuple[ + fx.Node, + Union[int, Tuple[int, int]], + SharedQuantizationSpec, + ], + ] + ] = [(cat_node, (0, 0))] + for i in range(1, len(cat_node.args[0])): + args.append( + ( + cat_node, + (0, i), + SharedQuantizationSpec((cat_node.args[0][0], cat_node)), + ) + ) + + return PartitionAnchors( + inputs=args, + weights=[], + biases=[], + output=[ + (cat_node, SharedQuantizationSpec((cat_node.args[0][0], cat_node))) + ], + ) + + def replacement_op(self) -> OpOverload: + return torch.ops.aten.cat.default + + class Conv1dPattern(QuantizationPattern): def partition_types(self) -> List[OpOverload]: return [torch.ops.aten.conv1d.default] diff --git a/backends/cadence/aot/quantizer/quantizer.py b/backends/cadence/aot/quantizer/quantizer.py index 42cc1a1df14..62727985452 100644 --- a/backends/cadence/aot/quantizer/quantizer.py +++ b/backends/cadence/aot/quantizer/quantizer.py @@ -14,6 +14,7 @@ AddmmPattern, AddPattern, BmmPattern, + CatPattern, Conv1dPattern, Conv2dPattern, LayerNormPattern, @@ -144,17 +145,38 @@ def annotate_inputs( "quantization_annotation", QuantizationAnnotation(_annotated=True), ) + arg = ( + # pyre-ignore[16]: no attribute + node.args[idx] + if isinstance(idx, int) + # pyre-ignore[16]: no attribute + else node.args[idx[0]][idx[1]] + ) + annotation.input_qspec_map[arg] = ( + custom_spec[0] if custom_spec else spec + ) # pyre-ignore[16]: no attribute + node.meta["quantization_annotation"] = annotation + + def annotate_weights_or_biases( + weights_or_biases: List[Tuple[fx.Node, int]], + spec: Optional[QuantizationSpec], + ) -> None: + for node, idx, *custom_spec in weights_or_biases: + annotation = node.meta.get( + "quantization_annotation", + QuantizationAnnotation(_annotated=True), + ) annotation.input_qspec_map[node.args[idx]] = ( custom_spec[0] if custom_spec else spec ) - # pyre-ignore[16]: no attribute node.meta["quantization_annotation"] = annotation + # pyre-ignore[6]: incompatible parameter type annotate_inputs(anchors.inputs, input_act_qspec) - annotate_inputs(anchors.weights, weight_qspec) + annotate_weights_or_biases(anchors.weights, weight_qspec) # pyre-ignore[6]: incompatible parameter type - annotate_inputs(anchors.biases, bias_qspec) + annotate_weights_or_biases(anchors.biases, bias_qspec) return model def validate(self, model: fx.GraphModule) -> None: @@ -223,4 +245,5 @@ def 
__init__(self, quantizers: Optional[list[Quantizer]] = None) -> None:
         if quantizers is None:
             quantizers = get_cadence_default_quantizers()
         quantizers.append(CadenceAtenQuantizer(AddPattern(), qconfig_A8uW8u))
+        quantizers.append(CadenceAtenQuantizer(CatPattern(), qconfig_A8uW8u))
         super().__init__(quantizers)
 
From b06bd8785c84d73d85cd26f693fc26015d6afd44 Mon Sep 17 00:00:00 2001
From: Hansong <107070759+kirklandsign@users.noreply.github.com>
Date: Thu, 27 Feb 2025 17:26:26 -0800
Subject: [PATCH 153/584] remove unused stuff in AndroidManifest

Differential Revision: D70360815

Pull Request resolved: https://github.com/pytorch/executorch/pull/8807
---
 .../ExecuTorchDemo/app/src/main/AndroidManifest.xml | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/AndroidManifest.xml b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/AndroidManifest.xml
index 4c16e3a994e..8d71b156398 100644
--- a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/AndroidManifest.xml
+++ b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/AndroidManifest.xml
@@ -27,18 +27,6 @@
 
 
-
-
-
-
-
-
-
-
 
Date: Thu, 27 Feb 2025 18:18:14 -0800
Subject: [PATCH 154/584] [Android docs] Add building instructions and code snippet (#8811)

Add more instructions, code snippet, link to example project
---
 docs/source/using-executorch-android.md | 74 ++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 3 deletions(-)

diff --git a/docs/source/using-executorch-android.md b/docs/source/using-executorch-android.md
index e8579db1e6f..39bd0c1970a 100644
--- a/docs/source/using-executorch-android.md
+++ b/docs/source/using-executorch-android.md
@@ -68,9 +68,47 @@ dependencies {
 
 Now you can compile your app with the ExecuTorch Android library.
 
-### Building from Source
+## Building from Source
 
-TODO Instructions on re-creating and customizing the Android AAR.
+`build/build_android_library.sh` is a helper script to build the Java library (into .jar), native library (into .so), and the packaged AAR file. It can also build
+demo apps to showcase that the AAR is integrated into a user app correctly.
+
+You need the Android [SDK](https://developer.android.com/studio) and [NDK](https://developer.android.com/ndk/downloads) to use it.
+
+Current NDK version used in ExecuTorch CI: r27b.
+
+You need to set `ANDROID_NDK` to the correct NDK root (containing the NOTICE file).
+
+```
+export ANDROID_NDK=/path/to/ndk
+sh build/build_android_library.sh
+```
+
+### Optional environment variables
+
+Optionally, set these environment variables before running `build_android_library.sh`.
+
+#### ANDROID_ABIS
+Set environment variable `ANDROID_ABIS` to either `arm64-v8a` or `x86_64` if you need to build the native library for only one ABI.
+```
+export ANDROID_ABIS=arm64-v8a
+# or
+# export ANDROID_ABIS=x86_64
+sh build/build_android_library.sh
+```
+
+#### EXECUTORCH_CMAKE_BUILD_TYPE
+Set environment variable `EXECUTORCH_CMAKE_BUILD_TYPE` to `Release` or `Debug` based on your needs.
+
+#### Using MediaTek backend
+
+To use [MediaTek backend](https://pytorch.org/executorch/main/backends-mediatek.html),
+after installing and setting up the SDK, set `NEURON_BUFFER_ALLOCATOR_LIB` and `NEURON_USDK_ADAPTER_LIB` to the corresponding paths.
+
+#### Using Qualcomm AI Engine Backend
+
+To use [Qualcomm AI Engine Backend](https://pytorch.org/executorch/main/backends-qualcomm.html#qualcomm-ai-engine-backend),
+after installing and setting up the SDK, set `QNN_SDK_ROOT` to the corresponding path.
 
 ## Android Backends
 
@@ -78,7 +116,37 @@ TODO Describe commonly used backends, including XNN, Vulkan, and NPUs.
 
 ## Runtime Integration
 
-TODO Code sample in Java
+Here is a Java code sample that demonstrates how to integrate ExecuTorch into an Android app:
+
+```java
+import org.pytorch.executorch.EValue;
+import org.pytorch.executorch.Module;
+import org.pytorch.executorch.Tensor;
+
+public class MainActivity extends Activity {
+    private Module module;
+
+    @Override
+    protected void onCreate(Bundle savedInstanceState) {
+        super.onCreate(savedInstanceState);
+        // Load the ExecuTorch module
+        module = Module.load("/path/to/module.pte");
+    }
+    public void runInference(View view) {
+        // Prepare input data
+        Tensor input = Tensor.fromBlob(getInputData());
+        // Run inference
+        Tensor output = module.forward(EValue.from(input))[0].toTensor();
+        // Process output data
+        processOutput(output);
+    }
+}
+```
+This example loads an ExecuTorch module, prepares input data, runs inference, and processes the output data.
+
+Please use [ExecuTorchDemo](https://github.com/pytorch/executorch/tree/main/examples/demo-apps/android/ExecuTorchDemo)
+and [LlamaDemo](https://github.com/pytorch/executorch/tree/main/examples/demo-apps/android/LlamaDemo) for the code examples
+using the ExecuTorch AAR package.
 
 ## Next Steps
 
From 5a9479a0e54bbcda2d53646255c3655d1f33acd7 Mon Sep 17 00:00:00 2001
From: Gasoonjia
Date: Thu, 27 Feb 2025 19:18:03 -0800
Subject: [PATCH 155/584] Remove no longer used `is_not_dype_exception` function

Differential Revision: D70337526

Pull Request resolved: https://github.com/pytorch/executorch/pull/8780
---
 exir/dialects/edge/spec/gen.py | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/exir/dialects/edge/spec/gen.py b/exir/dialects/edge/spec/gen.py
index 6a9c8d3caf8..6be6a2ae5ee 100644
--- a/exir/dialects/edge/spec/gen.py
+++ b/exir/dialects/edge/spec/gen.py
@@ -236,25 +236,6 @@ def print_error_msg(unsupported_funcs: List[str]):
         print(f)
 
 
-def is_not_dype_exception(exc: BaseException, dtype_str: str) -> bool:
-    """Check if an exception about unsupported dtype."""
-
-    # alias dtype means the alias name of dtype str, like "Boolean" is the alias name of "Bool".
-    # Set default alias_dtype as twice of str(exc) to make sure default alias dtype is not part of str(exc)
-    alias_dtype = 2 * str(exc)
-    if dtype_str == "Bool":
-        alias_dtype = "Boolean"
-
-    return not (
-        ("not supported" in str(exc) or "not implemented" in str(exc))
-        and (
-            dtype_str in str(exc)
-            or alias_dtype in str(exc)
-            or dtype_str.lower() in str(exc)
-        )
-    )
-
-
 class EdgeOpYamlInfo:
     def __init__(
         self,
From 6c7b5b521ef3db5fff4307574bf4b1d67b1c54ec Mon Sep 17 00:00:00 2001
From: Hansong <107070759+kirklandsign@users.noreply.github.com>
Date: Thu, 27 Feb 2025 19:19:11 -0800
Subject: [PATCH 156/584] [Android docs] Minor update (#8814)

---
 docs/source/using-executorch-android.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/using-executorch-android.md b/docs/source/using-executorch-android.md
index 39bd0c1970a..2535c267923 100644
--- a/docs/source/using-executorch-android.md
+++ b/docs/source/using-executorch-android.md
@@ -6,7 +6,7 @@ Note: This page covers Android app integration through the AAR library.
The Exec ## Installation -We package all ExecuTorch Android libraries into an Android library (AAR), `executorch.aar` for both generic (image/audio processing) and LLM (LLaMA) use case. In each release, we will upload the prebuilt AAR artifacts. Users can also build the AAR from source. +All ExecuTorch Android libraries are packaged into an Android library (AAR), `executorch.aar` for both generic (image/audio processing) and LLM (LLaMA) use case. In each release, prebuilt AAR artifacts are uploaded to S3. Users can also build the AAR from source. ### Contents of library From 76fb84462d51c62e5b174d8196a8c4e9c0390de0 Mon Sep 17 00:00:00 2001 From: Hansong <107070759+kirklandsign@users.noreply.github.com> Date: Thu, 27 Feb 2025 19:19:41 -0800 Subject: [PATCH 157/584] Update NDK version in using-executorch-building-from-source.md (#8812) Use NDK 27 because some early version doesn't support BF16. --- docs/source/using-executorch-building-from-source.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/using-executorch-building-from-source.md b/docs/source/using-executorch-building-from-source.md index 52fee0719f7..eae7fbabf57 100644 --- a/docs/source/using-executorch-building-from-source.md +++ b/docs/source/using-executorch-building-from-source.md @@ -218,7 +218,7 @@ Assuming Android NDK is available, run: mkdir cmake-android-out && cd cmake-android-out # point -DCMAKE_TOOLCHAIN_FILE to the location where ndk is installed -cmake -DCMAKE_TOOLCHAIN_FILE=/Users/{user_name}/Library/Android/sdk/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a .. +cmake -DCMAKE_TOOLCHAIN_FILE=/Users/{user_name}/Library/Android/sdk/ndk/27.2.12479018/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a .. cd .. cmake --build cmake-android-out -j9 From 80fcb414afcdea97c272999180d3186d78ec20a8 Mon Sep 17 00:00:00 2001 From: Chun-I Tsai Date: Fri, 28 Feb 2025 13:37:59 +0800 Subject: [PATCH 158/584] Qualcomm AI Engine Direct - Lift scalar arguments Differential Revision: D70069871 Pull Request resolved: https://github.com/pytorch/executorch/pull/8642 --- backends/qualcomm/_passes/__init__.py | 10 +- .../_passes/annotate_and_quant_scalar.py | 137 --------------- .../_passes/convert_binary_op_with_scalar.py | 41 ----- .../_passes/decompose_linalg_vector_norm.py | 6 +- backends/qualcomm/_passes/decompose_silu.py | 6 +- backends/qualcomm/_passes/layout_transform.py | 5 - .../_passes/lift_constant_scalar_operands.py | 161 ++++++++++++++++++ .../{convert_prelu.py => recompose_prelu.py} | 29 +++- backends/qualcomm/_passes/utils.py | 20 ++- backends/qualcomm/builders/node_visitor.py | 2 +- backends/qualcomm/builders/op_eq.py | 43 +---- backends/qualcomm/builders/op_ge.py | 43 +---- backends/qualcomm/builders/op_group_norm.py | 5 +- backends/qualcomm/builders/op_gt.py | 43 +---- backends/qualcomm/builders/op_index.py | 3 +- backends/qualcomm/builders/op_le.py | 43 +---- backends/qualcomm/builders/op_lt.py | 43 +---- backends/qualcomm/builders/op_pow.py | 42 ++--- backends/qualcomm/builders/op_prelu.py | 75 +++----- backends/qualcomm/builders/op_topk.py | 14 +- backends/qualcomm/quantizer/annotators.py | 30 ++-- backends/qualcomm/quantizer/quantizer.py | 4 +- backends/qualcomm/tests/test_qnn_delegate.py | 29 +++- backends/qualcomm/utils/utils.py | 32 ++-- 24 files changed, 320 insertions(+), 546 deletions(-) delete mode 100644 backends/qualcomm/_passes/annotate_and_quant_scalar.py delete mode 100644 backends/qualcomm/_passes/convert_binary_op_with_scalar.py create mode 
100644 backends/qualcomm/_passes/lift_constant_scalar_operands.py rename backends/qualcomm/_passes/{convert_prelu.py => recompose_prelu.py} (64%) diff --git a/backends/qualcomm/_passes/__init__.py b/backends/qualcomm/_passes/__init__.py index 36e3fb4356a..c5499b52d80 100644 --- a/backends/qualcomm/_passes/__init__.py +++ b/backends/qualcomm/_passes/__init__.py @@ -1,11 +1,8 @@ -from .annotate_and_quant_scalar import AnnotateAndQuantScalar from .annotate_decomposed import AnnotateDecomposed from .annotate_quant_attrs import AnnotateQuantAttrs from .constant_i64_to_i32 import ConstantI64toI32 -from .convert_binary_op_with_scalar import ConvertBinaryOpsWithScalar from .convert_bmm_to_matmul import ConvertBmmToMatmul from .convert_interpolate_with_upsample2d import ConvertInterpolateWithUpsample2D -from .convert_prelu import ConvertPReLU from .convert_to_linear import ConvertToLinear from .decompose_any import DecomposeAny from .decompose_einsum import DecomposeEinsum @@ -17,7 +14,9 @@ from .insert_io_qdq import InsertIOQDQ from .insert_requantize import InsertRequantize from .layout_transform import LayoutTransform +from .lift_constant_scalar_operands import LiftConstantScalarOperands from .recompose_pixel_unshuffle import RecomposePixelUnshuffle +from .recompose_prelu import RecomposePReLU from .recompose_rms_norm import RecomposeRmsNorm from .reduce_dynamic_range import ReduceDynamicRange from .remove_redundancy import RemoveRedundancy @@ -27,14 +26,12 @@ __all__ = [ - AnnotateAndQuantScalar, AnnotateDecomposed, AnnotateQuantAttrs, ConstantI64toI32, ConvertBmmToMatmul, - ConvertBinaryOpsWithScalar, ConvertInterpolateWithUpsample2D, - ConvertPReLU, + RecomposePReLU, ConvertToLinear, DecomposeAny, DecomposeEinsum, @@ -46,6 +43,7 @@ InsertIOQDQ, InsertRequantize, LayoutTransform, + LiftConstantScalarOperands, RecomposePixelUnshuffle, RecomposeRmsNorm, ReduceDynamicRange, diff --git a/backends/qualcomm/_passes/annotate_and_quant_scalar.py b/backends/qualcomm/_passes/annotate_and_quant_scalar.py deleted file mode 100644 index 9daaa4aa624..00000000000 --- a/backends/qualcomm/_passes/annotate_and_quant_scalar.py +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright (c) Qualcomm Innovation Center, Inc. -# All rights reserved -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. -import itertools -import operator -from typing import Dict - -import torch -from executorch.backends.qualcomm.builders.utils import get_parameter -from executorch.backends.qualcomm.utils.constants import QCOM_QUANT_ATTRS -from executorch.exir.pass_base import ExportPass, PassResult -from executorch.exir.passes import dead_code_elimination_pass -from torch.fx.passes.utils.source_matcher_utils import get_source_partitions - -from .utils import dq_ops, get_quant_attrs - - -class AnnotateAndQuantScalar(ExportPass): - """ - For binary operators who take constant scalar as one of its inputs, - will annotate encoding to the constant if necessary. 
- """ - - binary_op_sources = [ - operator.add, - operator.sub, - operator.mul, - operator.truediv, - torch.add, - torch.sub, - torch.mul, - torch.div, - torch.ops.aten.add.Scalar, - torch.ops.aten.sub.Scalar, - torch.ops.aten.mul.Scalar, - torch.ops.aten.div.Scalar, - torch.ops.aten.mul.Tensor, - "add", - "sub", - "mul", - "truediv", - ] - - def __init__(self, edge_program: torch.export.ExportedProgram): - super(AnnotateAndQuantScalar, self).__init__() - self.edge_program = edge_program - - def _get_source_scalar_node(self, node: torch.fx.Node) -> torch.fx.Node: - """ - This recursion function is specific for multiply followed by a cast - """ - if node.op == "placeholder": - if not (shape := node.meta["val"].size()): - return node - assert ( - not shape - ), f"The output of node {node} is not a scalar, but a tensor with shape {shape}" - return self._get_source_scalar_node(node.args[0]) - - def _update_scalar_node_attrs(self, node: torch.fx.Node, quant_attrs: Dict) -> Dict: - val = get_parameter(node, self.edge_program) - quant_range = quant_attrs["quant_max"] - quant_attrs["quant_min"] - # Use 0 as the zero_point for scalar - quant_attrs["zero_point"] = 0 if val >= 0 else quant_attrs["quant_max"] - quant_attrs["scale"] = ( - val.div(quant_range) if val >= 0 else -val.div(quant_range) - ) - return quant_attrs - - def _annotate_scalar_node( - self, - be_annotated_node: torch.fx.Node, - quant_attrs: Dict, - ) -> None: - """ - This recursion function is specific for multiply followed by a cast - """ - if be_annotated_node.meta["val"].dtype not in [ - float, - torch.float32, - torch.int32, - torch.int64, - ]: - return - - be_annotated_node.meta[QCOM_QUANT_ATTRS] = quant_attrs - - def _traverse_binary_node(self, graph_module: torch.fx.GraphModule): - src_partitions = get_source_partitions( - graph_module.graph, self.binary_op_sources - ) - src_partitions = list(itertools.chain(*src_partitions.values())) - processed = set() - for src_partition in src_partitions: - # need post process here to identify partitioned nodes: - src_fn_dict = {} - for n in src_partition.nodes: - # e.g. 
- # meta["source_fn_stack"]: [('mul', )] - # we'll use as grouping key - node_list = src_fn_dict.setdefault(n.meta["source_fn_stack"][-1][1], []) - node_list.append(n) - - for nodes in src_fn_dict.values(): - output = [n for n in nodes if n in src_partition.output_nodes][0] - # if all args have been annotated, it shouldn't be a scalar operation - if all(arg.target in dq_ops for arg in output.args): - continue - - if output not in processed and QCOM_QUANT_ATTRS in output.meta: - dq_node = [n for n in output.args if n.target in dq_ops][0] - q_node = dq_node.args[0] - q_node_attrs = get_quant_attrs(graph_module, q_node) - - scalar_nodes = [n for n in output.args if n != dq_node] - if len(scalar_nodes) == 0: - continue - - scalar_node = scalar_nodes[0] - source_scalar_node = self._get_source_scalar_node(scalar_node) - # we'll abandon cast op here, since the constant scalar will - # be pre-loaded into QNN context binary - output.replace_input_with(scalar_node, source_scalar_node) - - scalar_quant_attrs = self._update_scalar_node_attrs( - source_scalar_node, q_node_attrs - ) - self._annotate_scalar_node(source_scalar_node, scalar_quant_attrs) - processed.add(output) - - def call(self, graph_module: torch.fx.GraphModule): - self._traverse_binary_node(graph_module) - graph_module.recompile() - dead_code_elimination_pass(graph_module) - return PassResult(graph_module, True) diff --git a/backends/qualcomm/_passes/convert_binary_op_with_scalar.py b/backends/qualcomm/_passes/convert_binary_op_with_scalar.py deleted file mode 100644 index 22ce48800d0..00000000000 --- a/backends/qualcomm/_passes/convert_binary_op_with_scalar.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) Qualcomm Innovation Center, Inc. -# All rights reserved -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. -from typing import Dict, Tuple - -import torch -from executorch.exir.pass_base import ExportPass -from torch._export.pass_base import Argument -from torch._export.pass_infra.node_metadata import NodeMetadata -from torch._export.pass_infra.proxy_value import ProxyValue - - -class ConvertBinaryOpsWithScalar(ExportPass): - """ - Replace binary ops with scalar into binary ops with tensor. - Since torch.ops.aten.xxx.Scalar will not generate a placeholder node - for scalar after to_edge. - """ - - binary_ops_with_scalar = { - torch.ops.aten.add.Scalar: torch.ops.aten.add.Tensor, - torch.ops.aten.sub.Scalar: torch.ops.aten.sub.Tensor, - torch.ops.aten.div.Scalar: torch.ops.aten.div.Tensor, - torch.ops.aten.mul.Scalar: torch.ops.aten.mul.Tensor, - } - - def __init__(self): - super(ConvertBinaryOpsWithScalar, self).__init__() - - def call_operator( - self, - op, - args: Tuple[Argument, ...], - kwargs: Dict[str, Argument], - meta: NodeMetadata, - ) -> ProxyValue: - return super().call_operator( - self.binary_ops_with_scalar.get(op, op), args, kwargs, meta - ) diff --git a/backends/qualcomm/_passes/decompose_linalg_vector_norm.py b/backends/qualcomm/_passes/decompose_linalg_vector_norm.py index 8006780863b..0ee74720c78 100644 --- a/backends/qualcomm/_passes/decompose_linalg_vector_norm.py +++ b/backends/qualcomm/_passes/decompose_linalg_vector_norm.py @@ -32,9 +32,9 @@ class DecomposeLinalgVectorNorm(ExportPass): Decompose for math equivalent op. 
""" - def __init__(self, quantization_capture=False) -> None: + def __init__(self, aten_dialect_capture=False) -> None: super().__init__() - self.quantization_capture = quantization_capture + self.aten_dialect_capture = aten_dialect_capture def call(self, graph_module: torch.fx.GraphModule) -> PassResult: graph = graph_module.graph @@ -44,7 +44,7 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult: dim = node.args[2] if len(node.args) > 2 else None keepdim = node.args[3] if len(node.args) > 3 else False model = LinalgVectorNorm(ord, dim, keepdim) - if self.quantization_capture: + if self.aten_dialect_capture: decomposed_module = torch.export.export( model, (node.args[0].meta["val"],) ).module() diff --git a/backends/qualcomm/_passes/decompose_silu.py b/backends/qualcomm/_passes/decompose_silu.py index ca1a566be1e..96c48920419 100644 --- a/backends/qualcomm/_passes/decompose_silu.py +++ b/backends/qualcomm/_passes/decompose_silu.py @@ -30,13 +30,15 @@ def call(self, graph_module: torch.fx.GraphModule): silu_node_input = node.args[0] with graph_module.graph.inserting_after(silu_node_input): sigmoid_node = graph.create_node( - "call_function", torch.ops.aten.sigmoid, (silu_node_input,) + "call_function", + torch.ops.aten.sigmoid.default, + (silu_node_input,), ) sigmoid_node.meta = self._copy_meta(silu_node.meta) with graph_module.graph.inserting_after(sigmoid_node): mul_node = graph.create_node( "call_function", - torch.ops.aten.mul, + torch.ops.aten.mul.Tensor, (silu_node_input, sigmoid_node), ) mul_node.meta = self._copy_meta(silu_node.meta) diff --git a/backends/qualcomm/_passes/layout_transform.py b/backends/qualcomm/_passes/layout_transform.py index e822a52d1cf..967ae7afd2b 100644 --- a/backends/qualcomm/_passes/layout_transform.py +++ b/backends/qualcomm/_passes/layout_transform.py @@ -53,20 +53,15 @@ class LayoutTransform(ExportPass): exir_ops.edge.aten.clamp.default, exir_ops.edge.aten.constant_pad_nd.default, exir_ops.edge.aten.div.Tensor, - exir_ops.edge.aten.eq.Scalar, exir_ops.edge.aten.eq.Tensor, exir_ops.edge.aten.full.default, exir_ops.edge.aten.full_like.default, - exir_ops.edge.aten.ge.Scalar, exir_ops.edge.aten.ge.Tensor, exir_ops.edge.aten.gelu.default, - exir_ops.edge.aten.gt.Scalar, exir_ops.edge.aten.gt.Tensor, exir_ops.edge.aten.hardswish.default, exir_ops.edge.aten.hardsigmoid.default, exir_ops.edge.aten.hardtanh.default, - exir_ops.edge.aten.leaky_relu.default, - exir_ops.edge.aten.le.Scalar, exir_ops.edge.aten.le.Tensor, exir_ops.edge.aten.linear.default, exir_ops.edge.aten.log.default, diff --git a/backends/qualcomm/_passes/lift_constant_scalar_operands.py b/backends/qualcomm/_passes/lift_constant_scalar_operands.py new file mode 100644 index 00000000000..749d30f3564 --- /dev/null +++ b/backends/qualcomm/_passes/lift_constant_scalar_operands.py @@ -0,0 +1,161 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +from dataclasses import dataclass +from numbers import Number +from types import BuiltinFunctionType, BuiltinMethodType +from typing import Dict + +import torch +from executorch.backends.qualcomm._passes.utils import is_float_tensor +from executorch.exir.pass_base import ExportPass, PassResult +from executorch.exir.passes import dead_code_elimination_pass +from torch import fx +from torch.ao.quantization.fx.utils import get_new_attr_name_with_prefix +from torch.ops import aten as aten + + +@dataclass(frozen=True) +class TensorConstant: + tensor: torch.Tensor + name: str + + +@dataclass(frozen=True) +class TensorOpInfo: + target: torch._ops.OpOverload + use_schema_args: bool + + +SCALAR_OPS = { + aten.eq.Scalar: TensorOpInfo(aten.eq.Tensor, False), + aten.ge.Scalar: TensorOpInfo(aten.ge.Tensor, False), + aten.gt.Scalar: TensorOpInfo(aten.gt.Tensor, False), + aten.le.Scalar: TensorOpInfo(aten.le.Tensor, False), + aten.lt.Scalar: TensorOpInfo(aten.lt.Tensor, False), + aten.ne.Scalar: TensorOpInfo(aten.ne.Tensor, False), + aten.add.Scalar: TensorOpInfo(aten.add.Tensor, False), + aten.add_.Scalar: TensorOpInfo(aten.add_.Tensor, False), + aten.div.Scalar: TensorOpInfo(aten.div.Tensor, False), + aten.mul.Scalar: TensorOpInfo(aten.mul.Tensor, False), + aten.rsub.Scalar: TensorOpInfo(aten.rsub.Tensor, False), + aten.sub.Scalar: TensorOpInfo(aten.sub.Tensor, False), + aten.pow.Tensor_Scalar: TensorOpInfo(aten.pow.Tensor_Tensor, False), + # The scalar number arg[1] is missing when using default. Result in a corner case to deal + aten.leaky_relu.default: TensorOpInfo(aten.prelu.default, True), +} + + +SKIP_LIFT_OPS = {aten.full_like.default, aten.arange.start_step} + + +class LiftConstantScalarOperands(ExportPass): + """ + Lift constant scalar so that we can use observer of quantizer + """ + + def __init__(self): + super(LiftConstantScalarOperands, self).__init__() + + def _build_tensor_constant( + self, gm: torch.fx.GraphModule, node: fx.Node, const_val + ) -> TensorConstant: + tensor = torch.tensor( + [const_val], + dtype=( + node.args[0].meta["val"].dtype + if not is_float_tensor(node) + else node.meta["val"].dtype + ), + device=node.meta["val"].device, + ) + name = get_new_attr_name_with_prefix("_tensor_constant_")(gm) + tensor_constant = TensorConstant(tensor, name) + return tensor_constant + + def _register_tensor( + self, gm: torch.fx.GraphModule, node: fx.Node, tensor_constant: TensorConstant + ) -> fx.Node: + gm.register_buffer(tensor_constant.name, tensor_constant.tensor) + + fake_mode = node.meta["val"].fake_mode + with gm.graph.inserting_before(node): + get_attr_node = gm.graph.get_attr(tensor_constant.name) + get_attr_node.meta["val"] = fake_mode.from_tensor(tensor_constant.tensor) + return get_attr_node + + def _update_node(self, node: fx.Node, tensor_args: Dict) -> None: + new_args = list(node.args) + if (info := SCALAR_OPS.get(node.target)) and info.use_schema_args: + new_args += [None] * max( + 0, (len(node.target._schema.arguments) - len(new_args)) + ) + + for k, v in tensor_args.items(): + new_args[k] = v + node.args = tuple(new_args) + node.target = SCALAR_OPS.get(node.target, node).target + + def _create_tensor_args( + self, node: fx.Node, gm: torch.fx.graph_module + ) -> Dict[int, TensorConstant]: + tensor_args = {} + for i, arg in enumerate(node.args): + schema = node.target._schema.arguments[i] + is_tensor_arg_got_num = isinstance( + schema.type, torch.TensorType + ) and isinstance(arg, Number) + + is_scalar_arg = ( + isinstance(schema.type, torch.NumberType) and node.target 
in SCALAR_OPS + ) + + # This is for showing warning of new-coming op + is_arg_num_type = ( + isinstance(schema.type, torch.NumberType) + and node.target not in SCALAR_OPS + ) + + if is_tensor_arg_got_num or is_scalar_arg: + tensor_constant = self._build_tensor_constant(gm, node, arg) + tensor_constant_node = self._register_tensor(gm, node, tensor_constant) + tensor_args[i] = tensor_constant_node + + elif is_arg_num_type: + print( + f"[WARNING] the {i} th arg of node {node} is NumberType, might need to lift" + ) + + if (info := SCALAR_OPS.get(node.target)) and info.use_schema_args: + schema_args = list(node.target._schema.arguments) + for i, sa in enumerate(schema_args): + if isinstance(sa.type, torch.NumberType) and i not in tensor_args: + tensor_constant = self._build_tensor_constant( + gm, node, sa.default_value + ) + tensor_constant_node = self._register_tensor( + gm, node, tensor_constant + ) + tensor_args[i] = tensor_constant_node + return tensor_args + + def _lift(self, gm: torch.fx.GraphModule) -> None: + for n in gm.graph.nodes: + if ( + n.op != "call_function" + or isinstance(n.target, (BuiltinMethodType, BuiltinFunctionType)) + or n.target in SKIP_LIFT_OPS + ): + continue + + if tensor_args := self._create_tensor_args(n, gm): + self._update_node(n, tensor_args) + + def call(self, graph_module: torch.fx.GraphModule): + self._lift(graph_module) + graph_module.recompile() + dead_code_elimination_pass(graph_module) + return PassResult(graph_module, True) diff --git a/backends/qualcomm/_passes/convert_prelu.py b/backends/qualcomm/_passes/recompose_prelu.py similarity index 64% rename from backends/qualcomm/_passes/convert_prelu.py rename to backends/qualcomm/_passes/recompose_prelu.py index 6e2cd677781..082b9c83b27 100644 --- a/backends/qualcomm/_passes/convert_prelu.py +++ b/backends/qualcomm/_passes/recompose_prelu.py @@ -3,35 +3,48 @@ # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +from typing import List + import torch from executorch.exir.dialects._ops import ops as exir_ops from executorch.exir.pass_base import ExportPass, PassResult from torch.fx.passes.utils.source_matcher_utils import get_source_partitions -class ConvertPReLU(ExportPass): +class RecomposePReLU(ExportPass): """ Merge decomposed operators from prelu back to one super node. 
""" def __init__(self, edge_program: torch.export.ExportedProgram): - super(ConvertPReLU, self).__init__() + super(RecomposePReLU, self).__init__() self.edge_program = edge_program + def _get_coeff_node(self, nodes: List[torch.fx.Node]): + for node in nodes: + if node.target == exir_ops.edge.aten.view_copy.default: + return node.args[0] + + def _get_input_node(self, nodes: List[torch.fx.Node], coeff_node): + return [n for n in nodes if n != coeff_node][0] + def call(self, graph_module: torch.fx.GraphModule): graph = graph_module.graph - partitions = get_source_partitions(graph, [torch.nn.PReLU]) + partitions = get_source_partitions(graph, [torch.nn.PReLU, torch.nn.LeakyReLU]) for _, src_partitions in partitions.items(): for src_partition in src_partitions: - input_node = src_partition.input_nodes[0] + # somehow op might not be decomposed, skip it + if len(src_partition.nodes) == 1: + continue + + coeff_node = self._get_coeff_node(src_partition.nodes) + input_node = self._get_input_node(src_partition.input_nodes, coeff_node) output_node = src_partition.output_nodes[0] - placeholders = [n for n in src_partition.nodes if n.op == "placeholder"] - assert len(placeholders) == 1 - with graph.inserting_after(input_node): + with graph.inserting_before(output_node): prelu_op = exir_ops.edge.aten.prelu.default prelu_node = graph.create_node( - "call_function", prelu_op, (input_node, placeholders[0]) + "call_function", prelu_op, (input_node, coeff_node) ) users = output_node.users.copy() for user in users: diff --git a/backends/qualcomm/_passes/utils.py b/backends/qualcomm/_passes/utils.py index febea6959db..68056d53aca 100755 --- a/backends/qualcomm/_passes/utils.py +++ b/backends/qualcomm/_passes/utils.py @@ -8,6 +8,7 @@ from executorch.backends.qualcomm.builders.utils import get_parameter from executorch.backends.qualcomm.utils.constants import QCOM_ENCODING from executorch.exir.dialects._ops import ops as exir_ops +from torch._subclasses import FakeTensor q_ops = { @@ -57,13 +58,11 @@ def get_passes_dependency_for_capture_program(): dict: A dictionary mapping each pass to its corresponding list of dependencies. 
""" from executorch.backends.qualcomm._passes import ( - AnnotateAndQuantScalar, AnnotateDecomposed, AnnotateQuantAttrs, ConstantI64toI32, ConvertBmmToMatmul, ConvertInterpolateWithUpsample2D, - ConvertPReLU, ConvertToLinear, DecomposeAny, DecomposeLinalgVectorNorm, @@ -71,6 +70,7 @@ def get_passes_dependency_for_capture_program(): FoldQDQ, LayoutTransform, RecomposePixelUnshuffle, + RecomposePReLU, RecomposeRmsNorm, RemoveRedundancy, ReplaceIndexPutInput, @@ -78,34 +78,36 @@ def get_passes_dependency_for_capture_program(): ) return { - AnnotateAndQuantScalar: [ - AnnotateQuantAttrs, - ], AnnotateDecomposed: [RemoveRedundancy], AnnotateQuantAttrs: [ RecomposePixelUnshuffle, RecomposeRmsNorm, ConvertToLinear, - ConvertPReLU, + RecomposePReLU, ConvertBmmToMatmul, ConvertInterpolateWithUpsample2D, ], ConstantI64toI32: [ConvertInterpolateWithUpsample2D], ConvertBmmToMatmul: [ConvertToLinear], ConvertInterpolateWithUpsample2D: [RemoveRedundancy], - ConvertPReLU: [RemoveRedundancy], ConvertToLinear: [RecomposePixelUnshuffle], DecomposeAny: [RemoveRedundancy], DecomposeLinalgVectorNorm: [RemoveRedundancy], ExpandBroadcastTensorShape: [RemoveRedundancy], - FoldQDQ: [AnnotateQuantAttrs, AnnotateAndQuantScalar, AnnotateDecomposed], + FoldQDQ: [AnnotateQuantAttrs, AnnotateDecomposed], LayoutTransform: [ AnnotateQuantAttrs, - AnnotateAndQuantScalar, ExpandBroadcastTensorShape, ], RecomposePixelUnshuffle: [RemoveRedundancy], + RecomposePReLU: [RemoveRedundancy], RecomposeRmsNorm: [RemoveRedundancy], ReplaceIndexPutInput: [LayoutTransform], TensorI64toI32: [RemoveRedundancy], } + + +def is_float_tensor(node: torch.fx.Node) -> bool: + if "val" not in node.meta or not isinstance(node.meta["val"], FakeTensor): + return False + return node.meta["val"].dtype == torch.float32 diff --git a/backends/qualcomm/builders/node_visitor.py b/backends/qualcomm/builders/node_visitor.py index f450811ab70..1e0d2039641 100644 --- a/backends/qualcomm/builders/node_visitor.py +++ b/backends/qualcomm/builders/node_visitor.py @@ -106,7 +106,7 @@ def _get_tensor(node, index): return node.meta["val"] tensor = _get_tensor(input_node, idx) - if len(tensor.shape) != 0 and QCOM_AXIS_ORDER in op_node.meta: + if len(tensor.shape) > 1 and QCOM_AXIS_ORDER in op_node.meta: tensor = tensor.permute(dims=op_node.meta[QCOM_AXIS_ORDER]).contiguous() return tensor diff --git a/backends/qualcomm/builders/op_eq.py b/backends/qualcomm/builders/op_eq.py index ac682c3c1e2..855c5e13be6 100644 --- a/backends/qualcomm/builders/op_eq.py +++ b/backends/qualcomm/builders/op_eq.py @@ -8,14 +8,6 @@ import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper import torch -from executorch.backends.qualcomm.utils.constants import ( - QCOM_QUANT_ATTRS, - QCOM_QUANT_MAX, - QCOM_QUANT_MIN, - QCOM_SCALE, - QCOM_ZERO_POINT, -) -from executorch.exir.dialects._ops import ops as exir_ops from .node_visitor import NodeVisitor, register_node_visitor from .qnn_constants import OpElementWiseEqual, QNN_OP_PACKAGE_NAME_QTI_AISW @@ -23,7 +15,7 @@ @register_node_visitor class Equal(NodeVisitor): - target = ["aten.eq.Tensor", "aten.eq.Scalar"] + target = ["aten.eq.Tensor"] def __init__(self, *args) -> None: super().__init__(*args) @@ -46,37 +38,8 @@ def define_node( input_tensors = [] for index in range(2): input_node = node.args[index] - if isinstance(input_node, torch.fx.Node): - input_tensor = self.get_tensor(input_node, node) - tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE - else: - scalar = input_node - input_tensor = 
torch.tensor(scalar, dtype=torch.float32) - tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC - - # 'graph', 'name', 'op', 'target', 'args', and 'kwargs' - input_node = torch.fx.Node( - node.graph, - node.name + "_runtime_scalar", - "call_function", - exir_ops.edge.aten.scalar_tensor.default, - (), # args - {}, # kwargs - ) - # Because the output data type of the eq node is boolean. - # We need to take the quant attr from the non-scalar node. - if quant_attrs := node.args[index ^ 1].meta.get(QCOM_QUANT_ATTRS): - quant_attrs = quant_attrs.copy() - quant_range = ( - quant_attrs[QCOM_QUANT_MAX] - quant_attrs[QCOM_QUANT_MIN] - ) - quant_attrs[QCOM_ZERO_POINT] = ( - 0 if scalar >= 0 else quant_attrs[QCOM_QUANT_MAX] - ) - quant_attrs[QCOM_SCALE] = ( - scalar / quant_range if scalar >= 0 else -scalar / quant_range - ) - input_node.meta[QCOM_QUANT_ATTRS] = quant_attrs + input_tensor = self.get_tensor(input_node, node) + tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE input_tensor_wrapper = self.define_tensor( input_node, diff --git a/backends/qualcomm/builders/op_ge.py b/backends/qualcomm/builders/op_ge.py index 552cab659cc..6784167aa5b 100644 --- a/backends/qualcomm/builders/op_ge.py +++ b/backends/qualcomm/builders/op_ge.py @@ -8,14 +8,6 @@ import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper import torch -from executorch.backends.qualcomm.utils.constants import ( - QCOM_QUANT_ATTRS, - QCOM_QUANT_MAX, - QCOM_QUANT_MIN, - QCOM_SCALE, - QCOM_ZERO_POINT, -) -from executorch.exir.dialects._ops import ops as exir_ops from .node_visitor import NodeVisitor, register_node_visitor from .qnn_constants import OpElementWiseGreaterEqual, QNN_OP_PACKAGE_NAME_QTI_AISW @@ -23,7 +15,7 @@ @register_node_visitor class GreaterEqual(NodeVisitor): - target = ["aten.ge.Tensor", "aten.ge.Scalar"] + target = ["aten.ge.Tensor"] def __init__(self, *args) -> None: super().__init__(*args) @@ -46,37 +38,8 @@ def define_node( input_tensors = [] for index in range(2): input_node = node.args[index] - if isinstance(input_node, torch.fx.Node): - input_tensor = self.get_tensor(input_node, node) - tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE - else: - scalar = input_node - input_tensor = torch.tensor(scalar, dtype=torch.float32) - tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC - - # 'graph', 'name', 'op', 'target', 'args', and 'kwargs' - input_node = torch.fx.Node( - node.graph, - node.name + "_runtime_scalar", - "call_function", - exir_ops.edge.aten.scalar_tensor.default, - (), # args - {}, # kwargs - ) - # Because the output data type of the ge node is boolean. - # We need to take the quant attr from the non-scalar node. 
- if quant_attrs := node.args[index ^ 1].meta.get(QCOM_QUANT_ATTRS): - quant_attrs = quant_attrs.copy() - quant_range = ( - quant_attrs[QCOM_QUANT_MAX] - quant_attrs[QCOM_QUANT_MIN] - ) - quant_attrs[QCOM_ZERO_POINT] = ( - 0 if scalar >= 0 else quant_attrs[QCOM_QUANT_MAX] - ) - quant_attrs[QCOM_SCALE] = ( - scalar / quant_range if scalar >= 0 else -scalar / quant_range - ) - input_node.meta[QCOM_QUANT_ATTRS] = quant_attrs + input_tensor = self.get_tensor(input_node, node) + tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE input_tensor_wrapper = self.define_tensor( input_node, diff --git a/backends/qualcomm/builders/op_group_norm.py b/backends/qualcomm/builders/op_group_norm.py index d498b202d71..26700216b53 100644 --- a/backends/qualcomm/builders/op_group_norm.py +++ b/backends/qualcomm/builders/op_group_norm.py @@ -10,6 +10,7 @@ import numpy as np import torch +from executorch.backends.qualcomm.utils.constants import QCOM_DATA from .node_visitor import NodeVisitor, register_node_visitor from .qnn_constants import OpGroupNorm, QNN_OP_PACKAGE_NAME_QTI_AISW @@ -81,12 +82,12 @@ def define_node( group_norm_op.AddScalarParam( OpGroupNorm.param_epsilon, PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_FLOAT_32, - {"data": np.float32(epsilon)}, + {QCOM_DATA: np.float32(epsilon)}, ) group_norm_op.AddScalarParam( OpGroupNorm.param_group, PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_32, - {"data": np.uint32(group)}, + {QCOM_DATA: np.uint32(group)}, ) return group_norm_op diff --git a/backends/qualcomm/builders/op_gt.py b/backends/qualcomm/builders/op_gt.py index 443017b7b0d..6c311f42b7f 100644 --- a/backends/qualcomm/builders/op_gt.py +++ b/backends/qualcomm/builders/op_gt.py @@ -8,14 +8,6 @@ import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper import torch -from executorch.backends.qualcomm.utils.constants import ( - QCOM_QUANT_ATTRS, - QCOM_QUANT_MAX, - QCOM_QUANT_MIN, - QCOM_SCALE, - QCOM_ZERO_POINT, -) -from executorch.exir.dialects._ops import ops as exir_ops from .node_visitor import NodeVisitor, register_node_visitor from .qnn_constants import OpElementWiseGreater, QNN_OP_PACKAGE_NAME_QTI_AISW @@ -23,7 +15,7 @@ @register_node_visitor class GreaterThan(NodeVisitor): - target = ["aten.gt.Tensor", "aten.gt.Scalar"] + target = ["aten.gt.Tensor"] def __init__(self, *args) -> None: super().__init__(*args) @@ -46,37 +38,8 @@ def define_node( input_tensors = [] for index in range(2): input_node = node.args[index] - if isinstance(input_node, torch.fx.Node): - input_tensor = self.get_tensor(input_node, node) - tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE - else: - scalar = input_node - input_tensor = torch.tensor(scalar, dtype=torch.float32) - tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC - - # 'graph', 'name', 'op', 'target', 'args', and 'kwargs' - input_node = torch.fx.Node( - node.graph, - node.name + "_runtime_scalar", - "call_function", - exir_ops.edge.aten.scalar_tensor.default, - (), # args - {}, # kwargs - ) - # Because the output data type of the gt node is boolean. - # We need to take the quant attr from the non-scalar node. 
- if quant_attrs := node.args[index ^ 1].meta.get(QCOM_QUANT_ATTRS): - quant_attrs = quant_attrs.copy() - quant_range = ( - quant_attrs[QCOM_QUANT_MAX] - quant_attrs[QCOM_QUANT_MIN] - ) - quant_attrs[QCOM_ZERO_POINT] = ( - 0 if scalar >= 0 else quant_attrs[QCOM_QUANT_MAX] - ) - quant_attrs[QCOM_SCALE] = ( - scalar / quant_range if scalar >= 0 else -scalar / quant_range - ) - input_node.meta[QCOM_QUANT_ATTRS] = quant_attrs + input_tensor = self.get_tensor(input_node, node) + tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE input_tensor_wrapper = self.define_tensor( input_node, diff --git a/backends/qualcomm/builders/op_index.py b/backends/qualcomm/builders/op_index.py index 4ddab23aeae..e78284a5e32 100644 --- a/backends/qualcomm/builders/op_index.py +++ b/backends/qualcomm/builders/op_index.py @@ -9,6 +9,7 @@ import numpy as np import torch +from executorch.backends.qualcomm.utils.constants import QCOM_DATA from .node_visitor import NodeVisitor, register_node_visitor from .qnn_constants import OpGather, QNN_OP_PACKAGE_NAME_QTI_AISW @@ -77,7 +78,7 @@ def define_node( gather_op.AddScalarParam( OpGather.param_axis, PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_INT_32, - {"data": np.int32(0)}, + {QCOM_DATA: np.int32(0)}, ) return gather_op diff --git a/backends/qualcomm/builders/op_le.py b/backends/qualcomm/builders/op_le.py index d057c04708a..1dd2a06b777 100644 --- a/backends/qualcomm/builders/op_le.py +++ b/backends/qualcomm/builders/op_le.py @@ -8,14 +8,6 @@ import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper import torch -from executorch.backends.qualcomm.utils.constants import ( - QCOM_QUANT_ATTRS, - QCOM_QUANT_MAX, - QCOM_QUANT_MIN, - QCOM_SCALE, - QCOM_ZERO_POINT, -) -from executorch.exir.dialects._ops import ops as exir_ops from .node_visitor import NodeVisitor, register_node_visitor from .qnn_constants import OpElementWiseLessEqual, QNN_OP_PACKAGE_NAME_QTI_AISW @@ -23,7 +15,7 @@ @register_node_visitor class LessEqual(NodeVisitor): - target = ["aten.le.Tensor", "aten.le.Scalar"] + target = ["aten.le.Tensor"] def __init__(self, *args) -> None: super().__init__(*args) @@ -46,37 +38,8 @@ def define_node( input_tensors = [] for index in range(2): input_node = node.args[index] - if isinstance(input_node, torch.fx.Node): - input_tensor = self.get_tensor(input_node, node) - tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE - else: - scalar = input_node - input_tensor = torch.tensor(scalar, dtype=torch.float32) - tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC - - # 'graph', 'name', 'op', 'target', 'args', and 'kwargs' - input_node = torch.fx.Node( - node.graph, - node.name + "_runtime_scalar", - "call_function", - exir_ops.edge.aten.scalar_tensor.default, - (), # args - {}, # kwargs - ) - # Because the output data type of the le node is boolean. - # We need to take the quant attr from the non-scalar node. 
- if quant_attrs := node.args[index ^ 1].meta.get(QCOM_QUANT_ATTRS): - quant_attrs = quant_attrs.copy() - quant_range = ( - quant_attrs[QCOM_QUANT_MAX] - quant_attrs[QCOM_QUANT_MIN] - ) - quant_attrs[QCOM_ZERO_POINT] = ( - 0 if scalar >= 0 else quant_attrs[QCOM_QUANT_MAX] - ) - quant_attrs[QCOM_SCALE] = ( - scalar / quant_range if scalar >= 0 else -scalar / quant_range - ) - input_node.meta[QCOM_QUANT_ATTRS] = quant_attrs + input_tensor = self.get_tensor(input_node, node) + tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE input_tensor_wrapper = self.define_tensor( input_node, diff --git a/backends/qualcomm/builders/op_lt.py b/backends/qualcomm/builders/op_lt.py index 6275478254e..b4a080efc38 100644 --- a/backends/qualcomm/builders/op_lt.py +++ b/backends/qualcomm/builders/op_lt.py @@ -8,14 +8,6 @@ import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper import torch -from executorch.backends.qualcomm.utils.constants import ( - QCOM_QUANT_ATTRS, - QCOM_QUANT_MAX, - QCOM_QUANT_MIN, - QCOM_SCALE, - QCOM_ZERO_POINT, -) -from executorch.exir.dialects._ops import ops as exir_ops from .node_visitor import NodeVisitor, register_node_visitor from .qnn_constants import OpElementWiseLess, QNN_OP_PACKAGE_NAME_QTI_AISW @@ -23,7 +15,7 @@ @register_node_visitor class LessThan(NodeVisitor): - target = ["aten.lt.Tensor", "aten.lt.Scalar"] + target = ["aten.lt.Tensor"] def __init__(self, *args) -> None: super().__init__(*args) @@ -46,37 +38,8 @@ def define_node( input_tensors = [] for index in range(2): input_node = node.args[index] - if isinstance(input_node, torch.fx.Node): - input_tensor = self.get_tensor(input_node, node) - tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE - else: - scalar = input_node - input_tensor = torch.tensor(scalar, dtype=torch.float32) - tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC - - # 'graph', 'name', 'op', 'target', 'args', and 'kwargs' - input_node = torch.fx.Node( - node.graph, - node.name + "_runtime_scalar", - "call_function", - exir_ops.edge.aten.scalar_tensor.default, - (), # args - {}, # kwargs - ) - # Because the output data type of the lt node is boolean. - # We need to take the quant attr from the non-scalar node. 
- if quant_attrs := node.args[index ^ 1].meta.get(QCOM_QUANT_ATTRS): - quant_attrs = quant_attrs.copy() - quant_range = ( - quant_attrs[QCOM_QUANT_MAX] - quant_attrs[QCOM_QUANT_MIN] - ) - quant_attrs[QCOM_ZERO_POINT] = ( - 0 if scalar >= 0 else quant_attrs[QCOM_QUANT_MAX] - ) - quant_attrs[QCOM_SCALE] = ( - scalar / quant_range if scalar >= 0 else -scalar / quant_range - ) - input_node.meta[QCOM_QUANT_ATTRS] = quant_attrs + input_tensor = self.get_tensor(input_node, node) + tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE input_tensor_wrapper = self.define_tensor( input_node, diff --git a/backends/qualcomm/builders/op_pow.py b/backends/qualcomm/builders/op_pow.py index cf5b7595697..3e89bdcfc4d 100644 --- a/backends/qualcomm/builders/op_pow.py +++ b/backends/qualcomm/builders/op_pow.py @@ -8,17 +8,15 @@ import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper import torch -from executorch.backends.qualcomm.utils.constants import QCOM_QUANT_ATTRS -from executorch.exir.dialects._ops import ops as exir_ops from .node_visitor import NodeVisitor, register_node_visitor from .qnn_constants import OpElementWisePower, QNN_OP_PACKAGE_NAME_QTI_AISW -# TODO Add more class Like PowTensorTensor if needed +# pow.Tensor_Scalar should fall in this visitor because LiftConstantScalarOperands pass @register_node_visitor -class PowTensorScalar(NodeVisitor): - target = ["aten.pow.Tensor_Scalar"] +class PowTensorTensor(NodeVisitor): + target = ["aten.pow.Tensor_Tensor"] def __init__(self, *args) -> None: super().__init__(*args) @@ -52,38 +50,18 @@ def define_node( nodes_to_wrappers, ) - # scalar input - scalar = node.args[1] - scalar_tensor = torch.tensor(scalar).to(torch.float32) - - # 'graph', 'name', 'op', 'target', 'args', and 'kwargs' - scalar_node = torch.fx.Node( - node.graph, - node.name + "_runtime_scalar", - "call_function", - exir_ops.edge.aten.scalar_tensor.default, - (), # args - {}, # kwargs - ) - - if pow_quant_attrs := node.meta.get(QCOM_QUANT_ATTRS): - quant_attrs = pow_quant_attrs.copy() - quant_range = quant_attrs["quant_max"] - quant_attrs["quant_min"] - quant_attrs["zero_point"] = 0 if scalar >= 0 else quant_attrs["quant_max"] - quant_attrs["scale"] = ( - scalar / quant_range if scalar >= 0 else -scalar / quant_range - ) - scalar_node.meta[QCOM_QUANT_ATTRS] = quant_attrs - - scalar_tensor_wrapper = self.define_tensor( - scalar_node, + # exp input + exp_node = node.args[1] + exp_tensor = self.get_tensor(exp_node, node) + exp_tensor_wrapper = self.define_tensor( + exp_node, node, - scalar_tensor, + exp_tensor, PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC, nodes_to_wrappers, ) - pow_input_tensors = [input_tensor_wrapper, scalar_tensor_wrapper] + pow_input_tensors = [input_tensor_wrapper, exp_tensor_wrapper] pow_op = PyQnnWrapper.PyQnnOpWrapper( node.name, diff --git a/backends/qualcomm/builders/op_prelu.py b/backends/qualcomm/builders/op_prelu.py index 4057b3d5559..e35839f535e 100644 --- a/backends/qualcomm/builders/op_prelu.py +++ b/backends/qualcomm/builders/op_prelu.py @@ -8,15 +8,7 @@ import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper import torch -from executorch.backends.qualcomm.utils.constants import ( - QCOM_AXIS_ORDER, - QCOM_QUANT_ATTRS, - QCOM_QUANT_MAX, - QCOM_QUANT_MIN, - QCOM_SCALE, - QCOM_ZERO_POINT, -) -from executorch.exir.dialects._ops import ops as exir_ops +from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER from .node_visitor import get_parameter, NodeVisitor, 
register_node_visitor from .qnn_constants import OpPRelu, QNN_OP_PACKAGE_NAME_QTI_AISW @@ -24,7 +16,7 @@ @register_node_visitor class PReLU(NodeVisitor): - target = ["aten.leaky_relu.default", "aten.prelu.default"] + target = ["aten.prelu.default"] def __init__(self, *args) -> None: super().__init__(*args) @@ -44,57 +36,32 @@ def define_node( nodes_to_wrappers, ) - if node.target.__name__ == "aten.leaky_relu.default": - coeff = 1e-2 if len(node.args) < 2 else node.args[1] - coeff_tensor = torch.full(input_tensor.shape, coeff).to(torch.float32) + coeff_node = node.args[1] + coeff_tensor = torch.zeros(input_node.meta["val"].shape) + coeff = get_parameter(coeff_node, self.edge_program) + # param nodes will be FakeTensor when doing partition + # fill in random numeric for validation + if isinstance(coeff, torch._subclasses.fake_tensor.FakeTensor): + coeff = torch.ones(coeff.shape) + # per-channel activation + if coeff_node.meta["val"].shape[0] > 1: + for i in range(input_node.meta["val"].shape[1]): + coeff_tensor = coeff_tensor.index_fill(1, torch.tensor([i]), coeff[i]) + if QCOM_AXIS_ORDER in input_node.meta: + axis_order = input_node.meta[QCOM_AXIS_ORDER] + coeff_tensor = coeff_tensor.permute(dims=axis_order).contiguous() else: - coeff_node = node.args[1] - coeff_tensor = torch.zeros(input_node.meta["val"].shape) - coeff = get_parameter(coeff_node, self.edge_program) - # param nodes will be FakeTensor when doing partition - # fill in random numeric for validation - if isinstance(coeff, torch._subclasses.fake_tensor.FakeTensor): - coeff = torch.ones(coeff.shape) - # per-channel activation - if coeff_node.meta["val"].shape[0] > 1: - for i in range(input_node.meta["val"].shape[1]): - coeff_tensor = coeff_tensor.index_fill( - 1, torch.tensor([i]), coeff[i] - ) - if QCOM_AXIS_ORDER in input_node.meta: - axis_order = input_node.meta[QCOM_AXIS_ORDER] - coeff_tensor = coeff_tensor.permute(dims=axis_order).contiguous() - # simple min-max quantization - coeff = torch.max(coeff).item() - else: - coeff = coeff.item() - coeff_tensor = torch.full(input_tensor.shape, coeff).to(torch.float32) - - # 'graph', 'name', 'op', 'target', 'args', and 'kwargs' - scalar_node = torch.fx.Node( - node.graph, - node.name + "_runtime_scalar", - "call_function", - exir_ops.edge.aten.full.default, - (), # args - {}, # kwargs - ) - if pow_quant_attrs := node.meta.get(QCOM_QUANT_ATTRS): - quant_attrs = pow_quant_attrs.copy() - quant_range = quant_attrs[QCOM_QUANT_MAX] - quant_attrs[QCOM_QUANT_MIN] - # coeff is guaranteed to be positive - quant_attrs[QCOM_ZERO_POINT] = 0 - quant_attrs[QCOM_SCALE] = coeff / quant_range - scalar_node.meta[QCOM_QUANT_ATTRS] = quant_attrs + coeff = coeff.item() + coeff_tensor = torch.full(input_tensor.shape, coeff).to(torch.float32) - scalar_tensor_wrapper = self.define_tensor( - scalar_node, + coeff_tensor_wrapper = self.define_tensor( + coeff_node, node, coeff_tensor, PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC, nodes_to_wrappers, ) - prelu_input_tensors = [prelu_inp_tensor_wrapper, scalar_tensor_wrapper] + prelu_input_tensors = [prelu_inp_tensor_wrapper, coeff_tensor_wrapper] output_tensor = self.get_tensor(node, node) output_tensor_wrapper = self.define_tensor( diff --git a/backends/qualcomm/builders/op_topk.py b/backends/qualcomm/builders/op_topk.py index 1bbf19c84bd..745cf7b9935 100644 --- a/backends/qualcomm/builders/op_topk.py +++ b/backends/qualcomm/builders/op_topk.py @@ -10,7 +10,11 @@ import numpy as np import torch -from executorch.backends.qualcomm.utils.constants import 
QCOM_AXIS_ORDER, QCOM_DATA +from executorch.backends.qualcomm.utils.constants import ( + QCOM_AXIS_ORDER, + QCOM_DATA, + QCOM_QUANT_ATTRS, +) from .node_visitor import NodeVisitor, register_node_visitor from .qnn_constants import OpTopK, QNN_OP_PACKAGE_NAME_QTI_AISW @@ -60,7 +64,7 @@ def define_node( output_idx_tensor = self.get_tensor(node, node, 1).to(torch.int32) # QNN constraint, topk output_0 requires having the same quant config as input - node.meta["quant_attrs"] = input_node.meta.get("quant_attrs") + node.meta[QCOM_QUANT_ATTRS] = input_node.meta.get(QCOM_QUANT_ATTRS) output_val_tensor_wrapper = self.define_tensor( node, node, @@ -70,7 +74,7 @@ def define_node( ) # topk output_1 is index, do not quantize it. - node.meta.pop("quant_attrs", None) + node.meta.pop(QCOM_QUANT_ATTRS, None) output_index_tensor_wrapper = self.define_tensor( node, node, @@ -92,10 +96,10 @@ def define_node( topk_op.AddScalarParam( OpTopK.param_k, PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_32, - {"data": np.uint32(k)}, + {QCOM_DATA: np.uint32(k)}, ) - # As of QNN 2.26, QNN HTP backend only allows users to set this value to 1, or else it will fail at op validation + # As of QNN 2.26, QNN HTP backend only allows users to set this value to 1, or it will fail at op validation if len(node.args) > 3: largest = cast(bool, node.args[3]) topk_op.AddScalarParam( diff --git a/backends/qualcomm/quantizer/annotators.py b/backends/qualcomm/quantizer/annotators.py index 3f27dbdb163..a232d231c27 100644 --- a/backends/qualcomm/quantizer/annotators.py +++ b/backends/qualcomm/quantizer/annotators.py @@ -194,39 +194,37 @@ def annotate_sub(node: Node, quantization_config: QuantizationConfig) -> None: annotate_binary(node, quantization_config) -@register_annotator([torch.ops.aten.eq.Scalar, torch.ops.aten.eq.Tensor]) +@register_annotator([torch.ops.aten.eq.Tensor]) def annotate_eq(node: Node, quantization_config: QuantizationConfig) -> None: annotate_binary(node, quantization_config) -@register_annotator([torch.ops.aten.ne.Scalar, torch.ops.aten.ne.Tensor]) +@register_annotator([torch.ops.aten.ne.Tensor]) def annotate_ne(node: Node, quantization_config: QuantizationConfig) -> None: annotate_binary(node, quantization_config) -@register_annotator([torch.ops.aten.ge.Scalar, torch.ops.aten.ge.Tensor]) +@register_annotator([torch.ops.aten.ge.Tensor]) def annotate_ge(node: Node, quantization_config: QuantizationConfig) -> None: annotate_binary(node, quantization_config) -@register_annotator([torch.ops.aten.gt.Scalar, torch.ops.aten.gt.Tensor]) +@register_annotator([torch.ops.aten.gt.Tensor]) def annotate_gt(node: Node, quantization_config: QuantizationConfig) -> None: annotate_binary(node, quantization_config) -@register_annotator([torch.ops.aten.le.Scalar, torch.ops.aten.le.Tensor]) +@register_annotator([torch.ops.aten.le.Tensor]) def annotate_le(node: Node, quantization_config: QuantizationConfig) -> None: annotate_binary(node, quantization_config) -@register_annotator([torch.ops.aten.lt.Scalar, torch.ops.aten.lt.Tensor]) +@register_annotator([torch.ops.aten.lt.Tensor]) def annotate_lt(node: Node, quantization_config: QuantizationConfig) -> None: annotate_binary(node, quantization_config) -@register_annotator( - [torch.ops.aten.mul, torch.ops.aten.mul.Tensor, torch.ops.aten.mul.Scalar] -) +@register_annotator([torch.ops.aten.mul, torch.ops.aten.mul.Tensor]) def annotate_mul(node: Node, quantization_config: QuantizationConfig) -> None: annotate_binary(node, quantization_config) @@ -308,7 +306,7 @@ def _derive_div_qparams_fn( raise 
NotImplementedError(f"No quant annotation is implemented for {node}.") -@register_annotator([torch.ops.aten.rsub.Scalar]) +@register_annotator([torch.ops.aten.rsub.Tensor]) def annotate_rsub(node: Node, quantization_config: QuantizationConfig) -> None: annotate_binary(node, quantization_config) @@ -460,15 +458,9 @@ def annotate_permute(node: Node, quantization_config: QuantizationConfig) -> Non annotate_single_in_single_out(node, quantization_config) -@register_annotator( - [ - torch.ops.aten.leaky_relu.default, - torch.ops.aten.leaky_relu_.default, - torch.ops.aten.prelu.default, - ] -) +@register_annotator([torch.ops.aten.prelu.default]) def annotate_prelu(node: Node, quantization_config: QuantizationConfig) -> None: - annotate_single_in_single_out(node, quantization_config) + annotate_binary(node, quantization_config) @register_annotator([torch.ops.aten.view.default, torch.ops.aten._unsafe_view.default]) @@ -688,7 +680,7 @@ def annotate_sigmoid(node: Node, quantization_config: QuantizationConfig) -> Non ) -@register_annotator([torch.ops.aten.pow.Tensor_Scalar]) +@register_annotator([torch.ops.aten.pow.Tensor_Tensor]) def annotate_pow(node: Node, quantization_config: QuantizationConfig) -> None: annotate_single_in_single_out(node, quantization_config) diff --git a/backends/qualcomm/quantizer/quantizer.py b/backends/qualcomm/quantizer/quantizer.py index 37c9e9ab21e..f5f07f6a365 100644 --- a/backends/qualcomm/quantizer/quantizer.py +++ b/backends/qualcomm/quantizer/quantizer.py @@ -12,6 +12,7 @@ DecomposeEinsum, DecomposeLinalgVectorNorm, DecomposeSilu, + LiftConstantScalarOperands, RecomposePixelUnshuffle, ReduceDynamicRange, ReplaceInfBuffer, @@ -224,8 +225,9 @@ def transform_for_annotation(self, model: GraphModule) -> GraphModule: model = DecomposeScaledDotProductAttention()(model).graph_module model = DecomposeSilu()(model).graph_module model = DecomposeEinsum()(model).graph_module - model = DecomposeLinalgVectorNorm(quantization_capture=True)(model).graph_module + model = DecomposeLinalgVectorNorm(aten_dialect_capture=True)(model).graph_module model = ReplaceInfBuffer()(model).graph_module + model = LiftConstantScalarOperands()(model).graph_module return model def validate(self, model: GraphModule) -> None: diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index f8552e4fd4b..ad00d58fb85 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -68,8 +68,7 @@ from executorch.examples.models.inception_v3 import InceptionV3Model from executorch.examples.models.inception_v4 import InceptionV4Model -# from executorch.examples.models.llama import Llama2Model -from executorch.examples.models.mobilebert import MobileBertModelExample +# from executorch.examples.models.mobilebert import MobileBertModelExample from executorch.examples.models.mobilenet_v2 import MV2Model from executorch.examples.models.mobilenet_v3 import MV3Model from executorch.examples.models.torchvision_vit.model import TorchVisionViTModel @@ -462,12 +461,16 @@ def test_qnn_backend_instance_norm_2d(self): with self.subTest(i=i): self.lower_module_and_test_output(module, sample_input) + @unittest.expectedFailure def test_qnn_backend_interpolate_bilinear_2d(self): + # TODO: Fix op not supported KeyError: 'aten.randn.default' module = ResizeBilinear2D() # noqa: F405 sample_input = (torch.randn(2, 3, 4, 5),) self.lower_module_and_test_output(module, sample_input) + @unittest.expectedFailure def 
test_qnn_backend_interpolate_nearest_2d(self): + # TODO: Fix op not supported KeyError: 'aten.randn.default' module = ResizeNearest2D() # noqa: F405 sample_input = (torch.randn(2, 3, 4, 5),) self.lower_module_and_test_output(module, sample_input) @@ -892,17 +895,18 @@ def test_qnn_backend_view_permute_matmul(self): self.lower_module_and_test_output(module, sample_input) def test_qnn_backend_example_models(self): + # TODO Fix MobileBertModelExample and TorchVisionViTModel instances = [ DeepLabV3ResNet101Model(), EdsrModel(), InceptionV3Model(), InceptionV4Model(), # The module of llama is changing frequently. Reopen it when it's stable - # Llama2Model(), MV2Model(), MV3Model(), - MobileBertModelExample(), - TorchVisionViTModel(), + # Fail during lowering Reopen once resolved + # MobileBertModelExample(), + # TorchVisionViTModel(), # Encountered undefined symbol in mainline. Reopen once resolved. # Wav2LetterModel(), ] @@ -916,7 +920,6 @@ def test_qnn_backend_example_models(self): 1, 1, 1, - 1, ] # TODO: Due to trigger maximum recursion depth exceeded, need to check it. disable_validation() @@ -1412,13 +1415,17 @@ def test_qnn_backend_instance_norm_2d(self): module = self.get_qdq_module(module, sample_input) self.lower_module_and_test_output(module, sample_input) + @unittest.expectedFailure def test_qnn_backend_interpolate_bilinear_2d(self): + # TODO: Fix op not supported KeyError: 'aten.randn.default' module = ResizeBilinear2D() # noqa: F405 sample_input = (torch.randn(2, 3, 4, 5),) module = self.get_qdq_module(module, sample_input) self.lower_module_and_test_output(module, sample_input) + @unittest.expectedFailure def test_qnn_backend_interpolate_nearest_2d(self): + # TODO: Fix op not supported KeyError: 'aten.randn.default' module = ResizeNearest2D() # noqa: F405 sample_input = (torch.randn(2, 3, 4, 5),) module = self.get_qdq_module(module, sample_input) @@ -1938,7 +1945,6 @@ def test_qnn_backend_example_models(self): QCOM_QUANT_DTYPE: QuantDtype.use_8a8w, }, # The module of llama is changing frequently. 
Reopen it when it's stable - # {QCOM_MODULE: Llama2Model(), QCOM_ANNOTATION: (), QCOM_QUANT_DTYPE: QuantDtype.use_8a8w}, { QCOM_MODULE: MV2Model(), QCOM_ANNOTATION: (), @@ -1970,7 +1976,6 @@ def test_qnn_backend_example_models(self): 1, 1, 1, - 1, # For MobileBertModelExample # 1, 1, @@ -2045,7 +2050,9 @@ def test_qnn_backend_skip_node_op(self): skip_node_op_set={"aten.add.Tensor"}, ) + @unittest.expectedFailure def test_qnn_backend_spill_fill_buffer_size(self): + # TODO: Fix self.assertNotEqual(0, max_sf_size) module = LargeTensorLinear() # noqa: F405 sample_input = (torch.randn(1, 256, 512),) edge_prog = capture_program(module, sample_input) @@ -2199,7 +2206,9 @@ def test_qnn_backend_online_prepare(self): sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28)) self.lower_module_and_test_output(module, sample_input) + @unittest.expectedFailure def test_qnn_backend_context_direct(self): + # TODO: Fix QNN tools pairs with np 2.x with tempfile.TemporaryDirectory() as tmp_dir: module = ContextBinaryExample() # noqa: F405 generate_context_binary( @@ -2642,7 +2651,9 @@ def calibrator(gm): ).to_executorch() self.verify_output(module, sample_input, exec_prog) + @unittest.expectedFailure def test_qnn_backend_spill_fill_buffer_size(self): + # TODO: Fix self.assertNotEqual(0, max_sf_size) module = LargeTensorLinear() # noqa: F405 sample_input = (torch.randn(1, 256, 512),) module = self.get_qdq_module(module, sample_input) @@ -2839,7 +2850,9 @@ def test_qnn_backend_online_prepare(self): module = self.get_qdq_module(module, sample_input) self.lower_module_and_test_output(module, sample_input) + @unittest.expectedFailure def test_qnn_backend_context_direct(self): + # TODO: Fix QNN tools pairs with np 2.x with tempfile.TemporaryDirectory() as tmp_dir: module = ContextBinaryExample() # noqa: F405 generate_context_binary( diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py index 1da17cb25f6..5ae640adc6e 100644 --- a/backends/qualcomm/utils/utils.py +++ b/backends/qualcomm/utils/utils.py @@ -17,21 +17,20 @@ import torch from executorch.backends.qualcomm._passes import ( - AnnotateAndQuantScalar, AnnotateDecomposed, AnnotateQuantAttrs, ConstantI64toI32, - ConvertBinaryOpsWithScalar, ConvertBmmToMatmul, ConvertInterpolateWithUpsample2D, - ConvertPReLU, ConvertToLinear, DecomposeAny, DecomposeLinalgVectorNorm, ExpandBroadcastTensorShape, FoldQDQ, LayoutTransform, + LiftConstantScalarOperands, RecomposePixelUnshuffle, + RecomposePReLU, RecomposeRmsNorm, RemoveRedundancy, ReplaceIndexPutInput, @@ -73,6 +72,9 @@ QCOM_QNN_COMPILE_SPEC, QCOM_QUANTIZED_IO, ) +from executorch.backends.transforms.decompose_sdpa import ( + DecomposeScaledDotProductAttention, +) from executorch.exir import ( EdgeCompileConfig, @@ -350,19 +352,18 @@ def get_capture_program_passes(): # The second value in each tuple in `default_passes_and_setting` indicates whether the corresponding pass is activated by default. # If a pass is activated, it will be executed by default. 
default_passes_and_setting = [ - (AnnotateAndQuantScalar, True), (AnnotateDecomposed, True), (AnnotateQuantAttrs, True), (ConstantI64toI32, True), (ConvertBmmToMatmul, True), (ConvertInterpolateWithUpsample2D, True), - (ConvertPReLU, True), (ConvertToLinear, True), (DecomposeAny, True), (DecomposeLinalgVectorNorm, True), (ExpandBroadcastTensorShape, False), (FoldQDQ, True), (LayoutTransform, True), + (RecomposePReLU, True), (RecomposePixelUnshuffle, True), (RecomposeRmsNorm, True), (RemoveRedundancy, True), @@ -432,22 +433,29 @@ def _transform( return edge_program +# Modify the fx graph at very beginning for floating point model +# Aim to reduce registration of scalar at graph_module or program +def _preprocess_module(module: torch.nn.Module, inputs: Tuple[torch.Tensor]): + if isinstance(module, torch.fx.graph_module.GraphModule): + return module + module = torch.export.export(module, inputs, strict=True).module() + module = DecomposeScaledDotProductAttention()(module).graph_module + module = DecomposeLinalgVectorNorm(True)(module).graph_module + module = LiftConstantScalarOperands()(module).graph_module + return module + + def capture_program( module: torch.nn.Module, inputs: Tuple[torch.Tensor], passes_job: OrderedDict = None, dynamic_shapes: Dict = None, ) -> exir.ExirExportedProgram: + module = _preprocess_module(module, inputs) ep = torch.export.export(module, inputs, dynamic_shapes=dynamic_shapes) decomposed_ep = ep.run_decompositions(get_decomp_table()) - # We choose call_operator by target in ConvertBinaryOpsWithScalar - # because it is the same source_fn_stack for MultiheadAttention - # TODO: Should modify the scalar op in the op builder instead of - # using transformation core_ep = ExirExportedProgram(decomposed_ep, False) - core_ep.transform( - TensorI64toI32(edge_program=core_ep), ConvertBinaryOpsWithScalar() - ) + core_ep.transform(TensorI64toI32(edge_program=core_ep)) edge_ep = core_ep.to_edge(qnn_edge_config()) _transform(edge_ep.exported_program, passes_job) return edge_ep From 40d399aeee30e5b8a247f32c80859aa0bda8598e Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Thu, 27 Feb 2025 21:41:13 -0800 Subject: [PATCH 159/584] Update libraries naming inside frameworks (#8815) Take just the first path component of the provided directory --- build/create_frameworks.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build/create_frameworks.sh b/build/create_frameworks.sh index a55c4aed1e7..804a5156e0b 100755 --- a/build/create_frameworks.sh +++ b/build/create_frameworks.sh @@ -76,8 +76,8 @@ create_xcframework() { fi local dir_suffix - dir_suffix=$(echo "$dir" | tr '[:upper:]' '[:lower:]' | sed 's/\//-/g') - local merged_lib="${output}/lib${target_library_name}-${dir_suffix}.a" + dir_suffix=$(echo "$dir" | cut -d'/' -f1 | tr '[:upper:]' '[:lower:]' | sed 's/[\/\.~]/_/g') + local merged_lib="${output}/lib${target_library_name}_${dir_suffix}.a" # Remove the existing .a file if it exists. 
if [ -f "${merged_lib}" ]; then From e80750670126b249151401e1cead18abaa863765 Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Thu, 27 Feb 2025 22:26:44 -0800 Subject: [PATCH 160/584] Update xcconfigs to use the new format (#8818) --- docs/source/using-executorch-ios.md | 16 ++++++++-------- .../ExecuTorchDemo.xcodeproj/project.pbxproj | 2 +- .../LLaMA/LLaMA.xcodeproj/project.pbxproj | 2 +- .../LLaMA/LLaMA/SupportingFiles/Debug.xcconfig | 14 +++++++------- .../LLaMA/LLaMA/SupportingFiles/Release.xcconfig | 14 +++++++------- .../apple_ios/LLaMA/docs/delegates/mps_README.md | 2 +- .../LLaMA/docs/delegates/xnnpack_README.md | 2 +- .../react-native/rnllama/ios/Release.xcconfig | 14 +++++++------- .../ios/rnllama.xcodeproj/project.pbxproj | 2 +- .../Benchmark/Frameworks/download_frameworks.sh | 2 +- .../apple/Benchmark/Tests/Tests.xcconfig | 14 +++++++------- 11 files changed, 42 insertions(+), 42 deletions(-) diff --git a/docs/source/using-executorch-ios.md b/docs/source/using-executorch-ios.md index b4c27ebbed0..fa1283cfb16 100644 --- a/docs/source/using-executorch-ios.md +++ b/docs/source/using-executorch-ios.md @@ -25,7 +25,7 @@ The prebuilt ExecuTorch runtime, backend, and kernels are available as a [Swift #### Xcode -In Xcode, go to `File > Add Package Dependencies`. Paste the URL of the [ExecuTorch repo](https://github.com/pytorch/executorch) into the search bar and select it. Make sure to change the branch name to the desired ExecuTorch version in format "swiftpm-", (e.g. "swiftpm-0.5.0"), or a branch name in format "swiftpm-." (e.g. "swiftpm-0.5.0-20250130") for a nightly build on a specific date. +In Xcode, go to `File > Add Package Dependencies`. Paste the URL of the [ExecuTorch repo](https://github.com/pytorch/executorch) into the search bar and select it. Make sure to change the branch name to the desired ExecuTorch version in format "swiftpm-", (e.g. "swiftpm-0.5.0"), or a branch name in format "swiftpm-." (e.g. "swiftpm-0.5.0-20250228") for a nightly build on a specific date. ![](_static/img/swiftpm_xcode1.png) @@ -153,15 +153,15 @@ ET_PLATFORM[sdk=iphoneos*] = ios ET_PLATFORM[sdk=macos*] = macos OTHER_LDFLAGS = $(inherited) \ - -force_load $(BUILT_PRODUCTS_DIR)/libexecutorch-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libbackend_coreml-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libbackend_mps-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libbackend_xnnpack-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libkernels_optimized-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libkernels_quantized-$(ET_PLATFORM)-release.a + -force_load $(BUILT_PRODUCTS_DIR)/libexecutorch_debug_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libbackend_coreml_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libbackend_mps_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libbackend_xnnpack_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libkernels_optimized_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libkernels_quantized_$(ET_PLATFORM).a ``` -For a Debug build configuration, replace `release` with `debug` in the library file names. Remember to link against the ExecuTorch runtime (`libexecutorch`) in Debug mode even if other components are built for Release to preserve logs if needed. +**Note:** In the example above, we link against the Debug version of the ExecuTorch runtime (`libexecutorch_debug`) to preserve the logs. Normally, that does not impact the performance too much. 
Nevertheless, remember to link against the release version of the runtime (`libexecutorch`) for the best performance and no logs. You can assign such a config file to your target in Xcode: diff --git a/examples/demo-apps/apple_ios/ExecuTorchDemo/ExecuTorchDemo.xcodeproj/project.pbxproj b/examples/demo-apps/apple_ios/ExecuTorchDemo/ExecuTorchDemo.xcodeproj/project.pbxproj index f08d61396d2..ef8590814a6 100644 --- a/examples/demo-apps/apple_ios/ExecuTorchDemo/ExecuTorchDemo.xcodeproj/project.pbxproj +++ b/examples/demo-apps/apple_ios/ExecuTorchDemo/ExecuTorchDemo.xcodeproj/project.pbxproj @@ -806,7 +806,7 @@ isa = XCRemoteSwiftPackageReference; repositoryURL = "https://github.com/pytorch/executorch"; requirement = { - branch = "swiftpm-0.5.0.20250130"; + branch = "swiftpm-0.5.0.20250228"; kind = branch; }; }; diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj b/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj index 2cc93808799..ca9a35ea3d7 100644 --- a/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj @@ -808,7 +808,7 @@ isa = XCRemoteSwiftPackageReference; repositoryURL = "https://github.com/pytorch/executorch"; requirement = { - branch = "swiftpm-0.5.0.20250130"; + branch = "swiftpm-0.5.0.20250228"; kind = branch; }; }; diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMA/SupportingFiles/Debug.xcconfig b/examples/demo-apps/apple_ios/LLaMA/LLaMA/SupportingFiles/Debug.xcconfig index e674c783b2c..2dddc6f1f7a 100644 --- a/examples/demo-apps/apple_ios/LLaMA/LLaMA/SupportingFiles/Debug.xcconfig +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMA/SupportingFiles/Debug.xcconfig @@ -3,13 +3,13 @@ ET_PLATFORM[sdk=iphoneos*] = ios ET_PLATFORM[sdk=macos*] = macos OTHER_LDFLAGS = $(inherited) \ - -force_load $(BUILT_PRODUCTS_DIR)/libexecutorch-$(ET_PLATFORM)-debug.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libbackend_coreml-$(ET_PLATFORM)-debug.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libbackend_mps-$(ET_PLATFORM)-debug.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libbackend_xnnpack-$(ET_PLATFORM)-debug.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libkernels_custom-$(ET_PLATFORM)-debug.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libkernels_optimized-$(ET_PLATFORM)-debug.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libkernels_quantized-$(ET_PLATFORM)-debug.a \ + -force_load $(BUILT_PRODUCTS_DIR)/libexecutorch_debug_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libbackend_coreml_debug_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libbackend_mps_debug_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libbackend_xnnpack_debug_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libkernels_custom_debug_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libkernels_optimized_debug_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libkernels_quantized_debug_$(ET_PLATFORM).a \ @$(TEMP_DIR)/cmake/linker_flags // LLaMARunner requires additional dependencies built with CMake in a custom run script phase. diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMA/SupportingFiles/Release.xcconfig b/examples/demo-apps/apple_ios/LLaMA/LLaMA/SupportingFiles/Release.xcconfig index fcf63012cc4..2f860aa4d30 100644 --- a/examples/demo-apps/apple_ios/LLaMA/LLaMA/SupportingFiles/Release.xcconfig +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMA/SupportingFiles/Release.xcconfig @@ -5,13 +5,13 @@ ET_PLATFORM[sdk=macos*] = macos // Link the Debug version of ExecuTorch runtime to keep the logs. 
// Switch to Release for better performance if logs are not needed. OTHER_LDFLAGS = $(inherited) \ - -force_load $(BUILT_PRODUCTS_DIR)/libexecutorch-$(ET_PLATFORM)-debug.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libbackend_coreml-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libbackend_mps-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libbackend_xnnpack-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libkernels_custom-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libkernels_optimized-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libkernels_quantized-$(ET_PLATFORM)-release.a \ + -force_load $(BUILT_PRODUCTS_DIR)/libexecutorch_debug_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libbackend_coreml_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libbackend_mps_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libbackend_xnnpack_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libkernels_custom_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libkernels_optimized_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libkernels_quantized_$(ET_PLATFORM).a \ @$(TEMP_DIR)/cmake/linker_flags // LLaMARunner requires additional dependencies built with CMake in a custom run script phase. diff --git a/examples/demo-apps/apple_ios/LLaMA/docs/delegates/mps_README.md b/examples/demo-apps/apple_ios/LLaMA/docs/delegates/mps_README.md index e1a1530acf9..f5292fe5c05 100644 --- a/examples/demo-apps/apple_ios/LLaMA/docs/delegates/mps_README.md +++ b/examples/demo-apps/apple_ios/LLaMA/docs/delegates/mps_README.md @@ -76,7 +76,7 @@ sudo /Applications/CMake.app/Contents/bin/cmake-gui --install The prebuilt ExecuTorch runtime, backend, and kernels are available as a Swift PM package. ### Xcode -Open the project in Xcode.In Xcode, go to `File > Add Package Dependencies`. Paste the URL of the ExecuTorch repo into the search bar and select it. Make sure to change the branch name to the desired ExecuTorch version, e.g., “swiftpm-0.5.0”, or a branch name in format "swiftpm-." (e.g. "swiftpm-0.5.0-20250130") for a nightly build on a specific date. +Open the project in Xcode.In Xcode, go to `File > Add Package Dependencies`. Paste the URL of the ExecuTorch repo into the search bar and select it. Make sure to change the branch name to the desired ExecuTorch version, e.g., “swiftpm-0.5.0”, or a branch name in format "swiftpm-." (e.g. "swiftpm-0.5.0-20250228") for a nightly build on a specific date. Note: If you're running into any issues related to package dependencies, quit Xcode entirely, delete the whole executorch repo, clean the caches by running the command below in terminal and clone the repo again. diff --git a/examples/demo-apps/apple_ios/LLaMA/docs/delegates/xnnpack_README.md b/examples/demo-apps/apple_ios/LLaMA/docs/delegates/xnnpack_README.md index b127bad10e2..d2a901608d3 100644 --- a/examples/demo-apps/apple_ios/LLaMA/docs/delegates/xnnpack_README.md +++ b/examples/demo-apps/apple_ios/LLaMA/docs/delegates/xnnpack_README.md @@ -130,7 +130,7 @@ While we recommended using the latest prebuilt package pre-configured with the X Go to Project Navigator, click on LLaMA. 
`Project --> LLaMA --> Package Dependencies`, and update the package dependencies to any of the available options below: -- Branch --> swiftpm-0.5.0.20250130 (amend to match the latest nightly build) +- Branch --> swiftpm-0.5.0.20250228 (amend to match the latest nightly build) - Branch --> swiftpm-0.5.0 - Branch --> swiftpm-0.4.0 diff --git a/examples/demo-apps/react-native/rnllama/ios/Release.xcconfig b/examples/demo-apps/react-native/rnllama/ios/Release.xcconfig index b98b21b9c6e..6893e1252e7 100644 --- a/examples/demo-apps/react-native/rnllama/ios/Release.xcconfig +++ b/examples/demo-apps/react-native/rnllama/ios/Release.xcconfig @@ -5,13 +5,13 @@ ET_PLATFORM[sdk=macos*] = macos // Link the Debug version of ExecuTorch runtime to keep the logs. // Switch to Release for better performance if logs are not needed. OTHER_LDFLAGS = $(inherited) \ - -force_load $(BUILT_PRODUCTS_DIR)/libexecutorch-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libbackend_coreml-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libbackend_mps-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libbackend_xnnpack-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libkernels_custom-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libkernels_optimized-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libkernels_quantized-$(ET_PLATFORM)-release.a \ + -force_load $(BUILT_PRODUCTS_DIR)/libexecutorch_debug_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libbackend_coreml_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libbackend_mps_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libbackend_xnnpack_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libkernels_custom_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libkernels_optimized_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libkernels_quantized_$(ET_PLATFORM).a \ @$(TEMP_DIR)/cmake/linker_flags // LLaMARunner requires additional dependencies built with CMake in a custom run script phase. diff --git a/examples/demo-apps/react-native/rnllama/ios/rnllama.xcodeproj/project.pbxproj b/examples/demo-apps/react-native/rnllama/ios/rnllama.xcodeproj/project.pbxproj index 73314459f6a..612dd410a1a 100644 --- a/examples/demo-apps/react-native/rnllama/ios/rnllama.xcodeproj/project.pbxproj +++ b/examples/demo-apps/react-native/rnllama/ios/rnllama.xcodeproj/project.pbxproj @@ -947,7 +947,7 @@ isa = XCRemoteSwiftPackageReference; repositoryURL = "https://github.com/pytorch/executorch.git"; requirement = { - branch = "swiftpm-0.5.0.20250130"; + branch = "swiftpm-0.5.0.20250228"; kind = branch; }; }; diff --git a/extension/benchmark/apple/Benchmark/Frameworks/download_frameworks.sh b/extension/benchmark/apple/Benchmark/Frameworks/download_frameworks.sh index 6cd1a56a0f7..e6c39c16df7 100755 --- a/extension/benchmark/apple/Benchmark/Frameworks/download_frameworks.sh +++ b/extension/benchmark/apple/Benchmark/Frameworks/download_frameworks.sh @@ -5,7 +5,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-VERSION="0.5.0.20250130" +VERSION="0.5.0.20250228" FRAMEWORKS=( "backend_coreml" "backend_mps" diff --git a/extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig b/extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig index 25c3f9a6267..9891a952069 100644 --- a/extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig +++ b/extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig @@ -3,13 +3,13 @@ ET_PLATFORM[sdk=iphoneos*] = ios ET_PLATFORM[sdk=macos*] = macos OTHER_LDFLAGS = $(inherited) \ - -force_load $(BUILT_PRODUCTS_DIR)/libexecutorch-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libbackend_coreml-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libbackend_mps-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libbackend_xnnpack-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libkernels_custom-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libkernels_optimized-$(ET_PLATFORM)-release.a \ - -force_load $(BUILT_PRODUCTS_DIR)/libkernels_quantized-$(ET_PLATFORM)-release.a \ + -force_load $(BUILT_PRODUCTS_DIR)/libexecutorch_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libbackend_coreml_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libbackend_mps_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libbackend_xnnpack_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libkernels_custom_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libkernels_optimized_$(ET_PLATFORM).a \ + -force_load $(BUILT_PRODUCTS_DIR)/libkernels_quantized_$(ET_PLATFORM).a \ @$(TEMP_DIR)/cmake/linker_flags // LLaMARunner requires additional dependencies built with CMake in a custom run script phase. From 4df0ade1424a2fbd37f495a5602f75929c70c634 Mon Sep 17 00:00:00 2001 From: lucylq Date: Thu, 27 Feb 2025 22:54:45 -0800 Subject: [PATCH 161/584] Introduce NamedData to PTE schema Differential Revision: D69430152 Pull Request resolved: https://github.com/pytorch/executorch/pull/8778 --- exir/schema.py | 7 +++++++ schema/program.fbs | 16 ++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/exir/schema.py b/exir/schema.py index 8e1434a2fe4..7dba623aebf 100644 --- a/exir/schema.py +++ b/exir/schema.py @@ -290,6 +290,12 @@ class SubsegmentOffsets: offsets: List[int] +@dataclass +class NamedData: + key: str + segment_index: int + + @dataclass class Program: version: int @@ -299,3 +305,4 @@ class Program: segments: List[DataSegment] constant_segment: SubsegmentOffsets mutable_data_segments: Optional[List[SubsegmentOffsets]] = None + named_data: Optional[List[NamedData]] = None diff --git a/schema/program.fbs b/schema/program.fbs index 7ab2175f8ac..7308cc63199 100644 --- a/schema/program.fbs +++ b/schema/program.fbs @@ -431,6 +431,17 @@ table SubsegmentOffsets { offsets: [uint64]; } +// Attributes a name to data referenced by Program.segments. Used when data is +// referenced by multiple users, in cases where indices are not guaranteed to +// be consistent across the users. +table NamedData { + // The unique id of the data blob. + key: string; + + // Index of the segment in Program.segments. + segment_index: uint32; +} + table Program { // Schema version. version: uint; @@ -468,6 +479,11 @@ table Program { // constant memory, copying it over, and then being unable to release the // constant segment. No two elements should point to the same segment. mutable_data_segments: [SubsegmentOffsets]; + + // [Optional] List of blobs keyed by a unique name. 
Note that multiple + // 'NamedData' entries could point to the same segment index. Stored in + // segments attached to the PTE file. + named_data: [NamedData]; } root_type Program; From 521e931f943622fe0672cd007024e3f80a546849 Mon Sep 17 00:00:00 2001 From: Zingo Andersen Date: Fri, 28 Feb 2025 14:22:29 +0100 Subject: [PATCH 162/584] Arm backend: Avoid flakyness for mv2 by bumping ATOL/RTOL (#8771) Signed-off-by: Zingo Andersen --- backends/arm/test/test_arm_baremetal.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/arm/test/test_arm_baremetal.sh b/backends/arm/test/test_arm_baremetal.sh index 90b34241f3d..9365962cd10 100755 --- a/backends/arm/test/test_arm_baremetal.sh +++ b/backends/arm/test/test_arm_baremetal.sh @@ -124,13 +124,13 @@ test_models_ethosu_fvp() { # End to End model tests using model_test.py # Ethos-U55 echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U55" - python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-128 --model=mv2 --extra_flags="-DET_ATOL=1.20 -DET_RTOL=1.20" + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-128 --model=mv2 --extra_flags="-DET_ATOL=2.00 -DET_RTOL=2.00" python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-64 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00" python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-256 --model=lstm --extra_flags="-DET_ATOL=0.02 -DET_RTOL=0.02" # Ethos-U85 echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U85" - python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=mv2 --extra_flags="-DET_ATOL=1.20 -DET_RTOL=1.20" + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=mv2 --extra_flags="-DET_ATOL=2.00 -DET_RTOL=2.00" python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-1024 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00" python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=lstm --extra_flags="-DET_ATOL=0.02 -DET_RTOL=0.02" echo "${TEST_SUITE_NAME}: PASS" From 709fa3855215667bf6f8652e178edcded436a113 Mon Sep 17 00:00:00 2001 From: Adrian Lundell <36153706+AdrianLundell@users.noreply.github.com> Date: Fri, 28 Feb 2025 14:25:16 +0100 Subject: [PATCH 163/584] Arm backend: Add support for amax/max/amin/min (#8829) Max/min can be decomposed as for example max(x) = (amax(x), argmax(x)) For MI, an operator support check is added to support max-ops for which the argmax is not used. For BI, the int64 dtype returned by argmax is currently not supported by the arm_quantizer, and the program will crash. This is the same behaviour as before the patch, but with an improved error message. - Adds op_amax/op_amin node visitors. - Renames op_max/min->op_maximum/minimum to clearly separate the two ops. - Adds convert_minmax_pass to make min/max/amin/amax TOSA-compatible. - Adds unittests Util updates: - Updates analyze_output_utils to support rank 0. 
- Adds quantization to OpNotSupportedPipeline Signed-off-by: Adrian Lundell --- backends/arm/_passes/arm_pass_manager.py | 4 + backends/arm/_passes/convert_minmax_pass.py | 136 ++++++++++++++ .../keep_dims_false_to_squeeze_pass.py | 9 +- backends/arm/operator_support/__init__.py | 1 + .../arm/operator_support/minmax_support.py | 37 ++++ .../tosa_supported_operators.py | 4 + backends/arm/operators/__init__.py | 6 +- backends/arm/operators/op_amax.py | 45 +++++ backends/arm/operators/op_amin.py | 45 +++++ .../operators/{op_max.py => op_maximum.py} | 0 .../operators/{op_min.py => op_minimum.py} | 0 .../arm/quantizer/quantization_annotator.py | 2 + backends/arm/test/ops/test_amax.py | 165 +++++++++++++++++ backends/arm/test/ops/test_amin.py | 166 ++++++++++++++++++ .../arm/test/tester/analyze_output_utils.py | 2 + backends/arm/test/tester/test_pipeline.py | 4 + 16 files changed, 619 insertions(+), 7 deletions(-) create mode 100644 backends/arm/_passes/convert_minmax_pass.py create mode 100644 backends/arm/operator_support/minmax_support.py create mode 100644 backends/arm/operators/op_amax.py create mode 100644 backends/arm/operators/op_amin.py rename backends/arm/operators/{op_max.py => op_maximum.py} (100%) rename backends/arm/operators/{op_min.py => op_minimum.py} (100%) create mode 100644 backends/arm/test/ops/test_amax.py create mode 100644 backends/arm/test/ops/test_amin.py diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py index 331d45e9124..f8a4a40648f 100644 --- a/backends/arm/_passes/arm_pass_manager.py +++ b/backends/arm/_passes/arm_pass_manager.py @@ -21,6 +21,7 @@ from executorch.backends.arm._passes.convert_full_like_to_full_pass import ( ConvertFullLikeToFullPass, ) +from executorch.backends.arm._passes.convert_minmax_pass import ConvertMinMaxPass from executorch.backends.arm._passes.convert_split_to_slice import ( ConvertSplitToSlicePass, ) @@ -106,6 +107,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul self.add_pass(ConvertMeanDimToAveragePoolPass()) self.add_pass(ConvertFullLikeToFullPass()) self.add_pass(ConvertToClampPass()) + self.add_pass(ConvertMinMaxPass()) self.add_pass(ReplaceScalarWithTensorArgPass()) self.add_pass(AnnotateDecomposedMatmulPass()) @@ -147,6 +149,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul self.add_pass(DecomposeSoftmaxesPass()) self.add_pass(ConvertFullLikeToFullPass()) self.add_pass(ConvertToClampPass()) + self.add_pass(ConvertMinMaxPass()) self.add_pass(AnnotateDecomposedMatmulPass()) self.add_pass(QuantizeOperatorArguments()) @@ -190,4 +193,5 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule): self.add_pass(DecomposeMeanDimPass()) self.add_pass(DecomposeDivPass()) self.add_pass(DecomposeSoftmaxesPass()) + self.add_pass(ConvertMinMaxPass()) return self._transform(graph_module) diff --git a/backends/arm/_passes/convert_minmax_pass.py b/backends/arm/_passes/convert_minmax_pass.py new file mode 100644 index 00000000000..9f409632c20 --- /dev/null +++ b/backends/arm/_passes/convert_minmax_pass.py @@ -0,0 +1,136 @@ +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +import torch +from executorch.exir.dialects._ops import ops as exir_ops +from executorch.exir.pass_base import ExportPass, PassResult + + +class ConvertMinMaxPass(ExportPass): + """ + Converts min/max to amin/amax and unrolls multi-dimensional reduction and keep-dims arg to be + TOSA compliant. + + The difference between max/min and amax/amin is (from pytorch docs): + - amax/amin supports reducing on multiple dimensions, + - amax/amin does not return indices, + - amax/amin evenly distributes gradient between equal values, while max(dim)/min(dim) + propagates gradient only to a single index in the source tensor. + Since we do not care about gradients post training, convert min/max ops to amin/amax as long as + the indices are not used. + + Original: + amax([dim1, dim2], keepdim = False) + After pass: + amax(dim1, keepdim = True) + amax(dim2, keepdim = True) + squeeze(dim = [dim1, dim2]) + """ + + def check_argmax(self, node): + """ + Raises a RuntimeError if the argmax value returned by the min/max op is used in the graph. + """ + if node.target in [torch.ops.aten.max.dim, torch.ops.aten.min.dim]: + no_argmax = len(node.users) == 1 + no_argmax_users = (len(node.users) == 2) and ( + len(list(node.users)[1].users) == 0 + ) + if not (no_argmax or no_argmax_users): + raise RuntimeError("Argmax is not supported by the arm_quantizer") + + def get_variables(self, node): + """Returns variables specific for each op handled by the pass.""" + if node.target in [ + exir_ops.edge.aten.amax.default, + exir_ops.edge.aten.amin.default, + ]: + replace_node = node + op = node.target + squeeze_op = exir_ops.edge.aten.squeeze_copy.dims + elif node.target == exir_ops.edge.aten.max.dim: + replace_node = list(node.users)[0] + op = exir_ops.edge.aten.amax.default + squeeze_op = exir_ops.edge.aten.squeeze_copy.dims + elif node.target == exir_ops.edge.aten.min.dim: + replace_node = list(node.users)[0] + op = exir_ops.edge.aten.amin.default + squeeze_op = exir_ops.edge.aten.squeeze_copy.dims + elif node.target == torch.ops.aten.max.dim: + replace_node = list(node.users)[0] + op = torch.ops.aten.amax.default + squeeze_op = torch.ops.aten.squeeze.dims + elif node.target == torch.ops.aten.min.dim: + replace_node = list(node.users)[0] + op = torch.ops.aten.amin.default + squeeze_op = torch.ops.aten.squeeze.dims + else: + raise RuntimeError( + f"{node.name} is not an accepted target for ConvertMinMaxPass()" + ) + + return (replace_node, op, squeeze_op) + + def call(self, graph_module: torch.fx.GraphModule): + modified = False + for node in graph_module.graph.nodes: + if node.op != "call_function": + continue + if node.target not in [ + exir_ops.edge.aten.amax.default, + exir_ops.edge.aten.amin.default, + exir_ops.edge.aten.max.dim, + exir_ops.edge.aten.min.dim, + torch.ops.aten.max.dim, + torch.ops.aten.min.dim, + ]: + continue + + self.check_argmax( + node + ) # TODO: MLETORCH-718 : Quantization of indices in arm_quantizer + replace_node, op, squeeze_op = self.get_variables(node) + + # Unwrap args + if len(node.args) == 2: + input_node, dims = node.args + keepdims = False + elif len(node.args) == 3: + input_node, dims, keepdims = node.args + else: + raise RuntimeError(f"Unexpected arg size in {node.name}") + + try: + iter(dims) + except: + dims = [dims] + else: + dims = list(dims) + + # Unroll multi-dimensional reduction and keep-dims arg + with graph_module.graph.inserting_before(node): + + for dim in dims: + args = (input_node, dim, True) + input_node = graph_module.graph.create_node( + "call_function", op, args, 
node.kwargs + ) + + if not keepdims: + input_node = graph_module.graph.create_node( + "call_function", + squeeze_op, + (input_node, dims), + ) + + replace_node.replace_all_uses_with(input_node) + modified = True + + if modified: + graph_module.graph.eliminate_dead_code() + graph_module.recompile() + graph_module = super().call(graph_module).graph_module + + return PassResult(graph_module, True) diff --git a/backends/arm/_passes/keep_dims_false_to_squeeze_pass.py b/backends/arm/_passes/keep_dims_false_to_squeeze_pass.py index ad95379cc87..e3ed7e65a73 100644 --- a/backends/arm/_passes/keep_dims_false_to_squeeze_pass.py +++ b/backends/arm/_passes/keep_dims_false_to_squeeze_pass.py @@ -1,5 +1,4 @@ # Copyright 2024-2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -36,18 +35,18 @@ class KeepDimsFalseToSqueezePass(ExportPass): """ # CURRENTLY NOT HANDLED OPS - # exir_ops.edge.aten.amax, - # exir_ops.edge.aten.amin, # exir_ops.edge.aten.any.dim, # exir_ops.edge.aten.any.dims, # exir_ops.edge.aten.argmax, # exir_ops.edge.aten.argmin, - # exir_ops.edge.aten.max.dim, - # exir_ops.edge.aten.min.dim, # exir_ops.edge.aten.prod.dim_int, # HANDLED OPS # exir_ops.edge.aten.sum.dim_IntList + # exir_ops.edge.aten.max.dim (decomposed in convert_minmax_pass) + # exir_ops.edge.aten.min.dim (decomposed in convert_minmax_pass) + # exir_ops.edge.aten.amin (decomposed in convert_minmax_pass) + # exir_ops.edge.aten.amax (decomposed in convert_minmax_pass) # exir_ops.edge.aten.var.correction (decomposed in decompose_var_pass) # exir_ops.edge.aten.var.dim (decomposed in decompose_var_pass) # exir_ops.edge.aten.mean.dim (decomposed in decompose_meandim_pass) diff --git a/backends/arm/operator_support/__init__.py b/backends/arm/operator_support/__init__.py index c6895cce492..cf7a08e0d58 100644 --- a/backends/arm/operator_support/__init__.py +++ b/backends/arm/operator_support/__init__.py @@ -7,6 +7,7 @@ from . import ( # noqa convolution_support, + minmax_support, pool_2d_support, reduce_sum_support, right_shift_support, diff --git a/backends/arm/operator_support/minmax_support.py b/backends/arm/operator_support/minmax_support.py new file mode 100644 index 00000000000..bdff368a5ce --- /dev/null +++ b/backends/arm/operator_support/minmax_support.py @@ -0,0 +1,37 @@ +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +import torch.fx as fx +from executorch.backends.arm.operator_support.tosa_supported_operators import ( + register_tosa_support_check, + SupportedTOSAOperatorCheck, +) +from executorch.backends.arm.tosa_specification import TosaSpecification +from executorch.exir.dialects._ops import ops as exir_ops + + +@register_tosa_support_check +class MinMaxSupported(SupportedTOSAOperatorCheck): + targets = [ + exir_ops.edge.aten.max.dim, + exir_ops.edge.aten.min.dim, + ] + + # TODO : "MLETORCH-718 : Quantization of indices in arm_quantizer" + tosa_specs = [ + TosaSpecification.create_from_string("TOSA-0.80+MI"), + ] + + def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification): + if node.target in [exir_ops.edge.aten.max.dim, exir_ops.edge.aten.min.dim]: + no_argmax = len(node.users) == 1 + no_argmax_users = (len(node.users) == 2) and ( + len(list(node.users)[1].users) == 0 + ) + + if not (no_argmax or no_argmax_users): + return False + + return True diff --git a/backends/arm/operator_support/tosa_supported_operators.py b/backends/arm/operator_support/tosa_supported_operators.py index 1268e2c912c..607ae017a56 100644 --- a/backends/arm/operator_support/tosa_supported_operators.py +++ b/backends/arm/operator_support/tosa_supported_operators.py @@ -169,6 +169,8 @@ def is_node_supported( exir_ops.edge.quantized_decomposed.quantize_per_tensor.default, exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default, exir_ops.edge.aten.constant_pad_nd.default, + exir_ops.edge.aten.amax.default, + exir_ops.edge.aten.amin.default, ] return supported @@ -191,6 +193,8 @@ def is_node_supported( exir_ops.edge.aten.bitwise_and.Tensor, exir_ops.edge.aten.bitwise_or.Tensor, exir_ops.edge.aten.bitwise_xor.Tensor, + exir_ops.edge.aten.amax.default, + exir_ops.edge.aten.amin.default, ] if node.target in unsupported_ops: diff --git a/backends/arm/operators/__init__.py b/backends/arm/operators/__init__.py index e98d7e76938..ad5a107f9da 100644 --- a/backends/arm/operators/__init__.py +++ b/backends/arm/operators/__init__.py @@ -9,6 +9,8 @@ node_visitor, op_abs, op_add, + op_amax, + op_amin, op_avg_pool2d, op_bmm, op_cat, @@ -24,9 +26,9 @@ op_le, op_log, op_lt, - op_max, op_max_pool2d, - op_min, + op_maximum, + op_minimum, op_mul, op_permute, op_reciprocal, diff --git a/backends/arm/operators/op_amax.py b/backends/arm/operators/op_amax.py new file mode 100644 index 00000000000..30d64b51a51 --- /dev/null +++ b/backends/arm/operators/op_amax.py @@ -0,0 +1,45 @@ +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +from typing import List + +import serializer.tosa_serializer as ts +from executorch.backends.arm.operators.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.arm.tosa_mapping import TosaArg +from serializer.tosa_serializer import TosaOp +from torch.fx import Node + + +@register_node_visitor +class MaxVisitor(NodeVisitor): + target = "aten.amax.default" + + def __init__(self, *args): + super().__init__(*args) + + def define_node( + self, + node: Node, + tosa_graph: ts.TosaSerializer, + inputs: List[TosaArg], + output: TosaArg, + ) -> None: + + input = inputs[0] + dim = inputs[1].number + keep_dims = inputs[2].number + if not keep_dims: + raise RuntimeError( + "TOSA only supports keepdims == True; Did you run the convert_minmax pass?" 
+ ) + + attr = ts.TosaSerializerAttribute() + attr.AxisAttribute(input.dim_order.index(dim)) + + tosa_graph.addOperator( + TosaOp.Op().REDUCE_MAX, [input.name], [output.name], attr + ) diff --git a/backends/arm/operators/op_amin.py b/backends/arm/operators/op_amin.py new file mode 100644 index 00000000000..91a5d308155 --- /dev/null +++ b/backends/arm/operators/op_amin.py @@ -0,0 +1,45 @@ +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +from typing import List + +import serializer.tosa_serializer as ts +from executorch.backends.arm.operators.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.arm.tosa_mapping import TosaArg +from serializer.tosa_serializer import TosaOp +from torch.fx import Node + + +@register_node_visitor +class MinVisitor(NodeVisitor): + target = "aten.amin.default" + + def __init__(self, *args): + super().__init__(*args) + + def define_node( + self, + node: Node, + tosa_graph: ts.TosaSerializer, + inputs: List[TosaArg], + output: TosaArg, + ) -> None: + + input = inputs[0] + dim = inputs[1].number + keep_dims = inputs[2].number + if not keep_dims: + raise RuntimeError( + "TOSA only supports keepdims == True; Did you run the convert_minmax pass?" + ) + + attr = ts.TosaSerializerAttribute() + attr.AxisAttribute(input.dim_order.index(dim)) + + tosa_graph.addOperator( + TosaOp.Op().REDUCE_MIN, [input.name], [output.name], attr + ) diff --git a/backends/arm/operators/op_max.py b/backends/arm/operators/op_maximum.py similarity index 100% rename from backends/arm/operators/op_max.py rename to backends/arm/operators/op_maximum.py diff --git a/backends/arm/operators/op_min.py b/backends/arm/operators/op_minimum.py similarity index 100% rename from backends/arm/operators/op_min.py rename to backends/arm/operators/op_minimum.py diff --git a/backends/arm/quantizer/quantization_annotator.py b/backends/arm/quantizer/quantization_annotator.py index 09eb3e2a12c..07aa9dac9ad 100644 --- a/backends/arm/quantizer/quantization_annotator.py +++ b/backends/arm/quantizer/quantization_annotator.py @@ -175,6 +175,8 @@ def _match_pattern( torch.ops.aten.contiguous.default, torch.ops.aten.upsample_nearest2d.vec, torch.ops.aten.pad.default, + torch.ops.aten.amax.default, + torch.ops.aten.amin.default, ] # Operators that can inherit the quantization specs from its parent node diff --git a/backends/arm/test/ops/test_amax.py b/backends/arm/test/ops/test_amax.py new file mode 100644 index 00000000000..b2639a5f108 --- /dev/null +++ b/backends/arm/test/ops/test_amax.py @@ -0,0 +1,165 @@ +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ + +from typing import Dict, Tuple + +import pytest +import torch +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU85PipelineBI, + OpNotSupportedPipeline, + TosaPipelineBI, + TosaPipelineMI, +) + + +class Amax(torch.nn.Module): + input_t = Tuple[Tuple[torch.Tensor], int | Tuple[int], bool] + aten_op = ["torch.ops.aten.amax"] + + def __init__(self, dim, keep_dims): + self.dim = dim + self.keep_dims = keep_dims + super().__init__() + + def forward(self, x): + return torch.amax(x, self.dim, self.keep_dims) + + test_data: Dict[str, input_t] = { + "rank_1_dim_0": ((torch.rand([10]),), 0, False), + "rank_2_dim_1_keep_dims": ((torch.rand([2, 2]),), (1,), True), + "rank_4_all_dim": ((torch.rand([1, 2, 5, 5]),), (0, 1, 2, 3), False), + "rank_4_0,3_keep_dims": ((torch.rand([1, 2, 2, 2]),), (0, 3), True), + "rank_4_mult_batches": ((torch.rand([2, 2, 2, 2]),), (0), True), + } + + +class Max(torch.nn.Module): + input_t = Tuple[Tuple[torch.Tensor], int] + aten_op = ["torch.ops.aten.amax"] + + def __init__(self, dim): + self.dim = dim + super().__init__() + + def forward(self, x): + x = torch.max(x, self.dim, False) + return x[0] + + test_data: Dict[str, input_t] = { + "rank_1_dim_0": ((torch.rand([10]),), 0), + "rank_2_dim_1": ((torch.rand([2, 2]),), 1), + "rank_4_dim_2": ((torch.rand([2, 2, 2, 2]),), 2), + "rank_4_dim_3": ((torch.rand([2, 2, 2, 2]),), 3), + } + + +class MaxWithIndex(torch.nn.Module): + def __init__(self, dim): + self.dim = dim + super().__init__() + + def forward(self, x): + x, i = torch.max(x, self.dim) + return x, i + + +@common.parametrize("test_data", Amax.test_data) +def test_amax_tosa_MI(test_data: Amax.input_t): + data, dim, keep_dims = test_data + pipeline = TosaPipelineMI[Amax.input_t]( + Amax(dim, keep_dims), + data, + Amax.aten_op, + ) + pipeline.run() + + +@common.parametrize("test_data", Amax.test_data) +def test_amax_tosa_BI(test_data: Amax.input_t): + data, dim, keep_dims = test_data + pipeline = TosaPipelineBI[Amax.input_t]( + Amax(dim, keep_dims), + data, + Amax.aten_op, + ) + pipeline.run() + + +def test_amax_u55_BI_not_delegated(): + data, dim, keep_dims = Amax.test_data["rank_4_all_dim"] + pipeline = OpNotSupportedPipeline[Amax.input_t]( + Amax(dim, keep_dims), + data, + "TOSA-0.80+BI+u55", + {" executorch_exir_dialects_edge__ops_aten_amax_default": 1}, + ) + pipeline.run() + + +@common.parametrize("test_data", Amax.test_data) +def test_amax_u85_BI(test_data: Amax.input_t): + data, dim, keep_dims = test_data + pipeline = EthosU85PipelineBI[Amax.input_t]( + Amax(dim, keep_dims), + data, + Amax.aten_op, + ) + pipeline.run() + + +fvp_xfails = {"rank_4_mult_batches": "MLETORCH-517 : Multiple batches not supported"} + + +@common.parametrize("test_data", Amax.test_data, fvp_xfails) +@common.SkipIfNoCorstone320 +def test_amax_u85_BI_on_fvp(test_data: Amax.input_t): + data, dim, keep_dims = test_data + pipeline = EthosU85PipelineBI[Amax.input_t]( + Amax(dim, keep_dims), data, Amax.aten_op, run_on_fvp=True + ) + pipeline.run() + + +@common.parametrize("test_data", Max.test_data) +def test_max_to_amax_MI(test_data: Max.input_t): + data, dim = test_data + pipeline = TosaPipelineMI[Max.input_t]( + Max(dim), + data, + "torch.ops.aten.max", + ) + pipeline.run() + + +@common.parametrize("test_data", Max.test_data) +def test_max_to_amax_BI(test_data: Max.input_t): + data, dim = test_data + module = Max(dim) + pipeline = TosaPipelineBI[Max.input_t]( + module, + data, + "torch.ops.aten.amax", + ) + 
pipeline.run() + + +@pytest.mark.xfail(reason="MLETORCH-718 : Quantization of indices in arm_quantizer") +def test_max_index_not_delegated_BI(): + data, dim = Max.test_data["rank_4_dim_3"] + pipeline = OpNotSupportedPipeline[Max.input_t]( + MaxWithIndex(dim), data, "TOSA-0.80+BI", {} + ) + pipeline.run() + + +def test_max_index_not_delegated_MI(): + data, dim = Max.test_data["rank_4_dim_3"] + pipeline = OpNotSupportedPipeline[Max.input_t]( + MaxWithIndex(dim), data, "TOSA-0.80+MI", {} + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_amin.py b/backends/arm/test/ops/test_amin.py new file mode 100644 index 00000000000..092ed472bce --- /dev/null +++ b/backends/arm/test/ops/test_amin.py @@ -0,0 +1,166 @@ +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + + +from typing import Dict, Tuple + +import pytest + +import torch +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU85PipelineBI, + OpNotSupportedPipeline, + TosaPipelineBI, + TosaPipelineMI, +) + + +class Amin(torch.nn.Module): + input_t = Tuple[Tuple[torch.Tensor], int | Tuple[int], bool] + aten_op = ["torch.ops.aten.amin"] + + def __init__(self, dim, keep_dims): + self.dim = dim + self.keep_dims = keep_dims + super().__init__() + + def forward(self, x): + return torch.amin(x, self.dim, self.keep_dims) + + test_data: Dict[str, input_t] = { + "rank_1_dim_0": ((torch.rand([10]),), 0, False), + "rank_2_dim_1_keep_dims": ((torch.rand([2, 2]),), (1,), True), + "rank_4_all_dim": ((torch.rand([1, 2, 5, 5]),), (0, 1, 2, 3), False), + "rank_4_0,3_keep_dims": ((torch.rand([1, 2, 2, 2]),), (0, 3), True), + "rank_4_mult_batches": ((torch.rand([2, 2, 2, 2]),), (0), True), + } + + +class Min(torch.nn.Module): + input_t = Tuple[Tuple[torch.Tensor], int] + aten_op = ["torch.ops.aten.amin"] + + def __init__(self, dim): + self.dim = dim + super().__init__() + + def forward(self, x): + x = torch.min(x, self.dim) + return x[0] + + test_data: Dict[str, input_t] = { + "rank_1_dim_0": ((torch.rand([10]),), 0), + "rank_2_dim_1": ((torch.rand([2, 2]),), 1), + "rank_4_dim_2": ((torch.rand([2, 2, 2, 2]),), 2), + "rank_4_dim_3": ((torch.rand([2, 2, 2, 2]),), 3), + } + + +class MinWithIndex(torch.nn.Module): + def __init__(self, dim): + self.dim = dim + super().__init__() + + def forward(self, x): + x, i = torch.min(x, self.dim) + return x, i + + +@common.parametrize("test_data", Amin.test_data) +def test_amin_tosa_MI(test_data: Amin.input_t): + data, dim, keep_dims = test_data + pipeline = TosaPipelineMI[Amin.input_t]( + Amin(dim, keep_dims), + data, + Amin.aten_op, + ) + pipeline.run() + + +@common.parametrize("test_data", Amin.test_data) +def test_amin_tosa_BI(test_data: Amin.input_t): + data, dim, keep_dims = test_data + pipeline = TosaPipelineBI[Amin.input_t]( + Amin(dim, keep_dims), + data, + Amin.aten_op, + ) + pipeline.run() + + +def test_amin_u55_BI_not_delegated(): + data, dim, keep_dims = Amin.test_data["rank_4_all_dim"] + pipeline = OpNotSupportedPipeline[Amin.input_t]( + Amin(dim, keep_dims), + data, + "TOSA-0.80+BI+u55", + {" executorch_exir_dialects_edge__ops_aten_amin_default": 1}, + ) + pipeline.run() + + +@common.parametrize("test_data", Amin.test_data) +def test_amin_u85_BI(test_data: Amin.input_t): + data, dim, keep_dims = test_data + pipeline = EthosU85PipelineBI[Amin.input_t]( + Amin(dim, keep_dims), + data, + Amin.aten_op, + ) + 
+    pipeline.run()
+
+
+fvp_xfails = {"rank_4_mult_batches": "MLETORCH-517 : Multiple batches not supported"}
+
+
+@common.parametrize("test_data", Amin.test_data, fvp_xfails)
+@common.SkipIfNoCorstone320
+def test_amin_u85_BI_on_fvp(test_data: Amin.input_t):
+    data, dim, keep_dims = test_data
+    pipeline = EthosU85PipelineBI[Amin.input_t](
+        Amin(dim, keep_dims), data, Amin.aten_op, run_on_fvp=True
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", Min.test_data)
+def test_min_to_amin_MI(test_data: Min.input_t):
+    data, dim = test_data
+    pipeline = TosaPipelineMI[Min.input_t](
+        Min(dim),
+        data,
+        "torch.ops.aten.min",
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", Min.test_data)
+def test_min_to_amin_BI(test_data: Min.input_t):
+    data, dim = test_data
+    module = Min(dim)
+    pipeline = TosaPipelineBI[Min.input_t](
+        module,
+        data,
+        "torch.ops.aten.amin",
+    )
+    pipeline.run()
+
+
+@pytest.mark.xfail(reason="MLETORCH-718 : Quantization of indices in arm_quantizer")
+def test_min_index_not_delegated_BI():
+    data, dim = Min.test_data["rank_4_dim_3"]
+    pipeline = OpNotSupportedPipeline[Min.input_t](
+        MinWithIndex(dim), data, "TOSA-0.80+BI", {}
+    )
+    pipeline.run()
+
+
+def test_min_index_not_delegated_MI():
+    data, dim = Min.test_data["rank_4_dim_3"]
+    pipeline = OpNotSupportedPipeline[Min.input_t](
+        MinWithIndex(dim), data, "TOSA-0.80+MI", {}
+    )
+    pipeline.run()
diff --git a/backends/arm/test/tester/analyze_output_utils.py b/backends/arm/test/tester/analyze_output_utils.py
index 3436bfe618a..1ec0f2304aa 100644
--- a/backends/arm/test/tester/analyze_output_utils.py
+++ b/backends/arm/test/tester/analyze_output_utils.py
@@ -137,6 +137,8 @@ def print_error_diffs(
             N, C, H, W = (1, 1, shape[0], shape[1])
         case 1:
             N, C, H, W = (1, 1, 1, shape[0])
+        case 0:
+            N, C, H, W = (1, 1, 1, 1)
         case _:
             raise ValueError("Invalid tensor rank")
 
diff --git a/backends/arm/test/tester/test_pipeline.py b/backends/arm/test/tester/test_pipeline.py
index 1df2db8c4f1..62d0b633224 100644
--- a/backends/arm/test/tester/test_pipeline.py
+++ b/backends/arm/test/tester/test_pipeline.py
@@ -614,6 +614,10 @@ def __init__(
             compile_spec,
             [],
         )
+
+        if "BI" in tosa_version:
+            self.add_stage(self.tester.quantize, pos=0)
+
         self.change_args("check_not.exir", [])
         self.change_args(
             "check_count.exir",

From 3ece59372a141f7e82f2071a967efba71a7e373d Mon Sep 17 00:00:00 2001
From: Gregory Comer
Date: Fri, 28 Feb 2025 05:28:51 -0800
Subject: [PATCH 164/584] Cleanup doc wording and code snippets in a few locations (#8832)

---
 docs/source/backends-xnnpack.md        |  4 ++
 docs/source/getting-started.md         | 57 +++++++++++++++-----------
 docs/source/using-executorch-export.md |  8 ++--
 3 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/docs/source/backends-xnnpack.md b/docs/source/backends-xnnpack.md
index 77285d3cdbe..b07543f5c68 100644
--- a/docs/source/backends-xnnpack.md
+++ b/docs/source/backends-xnnpack.md
@@ -81,6 +81,10 @@ quantizer.set_global(quantization_config)
 
 #### Quantizing a model with the XNNPACKQuantizer
 After configuring the quantizer, the model can be quantized via the `prepare_pt2e` and `convert_pt2e` APIs.
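The hunk below only adds the missing imports to this documentation snippet. For context, the full PT2E flow these APIs are used in looks roughly like the following sketch; the names `model_to_quantize`, `example_inputs`, and `quantizer` follow the surrounding documentation, and the calibration call is an assumption about typical usage rather than something this patch specifies.

```python
# Minimal PT2E quantization sketch (assumed typical usage, not part of this diff).
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.export import export_for_training

exported_model = export_for_training(model_to_quantize, example_inputs).module()
prepared_model = prepare_pt2e(exported_model, quantizer)  # insert observers
prepared_model(*example_inputs)  # calibrate with representative inputs
quantized_model = convert_pt2e(prepared_model)  # fold observers into q/dq ops
```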
```python +from torch.ao.quantization.quantize_pt2e import ( + prepare_pt2e, + convert_pt2e, +) from torch.export import export_for_training exported_model = export_for_training(model_to_quantize, example_inputs).module() diff --git a/docs/source/getting-started.md b/docs/source/getting-started.md index 492334c81df..4d8e3f0189d 100644 --- a/docs/source/getting-started.md +++ b/docs/source/getting-started.md @@ -5,26 +5,25 @@ This section is intended to describe the necessary steps to take PyTorch model a - Run the model using the ExecuTorch runtime APIs on your development platform. - Deploy the model to the target platform using the ExecuTorch runtime. +## System Requirements +The following are required to install the ExecuTorch host libraries, needed to export models and run from Python. Requirements for target end-user devices are backend dependent. See the appropriate backend documentation for more information. + +- Python 3.10 - 3.12 +- g++ version 7 or higher, clang++ version 5 or higher, or another C++17-compatible toolchain. +- Linux or MacOS operating system (Arm or x86). + - Windows is supported via WSL. + ## Installation -To use ExecuTorch, you will need to install both the Python package and the appropriate platform-specific runtime libraries. +To use ExecuTorch, you will need to install both the Python package and the appropriate platform-specific runtime libraries. Pip is the recommended way to install the ExecuTorch python package. -Pip is the recommended way to install the ExecuTorch python package. This package includes the dependencies needed to export a PyTorch model, as well as Python runtime bindings for model testing and evaluation. It is common to install the package within a Python virtual environment, in order to meet the Python and dependency version requirements. +This package includes the dependencies needed to export a PyTorch model, as well as Python runtime bindings for model testing and evaluation. Consider installing ExecuTorch within a virtual environment, such as one provided by [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/getting-started.html#creating-environments) or [venv](https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/#create-and-use-virtual-environments). ``` pip install executorch ``` -To build the framework from source, see [Building From Source](using-executorch-building-from-source.md). - -Backend delegates may require additional dependencies. See the appropriate backend documentation for more information. +To build the framework from source, see [Building From Source](using-executorch-building-from-source.md). Backend delegates may require additional dependencies. See the appropriate backend documentation for more information. -#### System Requirements -The following are required to install the ExecuTorch host libraries, needed to export models and run from Python. Requirements for target end-user devices are backend dependent. See the appropriate backend documentation for more information. - -- Python 3.10 - 3.12 -- g++ version 7 or higher, clang++ version 5 or higher, or another C++17-compatible toolchain. -- Linux or MacOS operating system (Arm or x86). - - Windows is supported via WSL.


@@ -44,15 +43,20 @@ ExecuTorch provides hardware acceleration for a wide variety of hardware. The mo
 For mobile use cases, consider using XNNPACK for Android and Core ML or XNNPACK for iOS as a first step. See [Hardware Backends](backends-overview.md) for more information.
 
 ### Exporting
-Exporting is done using Python APIs. ExecuTorch provides a high degree of customization during the export process, but the typical flow is as follows:
+Exporting is done using Python APIs. ExecuTorch provides a high degree of customization during the export process, but the typical flow is as follows. This example uses the MobileNet V2 image classification model implementation in torchvision, but the process supports any [export-compliant](https://pytorch.org/docs/stable/export.html) PyTorch model.
+
 ```python
-import executorch
+import torch
+import torchvision.models as models
+from torchvision.models.mobilenetv2 import MobileNet_V2_Weights
+from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
+from executorch.exir import to_edge_transform_and_lower
 
-model = MyModel() # The PyTorch model to export
-example_inputs = (torch.randn(1,3,64,64),) # A tuple of inputs
+model = models.mobilenetv2.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).eval()
+sample_inputs = (torch.randn(1, 3, 224, 224), )
 
-et_program = executorch.exir.to_edge_transform_and_lower(
-    torch.export.export(model, example_inputs),
+et_program = to_edge_transform_and_lower(
+    torch.export.export(model, sample_inputs),
     partitioner=[XnnpackPartitioner()]
 ).to_executorch()
 
@@ -62,7 +66,7 @@ with open("model.pte", "wb") as f:
 
 If the model requires varying input sizes, you will need to specify the varying dimensions and bounds as part of the `export` call. See [Model Export and Lowering](using-executorch-export.md) for more information.
 
-The hardware backend to target is controlled by the partitioner parameter to to\_edge\_transform\_and\_lower. In this example, the XnnpackPartitioner is used to target mobile CPUs. See the delegate-specific documentation for a full description of the partitioner and available options.
+The hardware backend to target is controlled by the partitioner parameter to to\_edge\_transform\_and\_lower. In this example, the XnnpackPartitioner is used to target mobile CPUs. See the [backend-specific documentation](backends-overview.md) for information on how to use each backend.
 
 Quantization can also be done at this stage to reduce model size and runtime. Quantization is backend-specific. See the documentation for the target backend for a full description of supported quantization schemes.
 
@@ -70,16 +74,19 @@ Quantization can also be done at this stage to reduce model size and runtime. Qu
 After successfully generating a .pte file, it is common to use the Python runtime APIs to validate the model on the development platform. This can be used to evaluate model accuracy before running on-device.
 
-Inference can be run as follows:
+For the MobileNet V2 model from torchvision used in this example, image inputs are expected as a normalized, float32 tensor with dimensions of (batch, channels, height, width). See [torchvision.models.mobilenet_v2](https://pytorch.org/vision/main/models/generated/torchvision.models.mobilenet_v2.html) for more information on the input and output tensor format for this model.
+ ```python +import torch from executorch.runtime import Runtime +from typing import List runtime = Runtime.get() -input_tensor = torch.randn(1,3,128,128) -program = runtime.load_program("/path/to/mode.pte") +input_tensor: torch.Tensor = torch.randn(1, 3, 224, 224) +program = runtime.load_program("model.pte") method = program.load_method("forward") -outputs = method.execute([input_tensor]) +outputs: List[torch.Tensor] = method.execute([input_tensor]) ``` @@ -101,13 +108,15 @@ To add the library to your app, download the AAR, and add it to the gradle build ``` mkdir -p app/libs -curl https://ossci-android.s3.amazonaws.com/executorch/release/executorch-241002/executorch.aar -o app/libs/executorch.aar +curl https://ossci-android.s3.amazonaws.com/executorch/release/v0.5.0-rc3/executorch.aar -o app/libs/executorch.aar ``` And in gradle, ``` # app/build.gradle.kts dependencies { implementation(files("libs/executorch.aar")) + implementation("com.facebook.soloader:soloader:0.10.5") + implementation("com.facebook.fbjni:fbjni:0.5.1") } ``` diff --git a/docs/source/using-executorch-export.md b/docs/source/using-executorch-export.md index 9bcfedc256a..fb51ff1bd40 100644 --- a/docs/source/using-executorch-export.md +++ b/docs/source/using-executorch-export.md @@ -59,7 +59,7 @@ class Model(torch.nn.Module): torch.nn.Conv2d(8, 16, 3), torch.nn.ReLU(), torch.nn.AdaptiveAvgPool2d((1,1)) - ) + ) self.linear = torch.nn.Linear(16, 10) def forward(self, x): @@ -68,12 +68,14 @@ class Model(torch.nn.Module): y = self.linear(y) return y -model = Model() +model = Model().eval() inputs = (torch.randn(1,1,16,16),) outputs = model(*inputs) print(f"Model output: {outputs}") ``` +Note that the model is set to evaluation mode using `.eval()`. Models should always be exported in evaluation mode unless performing on-device training. This mode configures certain operations with training-specific behavior, such as batch norm or dropout, to use the inference-mode configuration. + ## Export and Lowering To actually export and lower the model, call `export`, `to_edge_transform_and_lower`, and `to_executorch` in sequence. This yields an ExecuTorch program which can be serialized to a file. 
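The hunk that follows changes only a single line of the doc's full example, so as a reference, the complete three-call sequence described above looks roughly like this sketch, mirroring the getting-started snippet earlier in this patch; `model` and `inputs` stand in for the example model and sample inputs defined in the documentation.

```python
import torch
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.exir import to_edge_transform_and_lower

exported = torch.export.export(model, inputs)  # 1. capture an ExportedProgram
et_program = to_edge_transform_and_lower(
    exported, partitioner=[XnnpackPartitioner()]  # 2. lower to the target backend
).to_executorch()  # 3. emit the ExecuTorch program

with open("model.pte", "wb") as f:
    f.write(et_program.buffer)  # serialize to a .pte file
```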
Putting it all together, lowering the example model above using the XNNPACK delegate for mobile CPU performance can be done as follows: @@ -92,7 +94,7 @@ class Model(torch.nn.Module): torch.nn.ReLU(), torch.nn.Conv2d(8, 16, 3), torch.nn.ReLU(), - torch.nn.AdaptiveAvgPool2d([1,1]) + torch.nn.AdaptiveAvgPool2d((1,1)) ) self.linear = torch.nn.Linear(16, 10) From ef2bfcd411a4d5bbb7e3bd1d30cf72d5c82a37f8 Mon Sep 17 00:00:00 2001 From: Sebastian Larsson <38941629+Sebastian-Larsson@users.noreply.github.com> Date: Fri, 28 Feb 2025 16:25:00 +0100 Subject: [PATCH 165/584] Arm backend: Refactor gt, ge, lt, le and eq tests to pipeline (#8828) Signed-off-by: Sebastian Larsson --- .../tosa_supported_operators.py | 5 + backends/arm/test/ops/test_eq.py | 255 +++++++++--------- backends/arm/test/ops/test_ge.py | 250 +++++++++-------- backends/arm/test/ops/test_gt.py | 250 +++++++++-------- backends/arm/test/ops/test_le.py | 250 +++++++++-------- backends/arm/test/ops/test_lt.py | 250 +++++++++-------- 6 files changed, 620 insertions(+), 640 deletions(-) diff --git a/backends/arm/operator_support/tosa_supported_operators.py b/backends/arm/operator_support/tosa_supported_operators.py index 607ae017a56..7a9ce29ff52 100644 --- a/backends/arm/operator_support/tosa_supported_operators.py +++ b/backends/arm/operator_support/tosa_supported_operators.py @@ -195,6 +195,11 @@ def is_node_supported( exir_ops.edge.aten.bitwise_xor.Tensor, exir_ops.edge.aten.amax.default, exir_ops.edge.aten.amin.default, + exir_ops.edge.aten.eq.Tensor, + exir_ops.edge.aten.ge.Tensor, + exir_ops.edge.aten.gt.Tensor, + exir_ops.edge.aten.le.Tensor, + exir_ops.edge.aten.lt.Tensor, ] if node.target in unsupported_ops: diff --git a/backends/arm/test/ops/test_eq.py b/backends/arm/test/ops/test_eq.py index 263a042ea1c..329f65dfead 100644 --- a/backends/arm/test/ops/test_eq.py +++ b/backends/arm/test/ops/test_eq.py @@ -1,145 +1,136 @@ # Copyright 2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-import unittest +from typing import Tuple +import pytest import torch from executorch.backends.arm.test import common -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized - -test_data_suite = [ - # (test_name, input, other,) See torch.eq() for info - ( - "op_eq_rank1_ones", - torch.ones(5), - torch.ones(5), - ), - ( - "op_eq_rank2_rand", - torch.rand(4, 5), - torch.rand(1, 5), - ), - ( - "op_eq_rank3_randn", - torch.randn(10, 5, 2), - torch.randn(10, 5, 2), - ), - ( - "op_eq_rank4_randn", - torch.randn(3, 2, 2, 2), - torch.randn(3, 2, 2, 2), - ), -] - - -class TestEqual(unittest.TestCase): - class Equal(torch.nn.Module): - def forward( - self, - input_: torch.Tensor, - other_: torch.Tensor, - ): - return input_ == other_ - - def _test_eq_tosa_MI_pipeline( - self, - compile_spec: list[CompileSpec], - module: torch.nn.Module, - test_data: tuple[torch.Tensor, torch.Tensor], - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .export() - .check_count({"torch.ops.aten.eq.Tensor": 1}) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_eq_tosa_BI_pipeline( - self, - compile_spec: list[CompileSpec], - module: torch.nn.Module, - test_data: tuple[torch.Tensor, torch.Tensor], - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .check_count({"torch.ops.aten.eq.Tensor": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - @parameterized.expand(test_data_suite) - def test_eq_tosa_MI( - self, - test_name: str, - input_: torch.Tensor, - other_: torch.Tensor, - ): - test_data = (input_, other_) - self._test_eq_tosa_MI_pipeline( - common.get_tosa_compile_spec("TOSA-0.80+MI"), self.Equal(), test_data - ) - @parameterized.expand(test_data_suite) - def test_eq_tosa_BI( - self, - test_name: str, - input_: torch.Tensor, - other_: torch.Tensor, - ): - test_data = (input_, other_) - self._test_eq_tosa_BI_pipeline( - common.get_tosa_compile_spec("TOSA-0.80+BI"), self.Equal(), test_data - ) - - @parameterized.expand(test_data_suite) - @unittest.skip - def test_eq_u55_BI( - self, - test_name: str, - input_: torch.Tensor, - other_: torch.Tensor, - ): - test_data = (input_, other_) - self._test_eq_tosa_BI_pipeline( - common.get_u55_compile_spec(permute_memory_to_nhwc=True), - self.Equal(), - test_data, - ) - - @parameterized.expand(test_data_suite) - @unittest.skip - def test_eq_u85_BI( +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU85PipelineBI, + OpNotSupportedPipeline, + TosaPipelineBI, + TosaPipelineMI, +) + +aten_op = "torch.ops.aten.eq.Tensor" +exir_op = "executorch_exir_dialects_edge__ops_aten_eq_Tensor" + +input_t = Tuple[torch.Tensor] + + +class Equal(torch.nn.Module): + def __init__(self, input, other): + super().__init__() + self.input_ = input + self.other_ = other + + def forward( self, - test_name: str, input_: torch.Tensor, other_: torch.Tensor, ): - test_data = (input_, other_) - self._test_eq_tosa_BI_pipeline( - common.get_u85_compile_spec(permute_memory_to_nhwc=True), - self.Equal(), - test_data, - ) + return input_ == other_ + + def 
get_inputs(self): + return (self.input_, self.other_) + + +op_eq_rank1_ones = Equal( + torch.ones(5), + torch.ones(5), +) +op_eq_rank2_rand = Equal( + torch.rand(4, 5), + torch.rand(1, 5), +) +op_eq_rank3_randn = Equal( + torch.randn(10, 5, 2), + torch.randn(10, 5, 2), +) +op_eq_rank4_randn = Equal( + torch.randn(3, 2, 2, 2), + torch.randn(3, 2, 2, 2), +) + +test_data_common = { + "eq_rank1_ones": op_eq_rank1_ones, + "eq_rank2_rand": op_eq_rank2_rand, + "eq_rank3_randn": op_eq_rank3_randn, + "eq_rank4_randn": op_eq_rank4_randn, +} + + +@common.parametrize("test_module", test_data_common) +def test_eq_tosa_MI(test_module): + pipeline = TosaPipelineMI[input_t]( + test_module, test_module.get_inputs(), aten_op, exir_op + ) + pipeline.run() + + +@common.parametrize("test_module", test_data_common) +def test_eq_tosa_BI(test_module): + pipeline = TosaPipelineBI[input_t]( + test_module, test_module.get_inputs(), aten_op, exir_op + ) + pipeline.run() + + +@common.parametrize("test_module", test_data_common) +def test_eq_u55_BI(test_module): + # EQUAL is not supported on U55. + pipeline = OpNotSupportedPipeline[input_t]( + test_module, + test_module.get_inputs(), + "TOSA-0.80+BI+u55", + {exir_op: 1}, + ) + pipeline.run() + + +@common.parametrize("test_module", test_data_common) +def test_eq_u85_BI(test_module): + pipeline = EthosU85PipelineBI[input_t]( + test_module, + test_module.get_inputs(), + aten_op, + exir_op, + run_on_fvp=False, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.parametrize("test_module", test_data_common) +@pytest.mark.skip(reason="The same as test_eq_u55_BI") +def test_eq_u55_BI_on_fvp(test_module): + # EQUAL is not supported on U55. + pipeline = OpNotSupportedPipeline[input_t]( + test_module, + test_module.get_inputs(), + "TOSA-0.80+BI+u55", + {exir_op: 1}, + ) + pipeline.run() + + +@common.parametrize( + "test_module", + test_data_common, + xfails={"eq_rank4_randn": "4D fails because boolean Tensors can't be subtracted"}, +) +@common.SkipIfNoCorstone320 +def test_eq_u85_BI_on_fvp(test_module): + pipeline = EthosU85PipelineBI[input_t]( + test_module, + test_module.get_inputs(), + aten_op, + exir_op, + run_on_fvp=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_ge.py b/backends/arm/test/ops/test_ge.py index ff6cacd1f97..a6193f6ea08 100644 --- a/backends/arm/test/ops/test_ge.py +++ b/backends/arm/test/ops/test_ge.py @@ -1,140 +1,136 @@ # Copyright 2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-import unittest +from typing import Tuple +import pytest import torch from executorch.backends.arm.test import common -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized - -test_data_suite = [ - # (test_name, input, other,) See torch.ge() for info - ( - "op_ge_rank1_ones", - torch.ones(5), - torch.ones(5), - ), - ( - "op_ge_rank2_rand", - torch.rand(4, 5), - torch.rand(1, 5), - ), - ( - "op_ge_rank3_randn", - torch.randn(10, 5, 2), - torch.randn(10, 5, 2), - ), - ( - "op_ge_rank4_randn", - torch.randn(3, 2, 2, 2), - torch.randn(3, 2, 2, 2), - ), -] - - -class TestGreaterEqual(unittest.TestCase): - class GreaterEqual(torch.nn.Module): - def forward( - self, - input_: torch.Tensor, - other_: torch.Tensor, - ): - return input_ >= other_ - - def _test_ge_tosa_pipeline( - self, module: torch.nn.Module, test_data: tuple[torch.Tensor, torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check_count({"torch.ops.aten.ge.Tensor": 1}) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_ge_tosa_BI_pipeline( - self, - compile_spec: list[CompileSpec], - module: torch.nn.Module, - test_data: tuple[torch.Tensor, torch.Tensor], - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .check_count({"torch.ops.aten.ge.Tensor": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - @parameterized.expand(test_data_suite) - def test_ge_tosa_MI( - self, - test_name: str, - input_: torch.Tensor, - other_: torch.Tensor, - ): - test_data = (input_, other_) - self._test_ge_tosa_pipeline(self.GreaterEqual(), test_data) - @parameterized.expand(test_data_suite) - def test_ge_tosa_BI( - self, - test_name: str, - input_: torch.Tensor, - other_: torch.Tensor, - ): - test_data = (input_, other_) - self._test_ge_tosa_BI_pipeline( - common.get_tosa_compile_spec("TOSA-0.80+BI"), self.GreaterEqual(), test_data - ) - - @parameterized.expand(test_data_suite) - @unittest.skip - def test_ge_u55_BI( - self, - test_name: str, - input_: torch.Tensor, - other_: torch.Tensor, - ): - test_data = (input_, other_) - self._test_ge_tosa_BI_pipeline( - common.get_u55_compile_spec(permute_memory_to_nhwc=True), - self.GreaterEqual(), - test_data, - ) - - @parameterized.expand(test_data_suite) - @unittest.skip - def test_ge_u85_BI( +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU85PipelineBI, + OpNotSupportedPipeline, + TosaPipelineBI, + TosaPipelineMI, +) + +aten_op = "torch.ops.aten.ge.Tensor" +exir_op = "executorch_exir_dialects_edge__ops_aten_ge_Tensor" + +input_t = Tuple[torch.Tensor] + + +class GreaterEqual(torch.nn.Module): + def __init__(self, input, other): + super().__init__() + self.input_ = input + self.other_ = other + + def forward( self, - test_name: str, input_: torch.Tensor, other_: torch.Tensor, ): - test_data = (input_, other_) - self._test_ge_tosa_BI_pipeline( - common.get_u85_compile_spec(permute_memory_to_nhwc=True), - self.GreaterEqual(), - test_data, - ) + return input_ >= other_ + + def get_inputs(self): 
+ return (self.input_, self.other_) + + +op_ge_rank1_ones = GreaterEqual( + torch.ones(5), + torch.ones(5), +) +op_ge_rank2_rand = GreaterEqual( + torch.rand(4, 5), + torch.rand(1, 5), +) +op_ge_rank3_randn = GreaterEqual( + torch.randn(10, 5, 2), + torch.randn(10, 5, 2), +) +op_ge_rank4_randn = GreaterEqual( + torch.randn(3, 2, 2, 2), + torch.randn(3, 2, 2, 2), +) + +test_data_common = { + "ge_rank1_ones": op_ge_rank1_ones, + "ge_rank2_rand": op_ge_rank2_rand, + "ge_rank3_randn": op_ge_rank3_randn, + "ge_rank4_randn": op_ge_rank4_randn, +} + + +@common.parametrize("test_module", test_data_common) +def test_ge_tosa_MI(test_module): + pipeline = TosaPipelineMI[input_t]( + test_module, test_module.get_inputs(), aten_op, exir_op + ) + pipeline.run() + + +@common.parametrize("test_module", test_data_common) +def test_ge_tosa_BI(test_module): + pipeline = TosaPipelineBI[input_t]( + test_module, test_module.get_inputs(), aten_op, exir_op + ) + pipeline.run() + + +@common.parametrize("test_module", test_data_common) +def test_ge_u55_BI(test_module): + # GREATER_EQUAL is not supported on U55. + pipeline = OpNotSupportedPipeline[input_t]( + test_module, + test_module.get_inputs(), + "TOSA-0.80+BI+u55", + {exir_op: 1}, + ) + pipeline.run() + + +@common.parametrize("test_module", test_data_common) +def test_ge_u85_BI(test_module): + pipeline = EthosU85PipelineBI[input_t]( + test_module, + test_module.get_inputs(), + aten_op, + exir_op, + run_on_fvp=False, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.parametrize("test_module", test_data_common) +@pytest.mark.skip(reason="The same as test_ge_u55_BI") +def test_ge_u55_BI_on_fvp(test_module): + # GREATER_EQUAL is not supported on U55. + pipeline = OpNotSupportedPipeline[input_t]( + test_module, + test_module.get_inputs(), + "TOSA-0.80+BI+u55", + {exir_op: 1}, + ) + pipeline.run() + + +@common.parametrize( + "test_module", + test_data_common, + xfails={"ge_rank4_randn": "4D fails because boolean Tensors can't be subtracted"}, +) +@common.SkipIfNoCorstone320 +def test_ge_u85_BI_on_fvp(test_module): + pipeline = EthosU85PipelineBI[input_t]( + test_module, + test_module.get_inputs(), + aten_op, + exir_op, + run_on_fvp=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_gt.py b/backends/arm/test/ops/test_gt.py index 33899f64492..2095f781bdb 100644 --- a/backends/arm/test/ops/test_gt.py +++ b/backends/arm/test/ops/test_gt.py @@ -1,140 +1,136 @@ # Copyright 2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-import unittest +from typing import Tuple +import pytest import torch from executorch.backends.arm.test import common -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized - -test_data_suite = [ - # (test_name, input, other,) See torch.gt() for info - ( - "op_gt_rank1_ones", - torch.ones(5), - torch.ones(5), - ), - ( - "op_gt_rank2_rand", - torch.rand(4, 5), - torch.rand(1, 5), - ), - ( - "op_gt_rank3_randn", - torch.randn(10, 5, 2), - torch.randn(10, 5, 2), - ), - ( - "op_gt_rank4_randn", - torch.randn(3, 2, 2, 2), - torch.randn(3, 2, 2, 2), - ), -] - - -class TestGreater(unittest.TestCase): - class Greater(torch.nn.Module): - def forward( - self, - input_: torch.Tensor, - other_: torch.Tensor, - ): - return input_ > other_ - - def _test_gt_tosa_pipeline( - self, module: torch.nn.Module, test_data: tuple[torch.Tensor, torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check_count({"torch.ops.aten.gt.Tensor": 1}) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_gt_tosa_BI_pipeline( - self, - compile_spec: list[CompileSpec], - module: torch.nn.Module, - test_data: tuple[torch.Tensor, torch.Tensor], - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .check_count({"torch.ops.aten.gt.Tensor": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - @parameterized.expand(test_data_suite) - def test_gt_tosa_MI( - self, - test_name: str, - input_: torch.Tensor, - other_: torch.Tensor, - ): - test_data = (input_, other_) - self._test_gt_tosa_pipeline(self.Greater(), test_data) - @parameterized.expand(test_data_suite) - def test_gt_tosa_BI( - self, - test_name: str, - input_: torch.Tensor, - other_: torch.Tensor, - ): - test_data = (input_, other_) - self._test_gt_tosa_BI_pipeline( - common.get_tosa_compile_spec("TOSA-0.80+BI"), self.Greater(), test_data - ) - - @parameterized.expand(test_data_suite) - @unittest.skip - def test_gt_u55_BI( - self, - test_name: str, - input_: torch.Tensor, - other_: torch.Tensor, - ): - test_data = (input_, other_) - self._test_gt_tosa_BI_pipeline( - common.get_u55_compile_spec(permute_memory_to_nhwc=True), - self.Greater(), - test_data, - ) - - @parameterized.expand(test_data_suite) - @unittest.skip - def test_gt_u85_BI( +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU85PipelineBI, + OpNotSupportedPipeline, + TosaPipelineBI, + TosaPipelineMI, +) + +aten_op = "torch.ops.aten.gt.Tensor" +exir_op = "executorch_exir_dialects_edge__ops_aten_gt_Tensor" + +input_t = Tuple[torch.Tensor] + + +class Greater(torch.nn.Module): + def __init__(self, input, other): + super().__init__() + self.input_ = input + self.other_ = other + + def forward( self, - test_name: str, input_: torch.Tensor, other_: torch.Tensor, ): - test_data = (input_, other_) - self._test_gt_tosa_BI_pipeline( - common.get_u85_compile_spec(permute_memory_to_nhwc=True), - self.Greater(), - test_data, - ) + return input_ > other_ + + def get_inputs(self): + return (self.input_, self.other_) + 
+ +op_gt_rank1_ones = Greater( + torch.ones(5), + torch.ones(5), +) +op_gt_rank2_rand = Greater( + torch.rand(4, 5), + torch.rand(1, 5), +) +op_gt_rank3_randn = Greater( + torch.randn(10, 5, 2), + torch.randn(10, 5, 2), +) +op_gt_rank4_randn = Greater( + torch.randn(3, 2, 2, 2), + torch.randn(3, 2, 2, 2), +) + +test_data_common = { + "gt_rank1_ones": op_gt_rank1_ones, + "gt_rank2_rand": op_gt_rank2_rand, + "gt_rank3_randn": op_gt_rank3_randn, + "gt_rank4_randn": op_gt_rank4_randn, +} + + +@common.parametrize("test_module", test_data_common) +def test_gt_tosa_MI(test_module): + pipeline = TosaPipelineMI[input_t]( + test_module, test_module.get_inputs(), aten_op, exir_op + ) + pipeline.run() + + +@common.parametrize("test_module", test_data_common) +def test_gt_tosa_BI(test_module): + pipeline = TosaPipelineBI[input_t]( + test_module, test_module.get_inputs(), aten_op, exir_op + ) + pipeline.run() + + +@common.parametrize("test_module", test_data_common) +def test_gt_u55_BI(test_module): + # GREATER is not supported on U55. + pipeline = OpNotSupportedPipeline[input_t]( + test_module, + test_module.get_inputs(), + "TOSA-0.80+BI+u55", + {exir_op: 1}, + ) + pipeline.run() + + +@common.parametrize("test_module", test_data_common) +def test_gt_u85_BI(test_module): + pipeline = EthosU85PipelineBI[input_t]( + test_module, + test_module.get_inputs(), + aten_op, + exir_op, + run_on_fvp=False, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.parametrize("test_module", test_data_common) +@pytest.mark.skip(reason="The same as test_gt_u55_BI") +def test_gt_u55_BI_on_fvp(test_module): + # GREATER is not supported on U55. + pipeline = OpNotSupportedPipeline[input_t]( + test_module, + test_module.get_inputs(), + "TOSA-0.80+BI+u55", + {exir_op: 1}, + ) + pipeline.run() + + +@common.parametrize( + "test_module", + test_data_common, + xfails={"gt_rank4_randn": "4D fails because boolean Tensors can't be subtracted"}, +) +@common.SkipIfNoCorstone320 +def test_gt_u85_BI_on_fvp(test_module): + pipeline = EthosU85PipelineBI[input_t]( + test_module, + test_module.get_inputs(), + aten_op, + exir_op, + run_on_fvp=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_le.py b/backends/arm/test/ops/test_le.py index 0710f483a0b..7e243ead620 100644 --- a/backends/arm/test/ops/test_le.py +++ b/backends/arm/test/ops/test_le.py @@ -1,140 +1,136 @@ # Copyright 2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-import unittest
+from typing import Tuple
 
+import pytest
 import torch
 from executorch.backends.arm.test import common
-from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.exir.backend.compile_spec_schema import CompileSpec
-from parameterized import parameterized
-
-test_data_suite = [
-    # (test_name, input, other,) See torch.le() for info
-    (
-        "op_le_rank1_ones",
-        torch.ones(5),
-        torch.ones(5),
-    ),
-    (
-        "op_le_rank2_rand",
-        torch.rand(4, 5),
-        torch.rand(1, 5),
-    ),
-    (
-        "op_le_rank3_randn",
-        torch.randn(10, 5, 2),
-        torch.randn(10, 5, 2),
-    ),
-    (
-        "op_le_rank4_randn",
-        torch.randn(3, 2, 2, 2),
-        torch.randn(3, 2, 2, 2),
-    ),
-]
-
-
-class TestLessEqual(unittest.TestCase):
-    class LessEqual(torch.nn.Module):
-        def forward(
-            self,
-            input_: torch.Tensor,
-            other_: torch.Tensor,
-        ):
-            return torch.le(input_, other_)
-
-    def _test_le_tosa_pipeline(
-        self, module: torch.nn.Module, test_data: tuple[torch.Tensor, torch.Tensor]
-    ):
-        (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),
-            )
-            .export()
-            .check_count({"torch.ops.aten.le.Tensor": 1})
-            .to_edge()
-            .partition()
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .to_executorch()
-            .run_method_and_compare_outputs(inputs=test_data)
-        )
-
-    def _test_le_tosa_BI_pipeline(
-        self,
-        compile_spec: list[CompileSpec],
-        module: torch.nn.Module,
-        test_data: tuple[torch.Tensor, torch.Tensor],
-    ):
-        (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=compile_spec,
-            )
-            .quantize()
-            .export()
-            .check_count({"torch.ops.aten.le.Tensor": 1})
-            .check(["torch.ops.quantized_decomposed"])
-            .to_edge()
-            .partition()
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .to_executorch()
-            .run_method_and_compare_outputs(inputs=test_data)
-        )
-
-    @parameterized.expand(test_data_suite)
-    def test_le_tosa_MI(
-        self,
-        test_name: str,
-        input_: torch.Tensor,
-        other_: torch.Tensor,
-    ):
-        test_data = (input_, other_)
-        self._test_le_tosa_pipeline(self.LessEqual(), test_data)
 
-    @parameterized.expand(test_data_suite)
-    def test_le_tosa_BI(
-        self,
-        test_name: str,
-        input_: torch.Tensor,
-        other_: torch.Tensor,
-    ):
-        test_data = (input_, other_)
-        self._test_le_tosa_BI_pipeline(
-            common.get_tosa_compile_spec("TOSA-0.80+BI"), self.LessEqual(), test_data
-        )
-
-    @parameterized.expand(test_data_suite)
-    @unittest.skip
-    def test_le_u55_BI(
-        self,
-        test_name: str,
-        input_: torch.Tensor,
-        other_: torch.Tensor,
-    ):
-        test_data = (input_, other_)
-        self._test_le_tosa_BI_pipeline(
-            common.get_u55_compile_spec(permute_memory_to_nhwc=True),
-            self.LessEqual(),
-            test_data,
-        )
-
-    @parameterized.expand(test_data_suite)
-    @unittest.skip
-    def test_le_u85_BI(
+from executorch.backends.arm.test.tester.test_pipeline import (
+    EthosU85PipelineBI,
+    OpNotSupportedPipeline,
+    TosaPipelineBI,
+    TosaPipelineMI,
+)
+
+aten_op = "torch.ops.aten.le.Tensor"
+exir_op = "executorch_exir_dialects_edge__ops_aten_le_Tensor"
+
+input_t = Tuple[torch.Tensor]
+
+
+class LessEqual(torch.nn.Module):
+    def __init__(self, input, other):
+        super().__init__()
+        self.input_ = input
+        self.other_ = other
+
+    def forward(
         self,
-        test_name: str,
         input_: torch.Tensor,
         other_: torch.Tensor,
     ):
-        test_data = (input_, other_)
-        self._test_le_tosa_BI_pipeline(
-            common.get_u85_compile_spec(permute_memory_to_nhwc=True),
-            self.LessEqual(),
-            test_data,
-        )
+        return input_ <= other_
+
+    def get_inputs(self):
+        return (self.input_, self.other_)
+
+
+op_le_rank1_ones = LessEqual(
+    torch.ones(5),
+    torch.ones(5),
+)
+op_le_rank2_rand = LessEqual(
+    torch.rand(4, 5),
+    torch.rand(1, 5),
+)
+op_le_rank3_randn = LessEqual(
+    torch.randn(10, 5, 2),
+    torch.randn(10, 5, 2),
+)
+op_le_rank4_randn = LessEqual(
+    torch.randn(3, 2, 2, 2),
+    torch.randn(3, 2, 2, 2),
+)
+
+test_data_common = {
+    "le_rank1_ones": op_le_rank1_ones,
+    "le_rank2_rand": op_le_rank2_rand,
+    "le_rank3_randn": op_le_rank3_randn,
+    "le_rank4_randn": op_le_rank4_randn,
+}
+
+
+@common.parametrize("test_module", test_data_common)
+def test_le_tosa_MI(test_module):
+    pipeline = TosaPipelineMI[input_t](
+        test_module, test_module.get_inputs(), aten_op, exir_op
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_module", test_data_common)
+def test_le_tosa_BI(test_module):
+    pipeline = TosaPipelineBI[input_t](
+        test_module, test_module.get_inputs(), aten_op, exir_op
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_module", test_data_common)
+def test_le_u55_BI(test_module):
+    # GREATER_EQUAL is not supported on U55. LE uses the GREATER_EQUAL Tosa operator.
+    pipeline = OpNotSupportedPipeline[input_t](
+        test_module,
+        test_module.get_inputs(),
+        "TOSA-0.80+BI+u55",
+        {exir_op: 1},
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_module", test_data_common)
+def test_le_u85_BI(test_module):
+    pipeline = EthosU85PipelineBI[input_t](
+        test_module,
+        test_module.get_inputs(),
+        aten_op,
+        exir_op,
+        run_on_fvp=False,
+        use_to_edge_transform_and_lower=True,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_module", test_data_common)
+@pytest.mark.skip(reason="The same as test_le_u55_BI")
+def test_le_u55_BI_on_fvp(test_module):
+    # GREATER_EQUAL is not supported on U55. LE uses the GREATER_EQUAL Tosa operator.
+    pipeline = OpNotSupportedPipeline[input_t](
+        test_module,
+        test_module.get_inputs(),
+        "TOSA-0.80+BI+u55",
+        {exir_op: 1},
+    )
+    pipeline.run()
+
+
+@common.parametrize(
+    "test_module",
+    test_data_common,
+    xfails={"le_rank4_randn": "4D fails because boolean Tensors can't be subtracted"},
+)
+@common.SkipIfNoCorstone320
+def test_le_u85_BI_on_fvp(test_module):
+    pipeline = EthosU85PipelineBI[input_t](
+        test_module,
+        test_module.get_inputs(),
+        aten_op,
+        exir_op,
+        run_on_fvp=True,
+        use_to_edge_transform_and_lower=True,
+    )
+    pipeline.run()
diff --git a/backends/arm/test/ops/test_lt.py b/backends/arm/test/ops/test_lt.py
index 398df8c2036..cae119cd7a8 100644
--- a/backends/arm/test/ops/test_lt.py
+++ b/backends/arm/test/ops/test_lt.py
@@ -1,140 +1,136 @@
 # Copyright 2025 Arm Limited and/or its affiliates.
-# All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
-import unittest +from typing import Tuple +import pytest import torch from executorch.backends.arm.test import common -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized - -test_data_suite = [ - # (test_name, input, other,) See torch.lt() for info - ( - "op_lt_rank1_ones", - torch.ones(5), - torch.ones(5), - ), - ( - "op_lt_rank2_rand", - torch.rand(4, 5), - torch.rand(1, 5), - ), - ( - "op_lt_rank3_randn", - torch.randn(10, 5, 2), - torch.randn(10, 5, 2), - ), - ( - "op_lt_rank4_randn", - torch.randn(3, 2, 2, 2), - torch.randn(3, 2, 2, 2), - ), -] - - -class TestLessThan(unittest.TestCase): - class LessThan(torch.nn.Module): - def forward( - self, - input_: torch.Tensor, - other_: torch.Tensor, - ): - return torch.lt(input_, other_) - - def _test_lt_tosa_pipeline( - self, module: torch.nn.Module, test_data: tuple[torch.Tensor, torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check_count({"torch.ops.aten.lt.Tensor": 1}) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_lt_tosa_BI_pipeline( - self, - compile_spec: list[CompileSpec], - module: torch.nn.Module, - test_data: tuple[torch.Tensor, torch.Tensor], - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .check_count({"torch.ops.aten.lt.Tensor": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - @parameterized.expand(test_data_suite) - def test_lt_tosa_MI( - self, - test_name: str, - input_: torch.Tensor, - other_: torch.Tensor, - ): - test_data = (input_, other_) - self._test_lt_tosa_pipeline(self.LessThan(), test_data) - @parameterized.expand(test_data_suite) - def test_lt_tosa_BI( - self, - test_name: str, - input_: torch.Tensor, - other_: torch.Tensor, - ): - test_data = (input_, other_) - self._test_lt_tosa_BI_pipeline( - common.get_tosa_compile_spec("TOSA-0.80+BI"), self.LessThan(), test_data - ) - - @parameterized.expand(test_data_suite) - @unittest.skip - def test_lt_u55_BI( - self, - test_name: str, - input_: torch.Tensor, - other_: torch.Tensor, - ): - test_data = (input_, other_) - self._test_lt_tosa_BI_pipeline( - common.get_u55_compile_spec(permute_memory_to_nhwc=True), - self.LessThan(), - test_data, - ) - - @parameterized.expand(test_data_suite) - @unittest.skip - def test_lt_u85_BI( +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU85PipelineBI, + OpNotSupportedPipeline, + TosaPipelineBI, + TosaPipelineMI, +) + +aten_op = "torch.ops.aten.lt.Tensor" +exir_op = "executorch_exir_dialects_edge__ops_aten_lt_Tensor" + +input_t = Tuple[torch.Tensor] + + +class LessThan(torch.nn.Module): + def __init__(self, input, other): + super().__init__() + self.input_ = input + self.other_ = other + + def forward( self, - test_name: str, input_: torch.Tensor, other_: torch.Tensor, ): - test_data = (input_, other_) - self._test_lt_tosa_BI_pipeline( - common.get_u85_compile_spec(permute_memory_to_nhwc=True), - self.LessThan(), - test_data, - ) + return input_ < other_ + + def get_inputs(self): + return 
(self.input_, self.other_) + + +op_lt_rank1_ones = LessThan( + torch.ones(5), + torch.ones(5), +) +op_lt_rank2_rand = LessThan( + torch.rand(4, 5), + torch.rand(1, 5), +) +op_lt_rank3_randn = LessThan( + torch.randn(10, 5, 2), + torch.randn(10, 5, 2), +) +op_lt_rank4_randn = LessThan( + torch.randn(3, 2, 2, 2), + torch.randn(3, 2, 2, 2), +) + +test_data_common = { + "lt_rank1_ones": op_lt_rank1_ones, + "lt_rank2_rand": op_lt_rank2_rand, + "lt_rank3_randn": op_lt_rank3_randn, + "lt_rank4_randn": op_lt_rank4_randn, +} + + +@common.parametrize("test_module", test_data_common) +def test_lt_tosa_MI(test_module): + pipeline = TosaPipelineMI[input_t]( + test_module, test_module.get_inputs(), aten_op, exir_op + ) + pipeline.run() + + +@common.parametrize("test_module", test_data_common) +def test_lt_tosa_BI(test_module): + pipeline = TosaPipelineBI[input_t]( + test_module, test_module.get_inputs(), aten_op, exir_op + ) + pipeline.run() + + +@common.parametrize("test_module", test_data_common) +def test_lt_u55_BI(test_module): + # GREATER is not supported on U55. LT uses the GREATER Tosa operator. + pipeline = OpNotSupportedPipeline[input_t]( + test_module, + test_module.get_inputs(), + "TOSA-0.80+BI+u55", + {exir_op: 1}, + ) + pipeline.run() + + +@common.parametrize("test_module", test_data_common) +def test_lt_u85_BI(test_module): + pipeline = EthosU85PipelineBI[input_t]( + test_module, + test_module.get_inputs(), + aten_op, + exir_op, + run_on_fvp=False, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.parametrize("test_module", test_data_common) +@pytest.mark.skip(reason="The same as test_lt_u55_BI") +def test_lt_u55_BI_on_fvp(test_module): + # GREATER is not supported on U55. LT uses the GREATER Tosa operator. + pipeline = OpNotSupportedPipeline[input_t]( + test_module, + test_module.get_inputs(), + "TOSA-0.80+BI+u55", + {exir_op: 1}, + ) + pipeline.run() + + +@common.parametrize( + "test_module", + test_data_common, + xfails={"lt_rank4_randn": "4D fails because boolean Tensors can't be subtracted"}, +) +@common.SkipIfNoCorstone320 +def test_lt_u85_BI_on_fvp(test_module): + pipeline = EthosU85PipelineBI[input_t]( + test_module, + test_module.get_inputs(), + aten_op, + exir_op, + run_on_fvp=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() From 3be14cee5078843ee9a0e4d84dff667b1d84753f Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Fri, 28 Feb 2025 07:57:33 -0800 Subject: [PATCH 166/584] Ignore .build dir and artifacts (#8822) --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 67dd6be3342..6cbb0f6643e 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,7 @@ pip-out/ # Xcode xcuserdata/ +.build/ .swiftpm/ *.xcworkspace/ *.xcframework/ From 464bd9ecafac35a7900a2dbb9c7c202fd0e9371f Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Fri, 28 Feb 2025 08:09:20 -0800 Subject: [PATCH 167/584] Make flat_tensor depend on generated schema (#8803) When passing -GNinja to cmake to build with Ninja, I was frequently getting errors that executorch/extension/flat_tensor/serialize/flat_tensor_generated.h was not found. This seems to fix them? 
--- CMakeLists.txt | 1 - extension/flat_tensor/CMakeLists.txt | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 01ad728c425..6bdcda2f19c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -724,7 +724,6 @@ endif() if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/flat_tensor) - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/flat_tensor/serialize) endif() if(EXECUTORCH_BUILD_EXTENSION_LLM) diff --git a/extension/flat_tensor/CMakeLists.txt b/extension/flat_tensor/CMakeLists.txt index 14d49d244e3..caacd96b557 100644 --- a/extension/flat_tensor/CMakeLists.txt +++ b/extension/flat_tensor/CMakeLists.txt @@ -36,6 +36,9 @@ install( DESTINATION ${_common_include_directories} ) +add_subdirectory(serialize) +add_dependencies(extension_flat_tensor flat_tensor_schema) + if(BUILD_TESTING) add_subdirectory(test) endif() From ef1e3e14c40f071ea7e202c3d977e86e1d702bba Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Fri, 28 Feb 2025 08:10:00 -0800 Subject: [PATCH 168/584] fix a missed get_aten_mode_options spot (#8775) This was the only spot that didn't use a tuple, so my find/replace missed it. --- kernels/portable/cpu/util/targets.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernels/portable/cpu/util/targets.bzl b/kernels/portable/cpu/util/targets.bzl index 26f55a91e8d..eef765d5eec 100644 --- a/kernels/portable/cpu/util/targets.bzl +++ b/kernels/portable/cpu/util/targets.bzl @@ -1,4 +1,4 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") def define_common_targets(): """Defines targets that should be shared between fbcode and xplat. @@ -281,7 +281,7 @@ def define_common_targets(): ) # Utility functions that can be used by operators that perform reduction - for aten_mode in [True, False]: + for aten_mode in get_aten_mode_options(): suffix = "_aten" if aten_mode else "" runtime.cxx_library( name = "reduce_util{}".format(suffix), From dfe11f053d3890dc209f437b3a7fd9a7203e8c1e Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Fri, 28 Feb 2025 08:10:54 -0800 Subject: [PATCH 169/584] test buck2 query for much more of the repo (#8660) Had to fix a couple .buckconfig issues post-shim_et to enable this. --- .buckconfig | 4 +++- .ci/scripts/unittest-buck2.sh | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.buckconfig b/.buckconfig index a7c48881bb3..8bc3e80ff17 100644 --- a/.buckconfig +++ b/.buckconfig @@ -11,13 +11,15 @@ shim_et = shim_et [repository_aliases] + bazel_skylib = shim config = prelude ovr_config = prelude toolchains = shim_et fbcode = shim_et - fbcode_macros = shim_et + fbcode_macros = shim fbsource = shim_et buck = shim + gh_facebook_buck2_shims_meta = shim [cxx] cxxflags = -g -std=c++17 diff --git a/.ci/scripts/unittest-buck2.sh b/.ci/scripts/unittest-buck2.sh index 2e386570504..09275f29ae9 100755 --- a/.ci/scripts/unittest-buck2.sh +++ b/.ci/scripts/unittest-buck2.sh @@ -7,7 +7,11 @@ set -eux # TODO: expand this to //... -buck2 query //runtime/... +# TODO: can't query cadence & vulkan backends +buck2 query "//backends/apple/... + //backends/example/... + \ +//backends/mediatek/... + //backends/test/... + //backends/transforms/... + \ +//backends/xnnpack/... + //configurations/... + //kernels/portable/cpu/... + \ +//runtime/... + //schema/... + //test/... + //util/..." 
 # TODO: expand the covered scope of Buck targets.
 buck2 build //runtime/core/portable_type/...

From e713682aff34381e49120ab4f6c6c23c9bb459c2 Mon Sep 17 00:00:00 2001
From: Zingo Andersen
Date: Fri, 28 Feb 2025 17:29:56 +0100
Subject: [PATCH 170/584] Arm backend: Remove arm_test folder build and test
 output from git radar (#8770)

This adds the arm_test folder, used as default build and test output by the arm scripts, to .gitignore

Signed-off-by: Zingo Andersen

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 6cbb0f6643e..7b8279f604d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@ __pycache__/
 
 # Build and tool-generated files
+arm_test/
 buck-out/
 buck2-bin/
 cmake-android-out/

From a0c0d2bedb800424d3cc0a936c5571393d4b92cc Mon Sep 17 00:00:00 2001
From: Zingo Andersen
Date: Fri, 28 Feb 2025 17:59:44 +0100
Subject: [PATCH 171/584] Arm backend: Remove output buffering when running Corstone FVP with tee (#8821)

Arm backend: Remove output buffering when running FVP with tee

This improves the log output feedback when running the FVP so you don't have to wait for slow models without any output.

Signed-off-by: Zingo Andersen

---
 backends/arm/scripts/run_fvp.sh | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/backends/arm/scripts/run_fvp.sh b/backends/arm/scripts/run_fvp.sh
index e0237a9c414..bd459d5363d 100755
--- a/backends/arm/scripts/run_fvp.sh
+++ b/backends/arm/scripts/run_fvp.sh
@@ -69,10 +69,21 @@ echo "Running ${elf_file} for ${target} run with FVP:${fvp_model} num_macs:${num
 echo "WARNING: Corstone FVP is not cycle accurate and should NOT be used to determine valid runtime"
 echo "--------------------------------------------------------------------------------"
 
+# Check if stdbuf is installed and use stdbuf -oL together with tee below to make the output
+# go all the way to the console more directly and not be buffered
+
+if hash stdbuf 2>/dev/null; then
+    nobuf="stdbuf -oL"
+else
+    nobuf=""
+fi
+
 log_file=$(mktemp)
 
+
 if [[ ${target} == *"ethos-u55"* ]]; then
-    ${fvp_model} \
+    ${nobuf} ${fvp_model} \
         -C ethosu.num_macs=${num_macs} \
         -C mps3_board.visualisation.disable-visualisation=1 \
         -C mps3_board.telnetterminal0.start_telnet=0 \
@@ -82,7 +93,7 @@ if [[ ${target} == *"ethos-u55"* ]]; then
         --timelimit ${timeout} 2>&1 | tee ${log_file} || true # seconds
     echo "[${BASH_SOURCE[0]}] Simulation complete, $?"
elif [[ ${target} == *"ethos-u85"* ]]; then - ${fvp_model} \ + ${nobuf} ${fvp_model} \ -C mps4_board.subsystem.ethosu.num_macs=${num_macs} \ -C mps4_board.visualisation.disable-visualisation=1 \ -C vis_hdlcd.disable_visualisation=1 \ From 21aa6ee6480b51dcc4be9c77576f829ebd1ee1fa Mon Sep 17 00:00:00 2001 From: Huy Do Date: Fri, 28 Feb 2025 09:39:38 -0800 Subject: [PATCH 172/584] Fail the benchmark job if the export step fails (#8786) * Fail the benchmark job if the export step fails * Try to run it in the test phase * Ready for review --- .github/workflows/android-perf.yml | 4 ++-- .github/workflows/apple-perf.yml | 4 ++-- .../benchmark/android-llm-device-farm-test-spec.yml.j2 | 4 ++++ .../default-ios-device-farm-appium-test-spec.yml.j2 | 5 +++++ 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index 201fb3b7a8f..d3a16428b57 100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -20,7 +20,7 @@ on: description: Models to be benchmarked required: false type: string - default: stories110M + default: llama devices: description: Target devices to run benchmark required: false @@ -36,7 +36,7 @@ on: description: Models to be benchmarked required: false type: string - default: stories110M + default: llama devices: description: Target devices to run benchmark required: false diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml index ea88be441cb..df29e44eac1 100644 --- a/.github/workflows/apple-perf.yml +++ b/.github/workflows/apple-perf.yml @@ -20,7 +20,7 @@ on: description: Models to be benchmarked required: false type: string - default: stories110M + default: llama devices: description: Target devices to run benchmark required: false @@ -36,7 +36,7 @@ on: description: Models to be benchmarked required: false type: string - default: stories110M + default: llama devices: description: Target devices to run benchmark required: false diff --git a/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 b/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 index ae25a071e5c..1ed5ede738c 100644 --- a/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 +++ b/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 @@ -35,6 +35,10 @@ phases: test: commands: + # Fail the test if the model doesn't exist, doing it here so that AWS can report the status back + - echo "Verify model" + - curl -I --fail '{{ model_path }}' || false + # By default, the following ADB command is used by Device Farm to run your Instrumentation test. # Please refer to Android's documentation for more options on running instrumentation tests with adb: # https://developer.android.com/studio/test/command-line#run-tests-with-adb diff --git a/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2 b/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2 index 05816685638..a24c0257100 100644 --- a/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2 +++ b/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2 @@ -34,6 +34,11 @@ phases: # The test phase includes commands that run your test suite execution. 
test: commands: + # Fail the test if the model doesn't exist, doing it here so that AWS can report the status back + - echo "Verify model" + - curl -I --fail '{{ model_path }}' || false + + # Run the benchmark - xcodebuild test-without-building -destination id=$DEVICEFARM_DEVICE_UDID -xctestrun $DEVICEFARM_TEST_PACKAGE_PATH/*.xctestrun -derivedDataPath $DEVICEFARM_LOG_DIR # The post test phase includes are commands that are run after your tests are executed. From a5750fb3d6d3aa66db095ff419c8202c0edb51f2 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Fri, 28 Feb 2025 10:02:49 -0800 Subject: [PATCH 173/584] remove exir:_warnings dep from executorch_pybindings buck rule (#8710) Per @larryliu0820. It was blocking buck2 build //runtime/... (which is still failing, but differently) --- shim_et/xplat/executorch/extension/pybindings/pybindings.bzl | 1 - 1 file changed, 1 deletion(-) diff --git a/shim_et/xplat/executorch/extension/pybindings/pybindings.bzl b/shim_et/xplat/executorch/extension/pybindings/pybindings.bzl index 52191eb978a..5ef9fe59266 100644 --- a/shim_et/xplat/executorch/extension/pybindings/pybindings.bzl +++ b/shim_et/xplat/executorch/extension/pybindings/pybindings.bzl @@ -52,7 +52,6 @@ def executorch_pybindings(python_module_name, srcs = [], cppdeps = [], visibilit "-DEXECUTORCH_PYTHON_MODULE_NAME={}".format(python_module_name), ], deps = [ - "//executorch/exir:_warnings", "//executorch/runtime/core:core", ] + cppdeps, external_deps = [ From 7ce47fc6edd7ac118485891925790690a3d169c8 Mon Sep 17 00:00:00 2001 From: cccclai Date: Fri, 28 Feb 2025 10:10:18 -0800 Subject: [PATCH 174/584] add backend is available Differential Revision: D69810445 Pull Request resolved: https://github.com/pytorch/executorch/pull/8738 --- extension/pybindings/portable_lib.py | 1 + extension/pybindings/pybindings.cpp | 11 +++++++++++ extension/pybindings/pybindings.pyi | 9 +++++++++ extension/pybindings/test/test_backend_pybinding.py | 13 +++++++++++++ runtime/__init__.py | 6 ++++++ 5 files changed, 40 insertions(+) diff --git a/extension/pybindings/portable_lib.py b/extension/pybindings/portable_lib.py index 24097fea6aa..eb2f843406e 100644 --- a/extension/pybindings/portable_lib.py +++ b/extension/pybindings/portable_lib.py @@ -39,6 +39,7 @@ _dump_profile_results, # noqa: F401 _get_operator_names, # noqa: F401 _get_registered_backend_names, # noqa: F401 + _is_available, # noqa: F401 _load_bundled_program_from_buffer, # noqa: F401 _load_for_executorch, # noqa: F401 _load_for_executorch_from_buffer, # noqa: F401 diff --git a/extension/pybindings/pybindings.cpp b/extension/pybindings/pybindings.cpp index f17ddbbbc36..a7dffddab0a 100644 --- a/extension/pybindings/pybindings.cpp +++ b/extension/pybindings/pybindings.cpp @@ -88,10 +88,12 @@ using ::executorch::extension::BufferDataLoader; using ::executorch::extension::MallocMemoryAllocator; using ::executorch::extension::MmapDataLoader; using ::executorch::runtime::ArrayRef; +using ::executorch::runtime::BackendInterface; using ::executorch::runtime::DataLoader; using ::executorch::runtime::Error; using ::executorch::runtime::EValue; using ::executorch::runtime::EventTracerDebugLogLevel; +using ::executorch::runtime::get_backend_class; using ::executorch::runtime::get_backend_name; using ::executorch::runtime::get_num_registered_backends; using ::executorch::runtime::get_registered_kernels; @@ -990,6 +992,14 @@ py::list get_registered_backend_names() { return res; } +py::bool_ is_available(const std::string& backend_name) { + BackendInterface* backend = 
get_backend_class(backend_name.c_str());
+  if (backend == nullptr) {
+    return false;
+  }
+  return backend->is_available();
+}
+
 } // namespace
 
 PYBIND11_MODULE(EXECUTORCH_PYTHON_MODULE_NAME, m) {
@@ -1048,6 +1058,7 @@ PYBIND11_MODULE(EXECUTORCH_PYTHON_MODULE_NAME, m) {
       &get_registered_backend_names,
       call_guard);
   m.def("_get_operator_names", &get_operator_names);
+  m.def("_is_available", &is_available, py::arg("backend_name"), call_guard);
   m.def("_create_profile_block", &create_profile_block, call_guard);
   m.def(
       "_reset_profile_results",
diff --git a/extension/pybindings/pybindings.pyi b/extension/pybindings/pybindings.pyi
index a380e90528e..cd720645fbc 100644
--- a/extension/pybindings/pybindings.pyi
+++ b/extension/pybindings/pybindings.pyi
@@ -211,6 +211,15 @@ def _load_bundled_program_from_buffer(
     """
     ...
 
+@experimental("This API is experimental and subject to change without notice.")
+def _is_available(backend_name: str) -> bool:
+    """
+    .. warning::
+
+        This API is experimental and subject to change without notice.
+    """
+    ...
+
 @experimental("This API is experimental and subject to change without notice.")
 def _get_operator_names() -> List[str]:
     """
diff --git a/extension/pybindings/test/test_backend_pybinding.py b/extension/pybindings/test/test_backend_pybinding.py
index fbdc2be7799..4dafc2fae15 100644
--- a/extension/pybindings/test/test_backend_pybinding.py
+++ b/extension/pybindings/test/test_backend_pybinding.py
@@ -12,3 +12,16 @@ def test_backend_name_list(
         registered_backend_names = runtime.backend_registry.registered_backend_names
         self.assertGreaterEqual(len(registered_backend_names), 1)
         self.assertIn("XnnpackBackend", registered_backend_names)
+
+    def test_backend_is_available(
+        self,
+    ) -> None:
+        # XnnpackBackend is registered and available
+        runtime = Runtime.get()
+        self.assertTrue(
+            runtime.backend_registry.is_available(backend_name="XnnpackBackend")
+        )
+        # NonExistBackend doesn't exist and is not available
+        self.assertFalse(
+            runtime.backend_registry.is_available(backend_name="NonExistBackend")
+        )
diff --git a/runtime/__init__.py b/runtime/__init__.py
index 33999b716e9..ed315316c9c 100644
--- a/runtime/__init__.py
+++ b/runtime/__init__.py
@@ -139,6 +139,12 @@ def registered_backend_names(self) -> List[str]:
         """
         return self._legacy_module._get_registered_backend_names()
 
+    def is_available(self, backend_name: str) -> bool:
+        """
+        Returns whether the backend with the given name is registered and available.
+ """ + return self._legacy_module._is_available(backend_name) + class OperatorRegistry: """The registry of operators that are available to the runtime.""" From 38384a24a45a2247001089fc2a1d3e21820c526c Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Fri, 28 Feb 2025 10:45:43 -0800 Subject: [PATCH 175/584] Add support for splitting in_features in linear layers (#8715) init --- examples/apple/coreml/llama/export.py | 67 ++++----------- examples/apple/coreml/llama/readme.md | 11 +-- examples/apple/coreml/llama/run.py | 6 ++ examples/apple/coreml/llama/test.py | 48 +++++++++++ examples/apple/coreml/llama/utils.py | 116 ++++++++++++++++++++++++++ 5 files changed, 190 insertions(+), 58 deletions(-) create mode 100644 examples/apple/coreml/llama/test.py create mode 100644 examples/apple/coreml/llama/utils.py diff --git a/examples/apple/coreml/llama/export.py b/examples/apple/coreml/llama/export.py index cc9eb9f02ee..c0f60529895 100644 --- a/examples/apple/coreml/llama/export.py +++ b/examples/apple/coreml/llama/export.py @@ -1,6 +1,8 @@ -# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -# pyre-strict +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. import argparse @@ -24,55 +26,7 @@ sys.path.insert(0, ".") from llama_transformer import InputManager, load_model - - -class SplitLinearModule(torch.nn.Module): - def __init__(self, in_features, out_features, target_split_size, max_splits): - super(SplitLinearModule, self).__init__() - num_splits = max(out_features // target_split_size, 1) - if num_splits > max_splits: - num_splits = max_splits - - self.split_size = out_features // num_splits - self.split_remainder = out_features % num_splits - self.splits = torch.nn.ModuleList( - [torch.nn.Linear(in_features, self.split_size) for _ in range(num_splits)] - ) - print( - f"Splitting out_features={out_features} into {num_splits} of size {self.split_size}" - ) - if self.split_remainder > 0: - print( - f"Warning: remainder {self.split_remainder} after splitting out_features={out_features} into {num_splits} of size {self.split_size}" - ) - self.splits.append(torch.nn.Linear(in_features, self.split_remainder)) - - def split_sizes(self): - return [split.out_features for split in self.splits] - - def forward(self, x): - return torch.cat([split(x) for split in self.splits], dim=-1) - - -def replace_linear_with_split_linear(model, target_split_size, max_splits): - for name, module in model.named_children(): - if isinstance(module, torch.nn.Linear): - new_module = SplitLinearModule( - module.in_features, module.out_features, target_split_size, max_splits - ) - split_sizes = new_module.split_sizes() - if module.bias is not None: - split_bias = module.bias.split(split_sizes) - split_weights = module.weight.split(split_sizes, dim=0) - for i, split in enumerate(new_module.splits): - split.weight = torch.nn.Parameter(split_weights[i]) - if module.bias is not None: - split.bias = torch.nn.Parameter(split_bias[i]) - else: - split.bias = None - setattr(model, name, new_module) - else: - replace_linear_with_split_linear(module, target_split_size, max_splits) +from utils import replace_linear_with_split_linear def main() -> None: @@ -175,7 +129,13 @@ def main() -> None: if export_args.target_split_size is not None: replace_linear_with_split_linear( - model, export_args.target_split_size, 
export_args.max_splits
+            model,
+            out_target_split_size=export_args.target_split_size,
+            out_max_splits=export_args.max_splits,
+            # I have not found splitting on in_features to be beneficial,
+            # and it often leads to OOM so I set in_max_splits to 1
+            in_target_split_size=1,
+            in_max_splits=1,
         )
 
     model.eval()
@@ -241,6 +201,7 @@ def main() -> None:
         ep,
         preserve_ops=[
             torch.ops.aten.scaled_dot_product_attention.default,
+            # preserve norm op for numerical stability
             torch.ops.aten.linalg_vector_norm.default,
         ],
         compile_config=EdgeCompileConfig(
diff --git a/examples/apple/coreml/llama/readme.md b/examples/apple/coreml/llama/readme.md
index a9efedf6bbe..14dff0c8580 100644
--- a/examples/apple/coreml/llama/readme.md
+++ b/examples/apple/coreml/llama/readme.md
@@ -38,8 +38,9 @@ The runner can also be used to run an eager model model to compare with CoreML n
 
 We are actively experimenting with different settings.  But here are ones that we've found work well for Llama1B on iPhone 15 Pro:
 
-* Set use_cache_list
-* Split linear layers with target_split_size=1024, max_splits=8
-* Use seq_length=32 or seq_length=64, both of which offer reasonable tradeoffs for prefill and decode performance.  seq_length=32 is better at decode and seq_length=64 is better at prefill.
-
-In our tests, we set max_seq_length=1024, but if your application allows for it, performance can improve with max_seq_length=512 or by keeping max_seq_length=1024 and setting cache_size=512-seq_length.
+* Set use_cache_list.
+* Use seq_length = 32, which offers a good balance between prefill and decode performance.
+* Split out_features in linear layers with target_split_size=1024, max_splits=8.
+* For ANE, set dtype = fp16, coreml-quantize = c4w.  This requires doing QAT on Llama1B for good accuracy.
+* Set embedding-quantize to "4,32".
+* Set max_seq_length to 128, 256, 512, 1024, and 2048, depending on needed context.  Note that performance drops with max_seq_length.  More specifically, performance drops with cache_size, and the best experience may require a good cache eviction policy.  The python runner in run.py uses a last-in-last-out policy when cache_size is specified.
diff --git a/examples/apple/coreml/llama/run.py b/examples/apple/coreml/llama/run.py
index 501aaee07ed..de22794dee1 100644
--- a/examples/apple/coreml/llama/run.py
+++ b/examples/apple/coreml/llama/run.py
@@ -1,3 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
 import argparse
 import sys
 
diff --git a/examples/apple/coreml/llama/test.py b/examples/apple/coreml/llama/test.py
new file mode 100644
index 00000000000..895cf2e1cce
--- /dev/null
+++ b/examples/apple/coreml/llama/test.py
@@ -0,0 +1,48 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+ +import sys + +sys.path.insert(0, ".") +import copy + +import torch +from utils import replace_linear_with_split_linear + + +def get_split_model( + model, + out_target_split_size=1, + out_max_splits=1, + in_target_split_size=1, + in_max_splits=1, +): + model_copy = copy.deepcopy(model) + replace_linear_with_split_linear( + model_copy, + out_target_split_size, + out_max_splits, + in_target_split_size, + in_max_splits, + ) + return model_copy + + +def test_split_model(): + inputs = torch.randn(10, 5, 1, 512) + + model = torch.nn.Sequential(*[torch.nn.Linear(512, 1024, bias=False)]) + model1 = get_split_model(model, 64, 2, 64, 1000) + model2 = get_split_model(model, 64, 2, 64, 1) + model3 = get_split_model(model, 64, 1, 64, 1000) + + assert torch.allclose(model(inputs), model1(inputs), atol=1e-5) + assert torch.allclose(model(inputs), model2(inputs), atol=1e-5) + assert torch.allclose(model(inputs), model3(inputs), atol=1e-5) + + +if __name__ == "__main__": + test_split_model() diff --git a/examples/apple/coreml/llama/utils.py b/examples/apple/coreml/llama/utils.py new file mode 100644 index 00000000000..1e5a842fed5 --- /dev/null +++ b/examples/apple/coreml/llama/utils.py @@ -0,0 +1,116 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import torch + + +class SplitLinearModule(torch.nn.Module): + def __init__( + self, + in_features, + out_features, + out_target_split_size=1, + out_max_splits=1, + in_target_split_size=1, + in_max_splits=1, + ): + super(SplitLinearModule, self).__init__() + self.out_split_sizes = self._get_split_sizes( + out_features, out_target_split_size, out_max_splits + ) + self.in_split_sizes = self._get_split_sizes( + in_features, in_target_split_size, in_max_splits + ) + print( + f"Splitting out_features={out_features} into {len(self.out_split_sizes)} of size {self.out_split_sizes[0]}." + ) + print( + f"Splitting in_features={in_features} into {len(self.in_split_sizes)} of size {self.in_split_sizes[0]}." + ) + + # self.ops contains a list of linear ops for different pieces of the output matrix + # The index of an op at (in_idx, out_idx) is given by self.op_index(in_idx, out_idx) + self.ops = torch.nn.ModuleList() + for idx_out, s_out in enumerate(self.out_split_sizes): + for idx_in, s_in in enumerate(self.in_split_sizes): + assert len(self.ops) == self.op_index(idx_in, idx_out) + self.ops.append(torch.nn.Linear(s_in, s_out, bias=False)) + + def op_index(self, in_index, out_index): + idx = out_index * len(self.in_split_sizes) + in_index + return idx + + def _get_split_sizes(self, n_features, target_split_size, max_splits): + num_splits = max(n_features // target_split_size, 1) + if num_splits > max_splits: + num_splits = max_splits + + split_size = n_features // num_splits + split_remainder = n_features % num_splits + if split_remainder > 0: + raise ValueError( + f"Cannot split {n_features} with target_split_size={target_split_size} and max_splits={max_splits} because it leaves a remainder of {split_remainder}." 
+            )
+
+        ret = [split_size for _ in range(num_splits)]
+        return ret
+
+    def set_params(self, weight):
+        split_weights = []
+        for w_out in weight.split(self.out_split_sizes, dim=0):
+            for w in w_out.split(self.in_split_sizes, dim=1):
+                split_weights.append(w)
+
+        for i, split in enumerate(self.ops):
+            split.weight = torch.nn.Parameter(split_weights[i])
+
+    def forward(self, x):
+        if len(self.in_split_sizes) == 1:
+            out_chunks = [op(x) for op in self.ops]
+        else:
+            x_splits = x.split(self.in_split_sizes, dim=-1)
+            out_chunks = [
+                torch.sum(
+                    torch.stack(
+                        [
+                            self.ops[self.op_index(in_idx, out_idx)].forward(
+                                x_splits[in_idx]
+                            )
+                            for in_idx in range(len(self.in_split_sizes))
+                        ],
+                    ),
+                    dim=0,
+                )
+                for out_idx in range(len(self.out_split_sizes))
+            ]
+
+        return torch.concat(out_chunks, dim=-1)
+
+
+def replace_linear_with_split_linear(
+    model, out_target_split_size, out_max_splits, in_target_split_size, in_max_splits=1
+):
+    for name, module in model.named_children():
+        if isinstance(module, torch.nn.Linear):
+            assert module.bias is None, "SplitLinearModule does not support bias"
+            new_module = SplitLinearModule(
+                module.in_features,
+                module.out_features,
+                out_target_split_size,
+                out_max_splits,
+                in_target_split_size,
+                in_max_splits,
+            )
+            new_module.set_params(module.weight)
+            setattr(model, name, new_module)
+        else:
+            replace_linear_with_split_linear(
+                module,
+                out_target_split_size,
+                out_max_splits,
+                in_target_split_size,
+                in_max_splits,
+            )

From 7e0a446278cfcb61611be3a1741d6e79a8127822 Mon Sep 17 00:00:00 2001
From: Gasoonjia 
Date: Fri, 28 Feb 2025 12:25:33 -0800
Subject: [PATCH 176/584] make datasink a separate directory

Differential Revision: D69732404

Pull Request resolved: https://github.com/pytorch/executorch/pull/8827
---
 devtools/CMakeLists.txt                       |  4 +-
 devtools/etdump/data_sinks/TARGETS            |  5 ++
 .../{ => data_sinks}/buffer_data_sink.cpp     |  2 +-
 .../{ => data_sinks}/buffer_data_sink.h       |  2 +-
 .../etdump/{ => data_sinks}/data_sink_base.h  |  0
 devtools/etdump/data_sinks/targets.bzl        | 49 +++++++++++++++++++
 devtools/etdump/data_sinks/tests/TARGETS      |  5 ++
 .../tests/buffer_data_sink_test.cpp           |  2 +-
 devtools/etdump/data_sinks/tests/targets.bzl  | 20 ++++++++
 devtools/etdump/etdump_flatcc.cpp             |  2 +-
 devtools/etdump/etdump_flatcc.h               |  4 +-
 devtools/etdump/targets.bzl                   | 40 ++------------
 devtools/etdump/tests/etdump_test.cpp         |  2 +-
 devtools/etdump/tests/targets.bzl             | 11 -----
 14 files changed, 91 insertions(+), 57 deletions(-)
 create mode 100644 devtools/etdump/data_sinks/TARGETS
 rename devtools/etdump/{ => data_sinks}/buffer_data_sink.cpp (96%)
 rename devtools/etdump/{ => data_sinks}/buffer_data_sink.h (98%)
 rename devtools/etdump/{ => data_sinks}/data_sink_base.h (100%)
 create mode 100644 devtools/etdump/data_sinks/targets.bzl
 create mode 100644 devtools/etdump/data_sinks/tests/TARGETS
 rename devtools/etdump/{ => data_sinks}/tests/buffer_data_sink_test.cpp (98%)
 create mode 100644 devtools/etdump/data_sinks/tests/targets.bzl

diff --git a/devtools/CMakeLists.txt b/devtools/CMakeLists.txt
index aaee4d1d0b6..77b536d70b5 100644
--- a/devtools/CMakeLists.txt
+++ b/devtools/CMakeLists.txt
@@ -176,8 +176,8 @@ add_custom_command(
 add_library(
   etdump ${CMAKE_CURRENT_SOURCE_DIR}/etdump/etdump_flatcc.cpp
          ${CMAKE_CURRENT_SOURCE_DIR}/etdump/emitter.cpp
-         ${CMAKE_CURRENT_SOURCE_DIR}/etdump/buffer_data_sink.cpp
-         ${CMAKE_CURRENT_SOURCE_DIR}/etdump/buffer_data_sink.h
+         ${CMAKE_CURRENT_SOURCE_DIR}/etdump/data_sinks/buffer_data_sink.cpp
+         
${CMAKE_CURRENT_SOURCE_DIR}/etdump/data_sinks/buffer_data_sink.h ) target_link_libraries( diff --git a/devtools/etdump/data_sinks/TARGETS b/devtools/etdump/data_sinks/TARGETS new file mode 100644 index 00000000000..0a42614a385 --- /dev/null +++ b/devtools/etdump/data_sinks/TARGETS @@ -0,0 +1,5 @@ +load(":targets.bzl", "define_common_targets") + +oncall("executorch") + +define_common_targets() diff --git a/devtools/etdump/buffer_data_sink.cpp b/devtools/etdump/data_sinks/buffer_data_sink.cpp similarity index 96% rename from devtools/etdump/buffer_data_sink.cpp rename to devtools/etdump/data_sinks/buffer_data_sink.cpp index 8a366339783..5678aefb181 100644 --- a/devtools/etdump/buffer_data_sink.cpp +++ b/devtools/etdump/data_sinks/buffer_data_sink.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ -#include +#include #include using ::executorch::runtime::Error; diff --git a/devtools/etdump/buffer_data_sink.h b/devtools/etdump/data_sinks/buffer_data_sink.h similarity index 98% rename from devtools/etdump/buffer_data_sink.h rename to devtools/etdump/data_sinks/buffer_data_sink.h index c5bbcf6e435..685e62b2103 100644 --- a/devtools/etdump/buffer_data_sink.h +++ b/devtools/etdump/data_sinks/buffer_data_sink.h @@ -8,7 +8,7 @@ #pragma once -#include +#include #include #include diff --git a/devtools/etdump/data_sink_base.h b/devtools/etdump/data_sinks/data_sink_base.h similarity index 100% rename from devtools/etdump/data_sink_base.h rename to devtools/etdump/data_sinks/data_sink_base.h diff --git a/devtools/etdump/data_sinks/targets.bzl b/devtools/etdump/data_sinks/targets.bzl new file mode 100644 index 00000000000..f7e68d57e50 --- /dev/null +++ b/devtools/etdump/data_sinks/targets.bzl @@ -0,0 +1,49 @@ +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") + + +def define_data_sink_target(data_sink_name, aten_suffix): + runtime.cxx_library( + name = data_sink_name + aten_suffix, + exported_headers = [ + data_sink_name + ".h", + ], + srcs = [ + data_sink_name + ".cpp", + ], + deps = [ + "//executorch/devtools/etdump:utils", + ], + exported_deps = [ + "//executorch/runtime/core/exec_aten:lib" + aten_suffix, + ":data_sink_base" + aten_suffix, + ], + visibility = [ + "//executorch/...", + "@EXECUTORCH_CLIENTS", + ], + ) + +def define_common_targets(): + """Defines targets that should be shared between fbcode and xplat. + + The directory containing this targets.bzl file should also contain both + TARGETS and BUCK files that call this function. 
+ """ + for aten_mode in (True, False): + aten_suffix = "_aten" if aten_mode else "" + + runtime.cxx_library( + name = "data_sink_base" + aten_suffix, + exported_headers = [ + "data_sink_base.h", + ], + exported_deps = [ + "//executorch/runtime/core/exec_aten/util:scalar_type_util" + aten_suffix, + ], + visibility = [ + "//executorch/...", + "@EXECUTORCH_CLIENTS", + ], + ) + + define_data_sink_target("buffer_data_sink", aten_suffix) diff --git a/devtools/etdump/data_sinks/tests/TARGETS b/devtools/etdump/data_sinks/tests/TARGETS new file mode 100644 index 00000000000..0a42614a385 --- /dev/null +++ b/devtools/etdump/data_sinks/tests/TARGETS @@ -0,0 +1,5 @@ +load(":targets.bzl", "define_common_targets") + +oncall("executorch") + +define_common_targets() diff --git a/devtools/etdump/tests/buffer_data_sink_test.cpp b/devtools/etdump/data_sinks/tests/buffer_data_sink_test.cpp similarity index 98% rename from devtools/etdump/tests/buffer_data_sink_test.cpp rename to devtools/etdump/data_sinks/tests/buffer_data_sink_test.cpp index 0dc4ae997fd..c4178c29a4b 100644 --- a/devtools/etdump/tests/buffer_data_sink_test.cpp +++ b/devtools/etdump/data_sinks/tests/buffer_data_sink_test.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ -#include +#include #include #include #include diff --git a/devtools/etdump/data_sinks/tests/targets.bzl b/devtools/etdump/data_sinks/tests/targets.bzl new file mode 100644 index 00000000000..499898355aa --- /dev/null +++ b/devtools/etdump/data_sinks/tests/targets.bzl @@ -0,0 +1,20 @@ +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") + +def define_common_targets(): + """Defines targets that should be shared between fbcode and xplat. + + The directory containing this targets.bzl file should also contain both + TARGETS and BUCK files that call this function. 
+ """ + + + runtime.cxx_test( + name = "buffer_data_sink_test", + srcs = [ + "buffer_data_sink_test.cpp", + ], + deps = [ + "//executorch/devtools/etdump/data_sinks:buffer_data_sink", + "//executorch/runtime/core/exec_aten/testing_util:tensor_util", + ], + ) diff --git a/devtools/etdump/etdump_flatcc.cpp b/devtools/etdump/etdump_flatcc.cpp index 92c977f778b..a5242c8ed4b 100644 --- a/devtools/etdump/etdump_flatcc.cpp +++ b/devtools/etdump/etdump_flatcc.cpp @@ -10,7 +10,7 @@ #include -#include +#include #include #include #include diff --git a/devtools/etdump/etdump_flatcc.h b/devtools/etdump/etdump_flatcc.h index a0457a91de8..458fa90621e 100644 --- a/devtools/etdump/etdump_flatcc.h +++ b/devtools/etdump/etdump_flatcc.h @@ -11,8 +11,8 @@ #include #include -#include -#include +#include +#include #include #include #include diff --git a/devtools/etdump/targets.bzl b/devtools/etdump/targets.bzl index afa012ed948..dda68e1b6ac 100644 --- a/devtools/etdump/targets.bzl +++ b/devtools/etdump/targets.bzl @@ -94,47 +94,13 @@ def define_common_targets(): "utils.h", ], visibility = [ - + "//executorch/devtools/etdump/...", ], ) for aten_mode in get_aten_mode_options(): aten_suffix = "_aten" if aten_mode else "" - runtime.cxx_library( - name = "data_sink_base" + aten_suffix, - exported_headers = [ - "data_sink_base.h", - ], - exported_deps = [ - "//executorch/runtime/core/exec_aten/util:scalar_type_util" + aten_suffix, - ], - visibility = [ - "//executorch/...", - "@EXECUTORCH_CLIENTS", - ], - ) - - runtime.cxx_library( - name = "buffer_data_sink" + aten_suffix, - exported_headers = [ - "buffer_data_sink.h", - ], - srcs = [ - "buffer_data_sink.cpp", - ], - deps = [ - ":utils", - ], - exported_deps = [ - "//executorch/runtime/core/exec_aten:lib" + aten_suffix, - ":data_sink_base" + aten_suffix, - ], - visibility = [ - "//executorch/...", - "@EXECUTORCH_CLIENTS", - ], - ) runtime.cxx_library( name = "etdump_flatcc" + aten_suffix, srcs = [ @@ -153,8 +119,8 @@ def define_common_targets(): exported_deps = [ ":etdump_schema_flatcc", ":utils", - ":data_sink_base" + aten_suffix, - ":buffer_data_sink" + aten_suffix, + "//executorch/devtools/etdump/data_sinks:data_sink_base" + aten_suffix, + "//executorch/devtools/etdump/data_sinks:buffer_data_sink" + aten_suffix, "//executorch/runtime/core:event_tracer" + aten_suffix, "//executorch/runtime/core/exec_aten/util:scalar_type_util" + aten_suffix, ], diff --git a/devtools/etdump/tests/etdump_test.cpp b/devtools/etdump/tests/etdump_test.cpp index 50456bade42..cd9acdf5e65 100644 --- a/devtools/etdump/tests/etdump_test.cpp +++ b/devtools/etdump/tests/etdump_test.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/devtools/etdump/tests/targets.bzl b/devtools/etdump/tests/targets.bzl index c91267ff467..5299b7c1cb7 100644 --- a/devtools/etdump/tests/targets.bzl +++ b/devtools/etdump/tests/targets.bzl @@ -19,14 +19,3 @@ def define_common_targets(): "//executorch/runtime/core/exec_aten/testing_util:tensor_util", ], ) - - runtime.cxx_test( - name = "buffer_data_sink_test", - srcs = [ - "buffer_data_sink_test.cpp", - ], - deps = [ - "//executorch/devtools/etdump:buffer_data_sink", - "//executorch/runtime/core/exec_aten/testing_util:tensor_util", - ], - ) From 781b08230d627adc515e25be3d77e81beb546db1 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Fri, 28 Feb 2025 12:39:16 -0800 Subject: [PATCH 177/584] Fixes to_edge_transform_and_lower when unsupported ops are asked for preservation (#8776) 
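
The root cause: a partitioner's ops_to_not_decompose() can ask to preserve ops
that are mutable or that alias their outputs, and such ops cannot survive the
preservation path. A minimal sketch of the kind of schema check involved
(simplified and illustrative only; the actual helper added below is
_remove_invalid_ops_for_not_decompose, which goes through
_pybind_schema_to_native_schema):

    import torch

    def can_preserve(op: torch._ops.OpOverload) -> bool:
        schema = op._schema
        if schema.is_mutable:  # e.g. aten.add_.Tensor mutates its input
            return False
        if any(ret.alias_info is not None for ret in schema.returns):
            return False       # e.g. aten.view.default aliases its input
        return True

    assert can_preserve(torch.ops.aten.scaled_dot_product_attention.default)
    assert not can_preserve(torch.ops.aten.add_.Tensor)   # mutable
    assert not can_preserve(torch.ops.aten.view.default)  # aliasing
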
* init * up * up * up --- .../coreml/partition/coreml_partitioner.py | 16 ++++-- .../coreml/test/test_coreml_partitioner.py | 19 ++++++- exir/program/_program.py | 49 +++++++++++++++++++ 3 files changed, 78 insertions(+), 6 deletions(-) diff --git a/backends/apple/coreml/partition/coreml_partitioner.py b/backends/apple/coreml/partition/coreml_partitioner.py index 99aa2a0a60e..210ef307477 100644 --- a/backends/apple/coreml/partition/coreml_partitioner.py +++ b/backends/apple/coreml/partition/coreml_partitioner.py @@ -111,10 +111,16 @@ def ops_to_not_decompose( do_not_decompose = [] op_support = OperatorsSupportedForCoreMLBackend() for node in ep.graph.nodes: - if ( - node.op == "call_function" - and isinstance(node.target, torch._ops.OpOverload) - and op_support.is_node_supported(None, node) + if node.op == "call_function" and isinstance( + node.target, torch._ops.OpOverload ): - do_not_decompose.append(node.target) + try: + if op_support.is_node_supported(None, node): + do_not_decompose.append(node.target) + except Exception as e: + # CoreML's op_support.is_node_supported will sometimes throw + # for unsupported ops, rather than returning False + logger.warning( + f"Encountered exception when checking node support: {e}" + ) return do_not_decompose, None diff --git a/backends/apple/coreml/test/test_coreml_partitioner.py b/backends/apple/coreml/test/test_coreml_partitioner.py index 03aac6a8611..7683d9c44d1 100644 --- a/backends/apple/coreml/test/test_coreml_partitioner.py +++ b/backends/apple/coreml/test/test_coreml_partitioner.py @@ -82,11 +82,28 @@ def test_vit_skip_conv(self): def test_ops_to_not_decompose(self): class Model(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + def forward(self, q, k, v, mask): - return torch.ops.aten.scaled_dot_product_attention.default( + out = torch.ops.aten.scaled_dot_product_attention.default( q, k, v, attn_mask=mask ) + # Add non-functional and alias ops + # These will be removed by ExecuTorch in non-decomposition + # table because they cannot be functionalized + out = out.transpose(1, 2) + out = out.view(1, -1) + out = out.permute(0, 1) + out = out.add_(1.0) + out = out.mul_(2.0) + out = out.div_(3.0) + out = out.sub_(4.0) + out = torch.ops.aten.view_copy.default(out, (-1,)) + out = out.select(0, 0) + return out + model = Model() model.eval() diff --git a/exir/program/_program.py b/exir/program/_program.py index fdf4b93e19c..739765be0d5 100644 --- a/exir/program/_program.py +++ b/exir/program/_program.py @@ -26,6 +26,7 @@ from executorch.exir.emit._emitter import _DelegateDebugIdentifierMap from executorch.exir.error import ExportError from executorch.exir.graph_module import get_control_flow_submodules +from executorch.exir.operator.convert import _pybind_schema_to_native_schema from executorch.exir.pass_base import PassBase from executorch.exir.pass_manager import PassType from executorch.exir.passes import ( @@ -836,6 +837,9 @@ def _replace_aten_ops_with_transformed_ops( ops_set_to_not_decompose, check_op_support = partitioner.ops_to_not_decompose( program ) + ops_set_to_not_decompose = _remove_invalid_ops_for_not_decompose( + ops_set_to_not_decompose + ) for op_aten in ops_set_to_not_decompose: _register_no_decomp_op(op_aten) @@ -965,6 +969,47 @@ def _sanity_check_graph_for_non_decomp_ops( logging.warning(warning_str) +def _remove_invalid_ops_for_not_decompose( + ops_to_not_decompose: List[torch._ops.OpOverload], +) -> List[torch._ops.OpOverload]: + # To address https://github.com/pytorch/executorch/issues/8781 + def keep(op): 
+        schema = op._schema
+        native_schema = _pybind_schema_to_native_schema(schema)
+        if native_schema.is_mutable:
+            logging.warn(
+                f"Op {op} was requested for preservation by partitioner. This request is ignored because it is mutable."
+            )
+            return False
+
+        if native_schema.aliased_return_names() != [None]:
+            logging.warn(
+                f"Op {op} was requested for preservation by partitioner. This request is ignored because it aliases output."
+            )
+            return False
+
+        # Explicit block list of ops that don't work if asked for
+        # preservation
+        if op in [
+            # Hits infinite recursion error when op is in
+            # EDGE_DO_NOT_DECOMP namespace
+            torch.ops.aten._to_copy.default,
+            # scalar to tensor type promotion does not work on ops
+            # in EDGE_DO_NOT_DECOMP namespace
+            torch.ops.aten.mul.Tensor,
+            torch.ops.aten.add.Tensor,
+            torch.ops.aten.sub.Tensor,
+            torch.ops.aten.div.Tensor,
+        ]:
+            logging.warn(
+                f"Op {op} was requested for preservation by partitioner. This request is ignored because it is in a blocklist."
+            )
+            return False
+        return True
+
+    return list(filter(keep, ops_to_not_decompose))
+
+
 def _gen_edge_manager_for_partitioners(
     partitioner: Dict[str, List[Partitioner]],
     aten_programs: Dict[str, ExportedProgram],
@@ -992,6 +1037,9 @@ def _gen_edge_manager_for_partitioners(
         all_ops_no_decomp = set()
         for curr_partitioner in partitioner.get(name, []):
            curr_ops_no_decomp, _ = curr_partitioner.ops_to_not_decompose(program)
+            curr_ops_no_decomp = _remove_invalid_ops_for_not_decompose(
+                curr_ops_no_decomp
+            )
             all_ops_no_decomp |= set(curr_ops_no_decomp)
 
     table = _default_decomposition_table()
@@ -1113,6 +1161,7 @@ def to_edge_transform_and_lower(
             curr_op_set, check_op_support = curr_partitioner.ops_to_not_decompose(
                 program
             )
+            curr_op_set = _remove_invalid_ops_for_not_decompose(curr_op_set)
             ops_set_to_not_decompose = ops_set_to_not_decompose.union(curr_op_set)
         _sanity_check_graph_for_non_decomp_ops(
             name,

From 314ab18f488602473818883988b6f790b12a1d30 Mon Sep 17 00:00:00 2001
From: pytorchbot 
Date: Fri, 28 Feb 2025 16:53:13 -0500
Subject: [PATCH 178/584] Serialize NamedData in PTE file (#8847)

1. Serialize NamedData in PTE file
2. Add NamedDataStore to EdgeProgramManager
---

Serializing NamedData is slightly different from constant/delegate data,
as each segment comes with its own alignment.

**An example:**
Given NamedData = {"key": data}. Data is 250 bytes.
- BackendA requires data with alignment=3
- BackendB requires data with alignment=4

Then, data should be serialized with an alignment of lcm(3, 4) = 12.

At serialization, ExecuTorch has a 'segment_alignment' that defaults to
128, so the data is serialized with an alignment of lcm(12, 128) = 384.

Inside the DataSegment, we want to store the original size of the data
(250). The offset of the subsequent DataSegment would be 384 bytes after
the start of this one.

**Design**
Introduce a new dataclass 'AlignedData' that stores the buffer and any
alignment that's required. This is used when assembling Program.segments
to ensure we get lcm(buffer_alignment, segment_alignment).

Note: The default segment_alignment can be overridden inside
'ExecutorchBackendConfig'.
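
A rough sketch of the alignment math above, mirroring the math.lcm and
aligned-size logic that serialize_pte_binary applies in this change
(variable names here are illustrative only):

    import math

    buffer_alignment = math.lcm(3, 4)   # both backends satisfied -> 12
    segment_alignment = 128             # ExecutorchBackendConfig default
    alignment = math.lcm(buffer_alignment, segment_alignment)  # -> 384
    data_size = 250                     # original size stored in DataSegment
    next_offset = math.ceil(data_size / alignment) * alignment  # -> 384
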
Differential Revision: [D69764150](https://our.internmc.facebook.com/intern/diff/D69764150/) ghstack-source-id: 269030449 Pull Request resolved: https://github.com/pytorch/executorch/pull/8835 Co-authored-by: lucylq --- exir/_serialize/_program.py | 91 ++++++++++++-- exir/_serialize/_serialize.py | 25 +++- exir/_serialize/test/test_program.py | 176 +++++++++++++++++++++++++-- exir/program/_program.py | 20 ++- exir/tests/common.py | 1 + 5 files changed, 290 insertions(+), 23 deletions(-) diff --git a/exir/_serialize/_program.py b/exir/_serialize/_program.py index 7656ea3f363..0994156ae50 100644 --- a/exir/_serialize/_program.py +++ b/exir/_serialize/_program.py @@ -8,10 +8,11 @@ import copy import json +import math import re from dataclasses import dataclass -from typing import ClassVar, List, Literal, Optional, Tuple +from typing import ClassVar, Dict, List, Literal, Optional, Tuple from executorch.exir._serialize._cord import Cord from executorch.exir._serialize._dataclass import _DataclassEncoder, _json_to_dataclass @@ -20,6 +21,10 @@ _program_flatbuffer_to_json, _program_json_to_flatbuffer, ) +from executorch.exir._serialize._named_data_store import ( + BufferEntry, + NamedDataStoreOutput, +) from executorch.exir._serialize.padding import aligned_size, pad_to, padding_required @@ -29,6 +34,7 @@ Buffer, DataLocation, DataSegment, + NamedData, Program, SubsegmentOffsets, ) @@ -41,6 +47,24 @@ _HEADER_BYTEORDER: Literal["little"] = "little" +@dataclass +class AlignedData: + """ + Holds data that should be aligned, for serialization. + + Attributes: + data: The data to serialize, as a cord. + alignment: The alignment required for the data. + """ + + data: Cord + alignment: int + + def __init__(self, data: Cord, alignment: Optional[int] = None) -> None: + self.data = data + self.alignment = alignment or 1 + + def _program_to_json(program: Program) -> str: """Returns the JSON representation of the given Program.""" return json.dumps(program, cls=_DataclassEncoder) @@ -213,7 +237,7 @@ def _get_extended_header(program_data: bytes) -> Optional[_ExtendedHeader]: def _extract_delegate_segments( program: Program, - segments: List[Cord], + segments: List[AlignedData], ) -> None: """Extracts the delegate segments inlined in the program into a list of buffers. The program is modified in-place to remove the delegate data. @@ -253,7 +277,7 @@ def _extract_delegate_segments( segment_index = segment_index_map.get(inline.data) if segment_index is None: segment_index = len(segments) - segments.append(Cord(inline.data)) + segments.append(AlignedData(Cord(inline.data))) segment_index_map[inline.data] = segment_index delegate.processed = BackendDelegateDataReference( location=DataLocation.SEGMENT, @@ -316,6 +340,44 @@ def _extract_constant_segment( return constant_segment_data, constant_segment_offsets +def _extract_named_data( + program: Program, + segments: List[AlignedData], + buffers: List[BufferEntry], + name_to_buffer_idx: Dict[str, int], +) -> None: + """Modifies the program in-place to add references to the named data + segments. + + Args: + program: The program to extract segments from. Modified in-place. + segments: A list of buffers to append extracted segments to. Modified in-place. + buffers: A list of unique buffers and the information required to + serialize them. Not modified. + name_to_buffer_idx: A map from the name of a blob to the index in buffers. + Not modified. 
+ """ + if program.named_data is not None and len(program.named_data) > 0: + raise ValueError("Program already has named data.") + + # Map from buffer_idx to segment_idx. + segment_index_map: Dict[int, int] = {} + + named_data: List[NamedData] = [] + for name, buffer_idx in name_to_buffer_idx.items(): + segment_index = segment_index_map.get(buffer_idx, None) + if segment_index is None: + segment_index = len(segments) + segment_index_map[buffer_idx] = segment_index + segments.append( + AlignedData( + Cord(buffers[buffer_idx].buffer), buffers[buffer_idx].alignment + ) + ) + named_data.append(NamedData(key=name, segment_index=segment_index)) + program.named_data = named_data + + def serialize_pte_binary( program: Program, *, @@ -324,6 +386,7 @@ def serialize_pte_binary( segment_alignment: int = 128, constant_tensor_alignment: Optional[int] = None, delegate_alignment: Optional[int] = None, + named_data: Optional[NamedDataStoreOutput] = None, ) -> Cord: """Returns the runtime binary representation of the given Program. @@ -343,6 +406,8 @@ def serialize_pte_binary( delegate_alignment: If provided, the minimum alignment of delegate data in the program. Must be a power of 2. If not provided, uses the value in the schema file. + named_data: If provided, named blobs to be stored in segments + after the PTE file. Returns: The serialized form of the Program, ready for execution by the runtime. """ @@ -355,8 +420,9 @@ def serialize_pte_binary( # copy, reusing the actual data blobs. program = copy.deepcopy(program) - # Store extracted segment data; this may be constant data or delegate data. - segments: List[Cord] = [] + # Store extracted segment data, with any buffer-specific alignment. + # This may be constant data, delegate data or named data. + segments: List[AlignedData] = [] constant_segment_data, constant_segment_offsets = _extract_constant_segment( program.constant_buffer, tensor_alignment=constant_tensor_alignment @@ -374,7 +440,7 @@ def serialize_pte_binary( # Clear the constant buffer, as constant data will be stored in segments. program.constant_buffer = [] # Add to the aggregate segments cord. - segments.append(constant_segment_data) + segments.append(AlignedData(constant_segment_data)) if mutable_data is not None: mutable_segment_data, mutable_segment_offsets = _extract_constant_segment( @@ -389,31 +455,34 @@ def serialize_pte_binary( ), ] # Add to the aggregate segments cord. - segments.append(mutable_segment_data) + segments.append(AlignedData(mutable_segment_data)) if extract_delegate_segments: _extract_delegate_segments(program, segments) + if named_data is not None: + _extract_named_data(program, segments, named_data.buffers, named_data.pte_data) # Append all segments into a single Cord, adding any necessary padding to ensure that # each segment begins at the required alignment. # Update program.segments with the offsets to each segment. segments_data = Cord() - for data in segments: + for segment in segments: prev_end = ( (program.segments[-1].offset + program.segments[-1].size) if program.segments else 0 ) + alignment = math.lcm(segment_alignment, segment.alignment) program.segments.append( DataSegment( - offset=aligned_size(prev_end, segment_alignment), size=len(data) + offset=aligned_size(prev_end, alignment), size=len(segment.data) ) ) # Add to aggregate segments cord with padding. 
- padding_length = padding_required(len(segments_data), segment_alignment) + padding_length = padding_required(len(segments_data), alignment) if padding_length > 0: segments_data.append(b"\x00" * padding_length) - segments_data.append(data) + segments_data.append(segment.data) # Convert to a standard flatbuffer binary. result: _FlatbufferResult = _program_json_to_flatbuffer( diff --git a/exir/_serialize/_serialize.py b/exir/_serialize/_serialize.py index c311274922f..6351875e113 100644 --- a/exir/_serialize/_serialize.py +++ b/exir/_serialize/_serialize.py @@ -6,12 +6,12 @@ # pyre-strict - -from typing import Dict, Tuple +from typing import Dict, Optional, Tuple from executorch.exir._serialize import _serialize_pte_binary from executorch.exir._serialize._cord import Cord +from executorch.exir._serialize._named_data_store import NamedDataStoreOutput from executorch.exir._serialize.data_serializer import ( DataPayload, DataSerializer, @@ -28,10 +28,24 @@ def serialize_for_executorch( emitter_output: EmitterOutput, config: ExecutorchBackendConfig, data_serializer: DataSerializer, + named_data: Optional[NamedDataStoreOutput] = None, ) -> Tuple[Cord, Dict[str, Cord]]: """Serialize the output from Emitter into ExecuTorch artifacts; PTE and PTD files.""" # Serialize PTE file. + pte_named_data = None + if ( + named_data is not None + and len(named_data.buffers) > 0 + and len(named_data.pte_data) > 0 + ): + # Create a separate NamedDataStoreOutput with only pte_data; exclude + # external_data, which shouldn't be serialized with the PTE file. + pte_named_data = NamedDataStoreOutput( + buffers=named_data.buffers, + pte_data=named_data.pte_data, + external_data={}, + ) pte: Cord = _serialize_pte_binary( program=emitter_output.program, mutable_data=emitter_output.mutable_data, @@ -39,6 +53,7 @@ def serialize_for_executorch( segment_alignment=config.segment_alignment, constant_tensor_alignment=config.constant_tensor_alignment, delegate_alignment=config.delegate_alignment, + named_data=pte_named_data, ) # Serialize PTD files. @@ -88,4 +103,10 @@ def serialize_for_executorch( ) ) + if named_data is None or len(named_data.external_data) == 0: + return pte, ptd_files + + if len(named_data.buffers) == 0: + raise RuntimeError("External data exists, but there are no buffers provided.") + return pte, ptd_files diff --git a/exir/_serialize/test/test_program.py b/exir/_serialize/test/test_program.py index f20c0b39798..c67849dd28d 100644 --- a/exir/_serialize/test/test_program.py +++ b/exir/_serialize/test/test_program.py @@ -10,11 +10,16 @@ import copy import difflib import json +import math import unittest from typing import List, Sequence from executorch.exir._serialize._flatbuffer import _program_flatbuffer_to_json +from executorch.exir._serialize._named_data_store import ( + BufferEntry, + NamedDataStoreOutput, +) from executorch.exir._serialize._program import ( _ExtendedHeader, _get_extended_header, @@ -23,6 +28,7 @@ deserialize_pte_binary, serialize_pte_binary, ) +from executorch.exir._serialize.padding import aligned_size from executorch.exir.schema import ( BackendDelegate, @@ -552,11 +558,9 @@ def test_round_trip_with_segments(self) -> None: # Check the segment base offset boundary. segment_base_offset = eh.segment_base_offset self.assertEqual( - pte_data[segment_base_offset - 2 : segment_base_offset + 3], - # The padding before the first segment. - b"\x00\x00" + pte_data[segment_base_offset : segment_base_offset + 3], # The first few bytes of the first segment. 
- + b"\x10\x11\x11", + b"\x10\x11\x11", ) # Now that we've shown that the base offset is correct, slice off the @@ -671,7 +675,7 @@ def test_constant_segment_tensor_alignment_non_power_of_2_fails(self) -> None: constant_tensor_alignment=constant_tensor_alignment, ) - def test_constant_segment_and_delegate_segment(self) -> None: + def test_constant_delegate_and_named_data_segments(self) -> None: # Create a program with some constant tensor data and delegate data blobs. program = get_test_program() constant_blobs = ( @@ -682,10 +686,22 @@ def test_constant_segment_and_delegate_segment(self) -> None: self.gen_blob_data(SEGMENT_ALIGNMENT // 2, b"\x30\x33\x03"), self.gen_blob_data(SEGMENT_ALIGNMENT + 1, b"\x40\x44\x04"), ) - add_constant_data(program, constant_blobs) add_delegate_data(program, program.execution_plan[0], delegate_blobs) + # Create named data segment. + named_data_buffers = [ + BufferEntry( + buffer=self.gen_blob_data(8, b"\x50\x55\x05"), alignment=3 + ), # expect lcm(3, 128) = 384 + BufferEntry( + buffer=self.gen_blob_data(16, b"\x60\x66\x06"), alignment=256 + ), # expect lcm(256, 128) = 256 + ] + pte_named_data = {"key0": 0, "key1": 1} + named_data = NamedDataStoreOutput( + buffers=named_data_buffers, pte_data=pte_named_data, external_data={} + ) # Extract the blobs into segments during serialization. pte_data = bytes( serialize_pte_binary( @@ -693,6 +709,7 @@ def test_constant_segment_and_delegate_segment(self) -> None: extract_delegate_segments=True, segment_alignment=SEGMENT_ALIGNMENT, constant_tensor_alignment=CONSTANT_TENSOR_ALIGNMENT, + named_data=named_data, ) ) @@ -702,6 +719,7 @@ def test_constant_segment_and_delegate_segment(self) -> None: program.execution_plan[0].delegates[0].processed.location, DataLocation.INLINE, ) + self.assertEqual(program.named_data, []) # Extended header should be present in the serialized data. eh = self.get_and_validate_extended_header(pte_data) @@ -715,9 +733,12 @@ def test_constant_segment_and_delegate_segment(self) -> None: # Peek inside the actual flatbuffer data to see the segments. program_with_segments = _json_to_program(_program_flatbuffer_to_json(pte_data)) - # Segment table should contain a constant segment and the delegate blobs. + # Segment table should contain a constant segment, the delegate blobs + # and a named data segment. segment_table: List[DataSegment] = program_with_segments.segments - self.assertEqual(len(segment_table), len(delegate_blobs) + 1) + self.assertEqual( + len(segment_table), len(delegate_blobs) + len(pte_named_data) + 1 + ) self.assertEqual(segment_table[0].offset, 0) # segment_table[0] is the constant segment, which # contains a couple of tensors with sizes: @@ -728,6 +749,30 @@ def test_constant_segment_and_delegate_segment(self) -> None: self.assertEqual(segment_table[1].size, SEGMENT_ALIGNMENT // 2) self.assertEqual(segment_table[2].offset, SEGMENT_ALIGNMENT * 2) self.assertEqual(segment_table[2].size, SEGMENT_ALIGNMENT + 1) + # Named data segments. 
+ expected_offset = aligned_size( + (segment_table[2].offset + segment_table[2].size), + math.lcm(named_data_buffers[0].alignment, SEGMENT_ALIGNMENT), + ) + self.assertEqual(segment_table[3].offset, expected_offset) + self.assertEqual(segment_table[3].size, len(named_data_buffers[0].buffer)) + expected_offset = aligned_size( + (segment_table[3].offset + segment_table[3].size), + math.lcm(named_data_buffers[1].alignment, SEGMENT_ALIGNMENT), + ) + self.assertEqual(segment_table[4].offset, expected_offset) + self.assertEqual(segment_table[4].size, len(named_data_buffers[1].buffer)) + + # Named data. + self.assertTrue(program_with_segments.named_data is not None) + program_named_data = program_with_segments.named_data + self.assertEqual(len(program_named_data), len(pte_named_data)) + + # Check named data values. + self.assertEqual(program_named_data[0].key, "key0") + self.assertEqual(program_named_data[0].segment_index, 3) + self.assertEqual(program_named_data[1].key, "key1") + self.assertEqual(program_named_data[1].segment_index, 4) # Check constant_segment index and offsets. subsegment_offsets: SubsegmentOffsets = program_with_segments.constant_segment @@ -811,6 +856,23 @@ def test_constant_segment_and_delegate_segment(self) -> None: + b"\x40\x44\x44", ) + # Check named data segments + self.assertEqual( + segment_data[ + segment_table[3].offset : segment_table[3].offset + + segment_table[3].size + ], + named_data_buffers[0].buffer, + ) + + self.assertEqual( + segment_data[ + segment_table[4].offset : segment_table[4].offset + + segment_table[4].size + ], + named_data_buffers[1].buffer, + ) + # Convert back. program2 = deserialize_pte_binary(pte_data) # Programs are the same besides constant_buffer, as deserialization @@ -820,6 +882,104 @@ def test_constant_segment_and_delegate_segment(self) -> None: # Number of constant tensors should be the same. self.assertEqual(len(program2.constant_buffer), len(program.constant_buffer)) + def test_named_data_segments(self) -> None: + # Set segment alignment to 12 to test the padding. + SEGMENT_ALIGNMENT: int = 12 + + # Create a program with some named data segments. + program = get_test_program() + + # Create named data segments with different alignments. + buffers = [ + BufferEntry( + buffer=self.gen_blob_data(8, b"\x10\x11\x01"), alignment=8 + ), # expect lcm(8, 12) = 24 + BufferEntry( + buffer=self.gen_blob_data(16, b"\x20\x22\x02"), alignment=32 + ), # expect lcm(32, 12) = 96 + BufferEntry( + buffer=self.gen_blob_data(24, b"\x30\x33\x03"), alignment=24 + ), # expect lcm(24, 12) = 24 + ] + pte_named_data = {"key1": 0, "key2": 0, "key3": 1, "key4": 2} + named_data = NamedDataStoreOutput( + buffers=buffers, pte_data=pte_named_data, external_data={} + ) + # Serialize the program with named data segments. + pte_data = bytes( + serialize_pte_binary( + program, + extract_delegate_segments=True, + segment_alignment=SEGMENT_ALIGNMENT, + constant_tensor_alignment=CONSTANT_TENSOR_ALIGNMENT, + named_data=named_data, + ) + ) + + # named_data is initially empty. + self.assertEqual(program.named_data, []) + # Extended header should be present in the serialized data. + eh = self.get_and_validate_extended_header(pte_data) + # Segment offset should be non-zero since there are segments. It + # should point past the end of the program data, but not beyond + # the end of the file. 
+ self.assertGreaterEqual(eh.segment_base_offset, eh.program_size) + self.assertLess(eh.segment_base_offset, len(pte_data)) + + # Peek inside the actual flatbuffer data to see the named data segments. + program_with_segments = _json_to_program(_program_flatbuffer_to_json(pte_data)) + # pyre-ignore Incompatible parameter type [6] + self.assertEqual(len(program_with_segments.named_data), len(pte_named_data)) + + # Check Program.named_data values. + # pyre-ignore Undefined attribute [16] + self.assertEqual(program_with_segments.named_data[0].key, "key1") + self.assertEqual(program_with_segments.named_data[0].segment_index, 0) + self.assertEqual(program_with_segments.named_data[1].key, "key2") + self.assertEqual(program_with_segments.named_data[1].segment_index, 0) + self.assertEqual(program_with_segments.named_data[2].key, "key3") + self.assertEqual(program_with_segments.named_data[2].segment_index, 1) + self.assertEqual(program_with_segments.named_data[3].key, "key4") + self.assertEqual(program_with_segments.named_data[3].segment_index, 2) + + # Check Program.segments values. + segment_table: List[DataSegment] = program_with_segments.segments + self.assertEqual(len(segment_table), 3) + + for i in range(len(segment_table)): + segment_length = ( + segment_table[i - 1].offset + segment_table[i - 1].size if i > 0 else 0 + ) + expected_offset = aligned_size( + segment_length, math.lcm(SEGMENT_ALIGNMENT, buffers[i].alignment) + ) + self.assertEqual(segment_table[i].offset, expected_offset) + self.assertEqual(segment_table[i].size, len(buffers[i].buffer)) + + # Check the pte data for buffer values. + segment_data: bytes = pte_data[eh.segment_base_offset :] + self.assertEqual( + segment_data[ + segment_table[0].offset : segment_table[0].offset + + segment_table[0].size + ], + buffers[0].buffer, + ) + self.assertEqual( + segment_data[ + segment_table[1].offset : segment_table[1].offset + + segment_table[1].size + ], + buffers[1].buffer, + ) + self.assertEqual( + segment_data[ + segment_table[2].offset : segment_table[2].offset + + segment_table[2].size + ], + buffers[2].buffer, + ) + # Common data for extended header tests. The two example values should produce # the example data. 
diff --git a/exir/program/_program.py b/exir/program/_program.py index 739765be0d5..5a9c101a06a 100644 --- a/exir/program/_program.py +++ b/exir/program/_program.py @@ -16,6 +16,10 @@ import torch import torch._export from executorch.exir._serialize._cord import Cord +from executorch.exir._serialize._named_data_store import ( + NamedDataStore, + NamedDataStoreOutput, +) from executorch.exir._serialize._serialize import serialize_for_executorch from executorch.exir._serialize.data_serializer import DataSerializer from executorch.exir._warnings import experimental @@ -1308,6 +1312,8 @@ def __init__( self._edge_programs: Dict[str, ExportedProgram] = edge_programs self._config_methods = constant_methods + self._named_data_store = NamedDataStore() + @property def methods(self) -> Set[str]: """ @@ -1493,7 +1499,10 @@ def to_executorch( execution_programs[name] = program return ExecutorchProgramManager( - execution_programs, self._config_methods, config + execution_programs, + self._config_methods, + config, + self._named_data_store.get_named_data_store_output(), ) @@ -1514,6 +1523,7 @@ def __init__( execution_programs: Dict[str, ExportedProgram], config_methods: Optional[Dict[str, Any]] = None, backend_config: Optional[ExecutorchBackendConfig] = None, + named_data: Optional[NamedDataStoreOutput] = None, ): """ End users should not call this constructor directly. Instead, they should use @@ -1536,6 +1546,9 @@ def __init__( self._execution_programs: Dict[str, ExportedProgram] = execution_programs self._config_methods: Optional[Dict[str, Any]] = config_methods + # Named data from EdgeProgramManager + self._named_data: Optional[NamedDataStoreOutput] = named_data + backend_config = backend_config or ExecutorchBackendConfig() # Emit methods @@ -1548,7 +1561,10 @@ def __init__( # Serialize emitter output, ready to be written to a file. self._data_serializer = FlatTensorSerializer() self._pte_data, self._tensor_data = serialize_for_executorch( - self._emitter_output, backend_config, self._data_serializer + self._emitter_output, + backend_config, + self._data_serializer, + self._named_data, ) self._buffer: Optional[bytes] = None diff --git a/exir/tests/common.py b/exir/tests/common.py index fdd7a3adca4..daeea109667 100644 --- a/exir/tests/common.py +++ b/exir/tests/common.py @@ -79,6 +79,7 @@ def get_test_program() -> Program: backend_delegate_data=[], segments=[], constant_segment=SubsegmentOffsets(segment_index=0, offsets=[]), + named_data=[], ) From a5f92101258489174990260463b6c1b405cd7cad Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Fri, 28 Feb 2025 14:13:21 -0800 Subject: [PATCH 179/584] Buck changes for ObjC/Swift bindings. 
Differential Revision: D70377502 Pull Request resolved: https://github.com/pytorch/executorch/pull/8825 --- .../apple/ExecuTorch/__tests__/resources/add.pte | Bin 0 -> 728 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 extension/apple/ExecuTorch/__tests__/resources/add.pte diff --git a/extension/apple/ExecuTorch/__tests__/resources/add.pte b/extension/apple/ExecuTorch/__tests__/resources/add.pte new file mode 100644 index 0000000000000000000000000000000000000000..43252ca7d3d05e8fe847e122c9c7de976e0e0096 GIT binary patch literal 728 zcmZ`$O-_Sg5Pi1LVna-08q!40d z9v`$+<8G4NtCNznyo*d;{901sofq4 z&95FC(_^3>z=$tz@ie@O=wFQ6?sRR{e3+V%PkTlvcFUN0_=L1XT6i=0w*D?~&6+W_ zm?cw28ad;8ZTa+8zxfcf= X8imU+Ugk*@$4qIuZ+H9Wa$n&GV)QgK literal 0 HcmV?d00001 From eef7b44c8c19fb7611139689210a5d98d3b1fab1 Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Fri, 28 Feb 2025 14:56:13 -0800 Subject: [PATCH 180/584] Lay out ObjC/Swift bindings skeleton (#8826) --- .../apple/ExecuTorch/Exported/ExecuTorch.h | 4 ++++ .../ExecuTorch/Exported/ExecuTorchError.h | 15 +++++++++++++ .../ExecuTorch/Exported/ExecuTorchError.m | 11 ++++++++++ .../ExecuTorch/Exported/ExecuTorchModule.h | 22 +++++++++++++++++++ .../ExecuTorch/Exported/ExecuTorchModule.mm | 20 +++++++++++++++++ .../ExecuTorch/Exported/ExecuTorchTensor.h | 22 +++++++++++++++++++ .../ExecuTorch/Exported/ExecuTorchTensor.mm | 19 ++++++++++++++++ .../ExecuTorch/Exported/ExecuTorchValue.h | 19 ++++++++++++++++ .../ExecuTorch/Exported/ExecuTorchValue.m | 13 +++++++++++ .../ExecuTorch/__tests__/ModuleTest.swift | 21 ++++++++++++++++++ .../ExecuTorch/__tests__/TensorTest.swift | 16 ++++++++++++++ .../ExecuTorch/__tests__/ValueTest.swift | 16 ++++++++++++++ 12 files changed, 198 insertions(+) create mode 100644 extension/apple/ExecuTorch/Exported/ExecuTorchError.h create mode 100644 extension/apple/ExecuTorch/Exported/ExecuTorchError.m create mode 100644 extension/apple/ExecuTorch/Exported/ExecuTorchModule.h create mode 100644 extension/apple/ExecuTorch/Exported/ExecuTorchModule.mm create mode 100644 extension/apple/ExecuTorch/Exported/ExecuTorchTensor.h create mode 100644 extension/apple/ExecuTorch/Exported/ExecuTorchTensor.mm create mode 100644 extension/apple/ExecuTorch/Exported/ExecuTorchValue.h create mode 100644 extension/apple/ExecuTorch/Exported/ExecuTorchValue.m create mode 100644 extension/apple/ExecuTorch/__tests__/ModuleTest.swift create mode 100644 extension/apple/ExecuTorch/__tests__/TensorTest.swift create mode 100644 extension/apple/ExecuTorch/__tests__/ValueTest.swift diff --git a/extension/apple/ExecuTorch/Exported/ExecuTorch.h b/extension/apple/ExecuTorch/Exported/ExecuTorch.h index e16439714f2..3a12a5ddbae 100644 --- a/extension/apple/ExecuTorch/Exported/ExecuTorch.h +++ b/extension/apple/ExecuTorch/Exported/ExecuTorch.h @@ -6,4 +6,8 @@ * LICENSE file in the root directory of this source tree. */ +#import "ExecuTorchError.h" #import "ExecuTorchLog.h" +#import "ExecuTorchModule.h" +#import "ExecuTorchTensor.h" +#import "ExecuTorchValue.h" diff --git a/extension/apple/ExecuTorch/Exported/ExecuTorchError.h b/extension/apple/ExecuTorch/Exported/ExecuTorchError.h new file mode 100644 index 00000000000..cdf52051d05 --- /dev/null +++ b/extension/apple/ExecuTorch/Exported/ExecuTorchError.h @@ -0,0 +1,15 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#import + +NS_ASSUME_NONNULL_BEGIN + +FOUNDATION_EXPORT NSErrorDomain const ExecuTorchErrorDomain NS_SWIFT_NAME(ErrorDomain); + +NS_ASSUME_NONNULL_END diff --git a/extension/apple/ExecuTorch/Exported/ExecuTorchError.m b/extension/apple/ExecuTorch/Exported/ExecuTorchError.m new file mode 100644 index 00000000000..43996dc213e --- /dev/null +++ b/extension/apple/ExecuTorch/Exported/ExecuTorchError.m @@ -0,0 +1,11 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#import "ExecuTorchError.h" + +NSErrorDomain const ExecuTorchErrorDomain = @"org.pytorch.executorch.error"; diff --git a/extension/apple/ExecuTorch/Exported/ExecuTorchModule.h b/extension/apple/ExecuTorch/Exported/ExecuTorchModule.h new file mode 100644 index 00000000000..5e6e0ecaf47 --- /dev/null +++ b/extension/apple/ExecuTorch/Exported/ExecuTorchModule.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#import "ExecuTorchValue.h" + +NS_ASSUME_NONNULL_BEGIN + +NS_SWIFT_NAME(Module) +__attribute__((deprecated("This API is experimental."))) +@interface ExecuTorchModule : NSObject + ++ (instancetype)new NS_UNAVAILABLE; +- (instancetype)init NS_UNAVAILABLE; + +@end + +NS_ASSUME_NONNULL_END diff --git a/extension/apple/ExecuTorch/Exported/ExecuTorchModule.mm b/extension/apple/ExecuTorch/Exported/ExecuTorchModule.mm new file mode 100644 index 00000000000..866dcc6901b --- /dev/null +++ b/extension/apple/ExecuTorch/Exported/ExecuTorchModule.mm @@ -0,0 +1,20 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#import "ExecuTorchModule.h" + +#import "ExecuTorchError.h" + +#import +#import + +@implementation ExecuTorchModule { + std::unique_ptr _module; +} + +@end diff --git a/extension/apple/ExecuTorch/Exported/ExecuTorchTensor.h b/extension/apple/ExecuTorch/Exported/ExecuTorchTensor.h new file mode 100644 index 00000000000..220e377b60d --- /dev/null +++ b/extension/apple/ExecuTorch/Exported/ExecuTorchTensor.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#import + +NS_ASSUME_NONNULL_BEGIN + +NS_SWIFT_NAME(Tensor) +__attribute__((deprecated("This API is experimental."))) +@interface ExecuTorchTensor : NSObject + ++ (instancetype)new NS_UNAVAILABLE; +- (instancetype)init NS_UNAVAILABLE; + +@end + +NS_ASSUME_NONNULL_END diff --git a/extension/apple/ExecuTorch/Exported/ExecuTorchTensor.mm b/extension/apple/ExecuTorch/Exported/ExecuTorchTensor.mm new file mode 100644 index 00000000000..4b072444bec --- /dev/null +++ b/extension/apple/ExecuTorch/Exported/ExecuTorchTensor.mm @@ -0,0 +1,19 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#import "ExecuTorchTensor.h" + +#import "ExecuTorchError.h" + +#import + +@implementation ExecuTorchTensor { + ::executorch::extension::TensorPtr _tensor; +} + +@end diff --git a/extension/apple/ExecuTorch/Exported/ExecuTorchValue.h b/extension/apple/ExecuTorch/Exported/ExecuTorchValue.h new file mode 100644 index 00000000000..9b2c8aaaae6 --- /dev/null +++ b/extension/apple/ExecuTorch/Exported/ExecuTorchValue.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#import "ExecuTorchTensor.h" + +NS_ASSUME_NONNULL_BEGIN + +NS_SWIFT_NAME(Value) +__attribute__((deprecated("This API is experimental."))) +@interface ExecuTorchValue : NSObject + +@end + +NS_ASSUME_NONNULL_END diff --git a/extension/apple/ExecuTorch/Exported/ExecuTorchValue.m b/extension/apple/ExecuTorch/Exported/ExecuTorchValue.m new file mode 100644 index 00000000000..98a6f774176 --- /dev/null +++ b/extension/apple/ExecuTorch/Exported/ExecuTorchValue.m @@ -0,0 +1,13 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#import "ExecuTorchValue.h" + +@implementation ExecuTorchValue + +@end diff --git a/extension/apple/ExecuTorch/__tests__/ModuleTest.swift b/extension/apple/ExecuTorch/__tests__/ModuleTest.swift new file mode 100644 index 00000000000..609727ec93f --- /dev/null +++ b/extension/apple/ExecuTorch/__tests__/ModuleTest.swift @@ -0,0 +1,21 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +@testable import ExecuTorch + +import XCTest + +class ModuleTest: XCTestCase { + func test() throws { + let bundle = Bundle(for: type(of: self)) + guard let modelPath = bundle.path(forResource: "add", ofType: "pte") else { + XCTFail("Couldn't find the model file") + return + } + } +} diff --git a/extension/apple/ExecuTorch/__tests__/TensorTest.swift b/extension/apple/ExecuTorch/__tests__/TensorTest.swift new file mode 100644 index 00000000000..f5c2ccdbeba --- /dev/null +++ b/extension/apple/ExecuTorch/__tests__/TensorTest.swift @@ -0,0 +1,16 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +@testable import ExecuTorch + +import XCTest + +class TensorTest: XCTestCase { + func test() { + } +} diff --git a/extension/apple/ExecuTorch/__tests__/ValueTest.swift b/extension/apple/ExecuTorch/__tests__/ValueTest.swift new file mode 100644 index 00000000000..56802ee540c --- /dev/null +++ b/extension/apple/ExecuTorch/__tests__/ValueTest.swift @@ -0,0 +1,16 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +@testable import ExecuTorch + +import XCTest + +class ValueTest: XCTestCase { + func test() { + } +} From 86dcc3eaab5b799985e28b7d4f6c6abe93a51f9b Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Fri, 28 Feb 2025 19:44:19 -0500 Subject: [PATCH 181/584] [build] Add editable mode unittest (#8855) As titled. To do this we need to refactor the arguments being passed into `setup-linux.sh` `setup-macos.sh` and `unittest-linux.sh` `unittest-macos.sh`. ghstack-source-id: 19ab82778b4883ac986ef62c8d0aa87189e7c16e Pull Request resolved: https://github.com/pytorch/executorch/pull/8817 Co-authored-by: Mengwei Liu --- .ci/scripts/setup-linux.sh | 16 +++--- .ci/scripts/setup-macos.sh | 16 +++--- .ci/scripts/unittest-linux.sh | 21 ++----- .ci/scripts/unittest-macos.sh | 19 ++----- .ci/scripts/utils.sh | 55 ++++++++++++++++-- .github/workflows/_android.yml | 2 +- .github/workflows/_unittest.yml | 8 ++- .github/workflows/android-perf.yml | 4 +- .../workflows/android-release-artifacts.yml | 2 +- .github/workflows/apple-perf.yml | 4 +- .github/workflows/apple.yml | 6 +- .github/workflows/doc-build.yml | 2 +- .github/workflows/lint.yml | 2 +- .github/workflows/periodic.yml | 2 +- .github/workflows/pull.yml | 56 ++++++------------- .github/workflows/trunk.yml | 53 +++++------------- 16 files changed, 121 insertions(+), 147 deletions(-) diff --git a/.ci/scripts/setup-linux.sh b/.ci/scripts/setup-linux.sh index 776bf6f7953..a090571ab49 100755 --- a/.ci/scripts/setup-linux.sh +++ b/.ci/scripts/setup-linux.sh @@ -10,19 +10,17 @@ set -exu # shellcheck source=/dev/null source "$(dirname "${BASH_SOURCE[0]}")/utils.sh" -BUILD_TOOL=$1 -if [[ -z "${BUILD_TOOL:-}" ]]; then - echo "Missing build tool (require buck2 or cmake), exiting..." - exit 1 -else - echo "Setup Linux for ${BUILD_TOOL} ..." -fi +read -r BUILD_TOOL BUILD_MODE EDITABLE < <(parse_args "$@") # As Linux job is running inside a Docker container, all of its dependencies # have already been installed, so we use PyTorch build from source here instead # of nightly. This allows CI to test against latest commits from PyTorch -install_executorch "use-pt-pinned-commit" -build_executorch_runner "${BUILD_TOOL}" "${2:-Release}" +if [[ "${EDITABLE:-false}" == "true" ]]; then + install_executorch --use-pt-pinned-commit --editable +else + install_executorch --use-pt-pinned-commit +fi +build_executorch_runner "${BUILD_TOOL}" "${BUILD_MODE}" if [[ "${GITHUB_BASE_REF:-}" == *main* || "${GITHUB_BASE_REF:-}" == *gh* ]]; then do_not_use_nightly_on_ci diff --git a/.ci/scripts/setup-macos.sh b/.ci/scripts/setup-macos.sh index bb8e45f23f1..4b43a730710 100755 --- a/.ci/scripts/setup-macos.sh +++ b/.ci/scripts/setup-macos.sh @@ -10,13 +10,7 @@ set -exu # shellcheck source=/dev/null source "$(dirname "${BASH_SOURCE[0]}")/utils.sh" -BUILD_TOOL=$1 -if [[ -z "${BUILD_TOOL:-}" ]]; then - echo "Missing build tool (require buck2 or cmake), exiting..." - exit 1 -else - echo "Setup MacOS for ${BUILD_TOOL} ..." -fi +read -r BUILD_TOOL BUILD_MODE EDITABLE < <(parse_args "$@") install_buck() { if ! command -v zstd &> /dev/null; then @@ -135,8 +129,12 @@ print_cmake_info install_pytorch_and_domains # We build PyTorch from source here instead of using nightly. 
This allows CI to test against # the pinned commit from PyTorch -install_executorch "use-pt-pinned-commit" -build_executorch_runner "${BUILD_TOOL}" "${2:-Release}" +if [[ "$EDITABLE" == "true" ]]; then + install_executorch --use-pt-pinned-commit --editable +else + install_executorch --use-pt-pinned-commit +fi +build_executorch_runner "${BUILD_TOOL}" "${BUILD_MODE}" if [[ "${GITHUB_BASE_REF:-}" == *main* || "${GITHUB_BASE_REF:-}" == *gh* ]]; then do_not_use_nightly_on_ci diff --git a/.ci/scripts/unittest-linux.sh b/.ci/scripts/unittest-linux.sh index 27da8d4e4f9..f8ff9df773e 100755 --- a/.ci/scripts/unittest-linux.sh +++ b/.ci/scripts/unittest-linux.sh @@ -6,21 +6,10 @@ # LICENSE file in the root directory of this source tree. set -eux -BUILD_TOOL=$1 -if [[ $BUILD_TOOL =~ ^(cmake|buck2)$ ]]; then - echo "Running unittests for ${BUILD_TOOL} ..." -else - echo "Missing build tool (require buck2 or cmake), exiting..." - exit 1 -fi +# shellcheck source=/dev/null +source "$(dirname "${BASH_SOURCE[0]}")/utils.sh" -BUILD_MODE=$2 -if [[ "${BUILD_MODE:-}" =~ ^(Debug|Release)$ ]]; then - echo "Running tests in build mode ${BUILD_MODE} ..." -else - echo "Unsupported build mode ${BUILD_MODE}, options are Debug or Release." - exit 1 -fi +read -r BUILD_TOOL BUILD_MODE EDITABLE < <(parse_args "$@") # The generic Linux job chooses to use base env, not the one setup by the image eval "$(conda shell.bash hook)" @@ -34,7 +23,7 @@ if [[ "$BUILD_TOOL" == "cmake" ]]; then PYTHON_EXECUTABLE=python \ EXECUTORCH_BUILD_PYBIND=ON \ CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ - .ci/scripts/setup-linux.sh "$BUILD_TOOL" "$BUILD_MODE" + .ci/scripts/setup-linux.sh "$@" # Install llama3_2_vision dependencies. PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh @@ -45,7 +34,7 @@ elif [[ "$BUILD_TOOL" == "buck2" ]]; then # because TMPDIR gets messed up? Please feel free to fix this and # speed up this CI job! PYTHON_EXECUTABLE=python \ - .ci/scripts/setup-linux.sh "$BUILD_TOOL" "$BUILD_MODE" + .ci/scripts/setup-linux.sh "$@" .ci/scripts/unittest-buck2.sh else diff --git a/.ci/scripts/unittest-macos.sh b/.ci/scripts/unittest-macos.sh index 9f7fafa35ce..960f69389ce 100755 --- a/.ci/scripts/unittest-macos.sh +++ b/.ci/scripts/unittest-macos.sh @@ -6,21 +6,10 @@ # LICENSE file in the root directory of this source tree. set -eux -BUILD_TOOL=$1 -if [[ $BUILD_TOOL =~ ^(cmake|buck2)$ ]]; then - echo "Running unittests for ${BUILD_TOOL} ..." -else - echo "Missing build tool (require buck2 or cmake), exiting..." - exit 1 -fi +# shellcheck source=/dev/null +source "$(dirname "${BASH_SOURCE[0]}")/utils.sh" -BUILD_MODE=$2 -if [[ $BUILD_MODE =~ ^(Debug|Release)$ ]]; then - echo "Running tests in build mode ${BUILD_MODE} ..." -else - echo "Unsupported build mode ${BUILD_MODE}, options are Debug or Release." - exit 1 -fi +read -r BUILD_TOOL BUILD_MODE EDITABLE < <(parse_args "$@") bash .ci/scripts/setup-conda.sh eval "$(conda shell.bash hook)" @@ -36,7 +25,7 @@ if [[ "$BUILD_TOOL" == "cmake" ]]; then EXECUTORCH_BUILD_PYBIND=ON \ CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \ ${CONDA_RUN} --no-capture-output \ - .ci/scripts/setup-macos.sh "${BUILD_TOOL}" "${BUILD_MODE}" + .ci/scripts/setup-macos.sh "$@" # Install llama3_2_vision dependencies. 
PYTHON_EXECUTABLE=python \ diff --git a/.ci/scripts/utils.sh b/.ci/scripts/utils.sh index e0bc935e861..8d99a0022cf 100644 --- a/.ci/scripts/utils.sh +++ b/.ci/scripts/utils.sh @@ -24,11 +24,7 @@ install_executorch() { which pip # Install executorch, this assumes that Executorch is checked out in the # current directory. - if [[ "${1:-}" == "use-pt-pinned-commit" ]]; then - ./install_executorch.sh --pybind xnnpack --use-pt-pinned-commit - else - ./install_executorch.sh --pybind xnnpack - fi + ./install_executorch.sh --pybind xnnpack "$@" # Just print out the list of packages for debugging pip list } @@ -166,3 +162,52 @@ do_not_use_nightly_on_ci() { exit 1 fi } + + +parse_args() { + local args=("$@") + local i + local BUILD_TOOL="" + local BUILD_MODE="" + local EDITABLE="" + for ((i=0; i<${#args[@]}; i++)); do + case "${args[$i]}" in + --build-tool) + BUILD_TOOL="${args[$((i+1))]}" + i=$((i+1)) + ;; + --build-mode) + BUILD_MODE="${args[$((i+1))]}" + i=$((i+1)) + ;; + --editable) + EDITABLE="${args[$((i+1))]}" + i=$((i+1)) + ;; + *) + echo "Invalid argument: ${args[$i]}" + exit 1 + ;; + esac + done + + if [ -z "$BUILD_TOOL" ]; then + echo "Missing build tool (require buck2 or cmake), exiting..." + exit 1 + elif ! [[ $BUILD_TOOL =~ ^(cmake|buck2)$ ]]; then + echo "Require buck2 or cmake for --build-tool, got ${BUILD_TOOL}, exiting..." + exit 1 + fi + BUILD_MODE="${BUILD_MODE:-Release}" + if ! [[ "$BUILD_MODE" =~ ^(Debug|Release)$ ]]; then + echo "Unsupported build mode ${BUILD_MODE}, options are Debug or Release." + exit 1 + fi + EDITABLE="${EDITABLE:-false}" + if ! [[ $EDITABLE =~ ^(true|false)$ ]]; then + echo "Require true or false for --editable, got ${EDITABLE}, exiting..." + exit 1 + fi + + echo "$BUILD_TOOL $BUILD_MODE $EDITABLE" +} diff --git a/.github/workflows/_android.yml b/.github/workflows/_android.yml index fa7c331311f..82e49d6672e 100644 --- a/.github/workflows/_android.yml +++ b/.github/workflows/_android.yml @@ -25,7 +25,7 @@ jobs: # The generic Linux job chooses to use base env, not the one setup by the image CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2 + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool buck2 export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded # Build LLM Demo for Android diff --git a/.github/workflows/_unittest.yml b/.github/workflows/_unittest.yml index f2eb2cfdb51..6b08b6d1259 100644 --- a/.github/workflows/_unittest.yml +++ b/.github/workflows/_unittest.yml @@ -15,6 +15,10 @@ on: required: true type: string description: Build tool to use, cmake or buck2. + editable: + required: false + type: string + description: Install ExecuTorch in editable mode or not. 
python-version: required: false type: string @@ -34,7 +38,7 @@ jobs: timeout: 90 script: | set -eux - .ci/scripts/unittest-linux.sh "${{ inputs.build-tool }}" "${{ inputs.build-mode }}" + .ci/scripts/unittest-linux.sh --build-tool "${{ inputs.build-tool }}" --build-mode "${{ inputs.build-mode }}" --editable "${{ inputs.editable }}" macos: uses: pytorch/test-infra/.github/workflows/macos_job.yml@main @@ -45,4 +49,4 @@ jobs: ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} script: | set -eux - .ci/scripts/unittest-macos.sh "${{ inputs.build-tool }}" "${{ inputs.build-mode }}" + .ci/scripts/unittest-macos.sh --build-tool "${{ inputs.build-tool }}" --build-mode "${{ inputs.build-mode }}" --editable "${{ inputs.editable }}" diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index d3a16428b57..f21ed849d03 100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -181,7 +181,7 @@ jobs: PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh fi - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake" # Install requirements for export_llama PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh @@ -356,7 +356,7 @@ jobs: # The generic Linux job chooses to use base env, not the one setup by the image CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh diff --git a/.github/workflows/android-release-artifacts.yml b/.github/workflows/android-release-artifacts.yml index 8d2c1d354cc..26423e59233 100644 --- a/.github/workflows/android-release-artifacts.yml +++ b/.github/workflows/android-release-artifacts.yml @@ -49,7 +49,7 @@ jobs: # The generic Linux job chooses to use base env, not the one setup by the image CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2 + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool buck2 export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded # Build LLM Demo for Android diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml index df29e44eac1..1b0de8d7659 100644 --- a/.github/workflows/apple-perf.yml +++ b/.github/workflows/apple-perf.yml @@ -181,7 +181,7 @@ jobs: BUILD_TOOL=cmake # Setup MacOS dependencies as there is no Docker support on MacOS atm GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" if [[ ${{ matrix.config }} == *"coreml"* ]]; then PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ @@ -367,7 +367,7 @@ jobs: BUILD_TOOL=cmake # Setup MacOS dependencies as there is no Docker support on MacOS atm GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded # Setup Apple certificate for iOS development diff --git a/.github/workflows/apple.yml 
b/.github/workflows/apple.yml index 6929e12fa6d..19460a35379 100644 --- a/.github/workflows/apple.yml +++ b/.github/workflows/apple.yml @@ -69,7 +69,7 @@ jobs: # Setup MacOS dependencies as there is no Docker support on MacOS atm GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded @@ -160,7 +160,7 @@ jobs: # Setup MacOS dependencies as there is no Docker support on MacOS atm GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" # Install CoreML Backend Requirements PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ @@ -293,7 +293,7 @@ jobs: BUILD_TOOL=cmake # Setup MacOS dependencies as there is no Docker support on MacOS atm GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded # Setup Apple certificate for iOS development diff --git a/.github/workflows/doc-build.yml b/.github/workflows/doc-build.yml index 8d9081615be..aa6f164d7b6 100644 --- a/.github/workflows/doc-build.yml +++ b/.github/workflows/doc-build.yml @@ -38,7 +38,7 @@ jobs: BUILD_TOOL=${{ matrix.build-tool }} # Setup dependencies as there is no Docker support - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" if [[(${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then export CHANNEL=test diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 53d2bd7910b..69fcc11a347 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -35,7 +35,7 @@ jobs: # For mypy linting, we need to first install executorch first so that # it builds the python package information. BUILD_TOOL="cmake" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" CACHE_DIRECTORY="/tmp/.lintbin" # Try to recover the cached binaries diff --git a/.github/workflows/periodic.yml b/.github/workflows/periodic.yml index 6b4644bb522..fa3fa6e1cd2 100644 --- a/.github/workflows/periodic.yml +++ b/.github/workflows/periodic.yml @@ -63,6 +63,6 @@ jobs: BACKEND=${{ matrix.backend }} DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }} - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" # Build and test ExecuTorch PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}" diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index d9a7aa6bd5b..2a2109bcdc1 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -52,34 +52,10 @@ jobs: BUILD_TOOL="cmake" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" # Build and test ExecuTorch with the add model on portable backend. 
PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "add" "${BUILD_TOOL}" "portable" - test-pip-install-editable-mode-linux: - name: test-pip-install-editable-mode-linux - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - strategy: - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - # Debug - which pip - PYTHON_EXECUTABLE=python bash ./install_executorch.sh --editable --pybind xnnpack --use-pt-pinned-commit - # Try to import extension library - python -c "from executorch.extension.llm.custom_ops import custom_ops" - test-models-linux: name: test-models-linux uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main @@ -106,7 +82,7 @@ jobs: BACKEND=${{ matrix.backend }} DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }} - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" # Build and test ExecuTorch PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}" @@ -146,7 +122,7 @@ jobs: ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}" # Setup executorch - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" # Install requirements for export_llama PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh # Test llama2 @@ -172,7 +148,7 @@ jobs: conda activate "${CONDA_ENV}" source .ci/scripts/utils.sh - install_executorch "use-pt-pinned-commit" + install_executorch "--use-pt-pinned-commit" BUILD_TOOL="cmake" PYTHON_EXECUTABLE=python \ bash .ci/scripts/build_llama_android.sh "${BUILD_TOOL}" @@ -197,7 +173,7 @@ jobs: conda activate "${CONDA_ENV}" BUILD_TOOL="cmake" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" # Test custom ops PYTHON_EXECUTABLE=python bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}" @@ -221,7 +197,7 @@ jobs: conda activate "${CONDA_ENV}" BUILD_TOOL="cmake" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" # Test selective build PYTHON_EXECUTABLE=python bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}" @@ -244,7 +220,7 @@ jobs: CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake" # install Llava requirements bash examples/models/llama/install_requirements.sh @@ -276,7 +252,7 @@ jobs: conda activate "${CONDA_ENV}" BUILD_TOOL="cmake" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" PYTHON_EXECUTABLE=python bash examples/xnnpack/quantization/test_quantize.sh "${BUILD_TOOL}" 
mv2 test-pybind-build-linux: @@ -303,7 +279,7 @@ jobs: PYTHON_EXECUTABLE=python \ EXECUTORCH_BUILD_XNNPACK=ON \ EXECUTORCH_BUILD_PYBIND=ON \ - bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" # see if we can import the module successfully python -c "from executorch.extension.pybindings import portable_lib; print('success!')" @@ -429,7 +405,7 @@ jobs: PYTHON_EXECUTABLE=python \ EXECUTORCH_BUILD_PYBIND=ON \ EXECUTORCH_BUILD_ARM_BAREMETAL=ON \ - .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" # Install Arm dependencies .ci/scripts/setup-arm-baremetal-tools.sh @@ -469,7 +445,7 @@ jobs: PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh # Setup executorch - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" # Install requirements for export_llama PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh # Test llama2 @@ -500,7 +476,7 @@ jobs: PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh # Setup executorch - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" # Setup install_requirements for llama PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh @@ -549,7 +525,7 @@ jobs: CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake" # install phi-3-mini requirements bash examples/models/phi-3-mini/install_requirements.sh @@ -576,7 +552,7 @@ jobs: CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake" # install llama requirements bash examples/models/llama/install_requirements.sh @@ -603,7 +579,7 @@ jobs: CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake" # install llama requirements bash examples/models/llama/install_requirements.sh @@ -630,7 +606,7 @@ jobs: CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake" # install llama requirements bash examples/models/llama/install_requirements.sh diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index d8ec745b75c..410e95d9a84 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -36,31 +36,6 @@ jobs: PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --target-os macos --event "${GITHUB_EVENT_NAME}" - test-pip-install-editable-mode-macos: - name: test-pip-install-editable-mode-macos - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - permissions: - id-token: write - contents: read - strategy: - fail-fast: false - with: - runner: macos-m1-stable - python-version: '3.11' - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || 
github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - # Debug - which pip - bash .ci/scripts/setup-conda.sh - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash ./install_executorch.sh --editable --pybind xnnpack - # Try to import extension library - python -c "from executorch.extension.llm.custom_ops import custom_ops" - test-models-macos: name: test-models-macos uses: pytorch/test-infra/.github/workflows/macos_job.yml@main @@ -82,7 +57,7 @@ jobs: bash .ci/scripts/setup-conda.sh # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" # Build and test executorch PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}" @@ -104,7 +79,7 @@ jobs: bash .ci/scripts/setup-conda.sh # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" # Build and test custom ops PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}" @@ -126,7 +101,7 @@ jobs: bash .ci/scripts/setup-conda.sh # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" # Build and test selective build PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}" @@ -153,7 +128,7 @@ jobs: conda activate "${CONDA_ENV}" BUILD_TOOL=${{ matrix.build-tool }} - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" # Test selective build PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}" @@ -175,7 +150,7 @@ jobs: conda activate "${CONDA_ENV}" source .ci/scripts/utils.sh - install_executorch "use-pt-pinned-commit" + install_executorch "--use-pt-pinned-commit" .ci/scripts/setup-arm-baremetal-tools.sh @@ -205,7 +180,7 @@ jobs: conda activate "${CONDA_ENV}" source .ci/scripts/utils.sh - install_executorch "use-pt-pinned-commit" + install_executorch "--use-pt-pinned-commit" .ci/scripts/setup-arm-baremetal-tools.sh @@ -226,7 +201,7 @@ jobs: bash .ci/scripts/setup-conda.sh # Setup MacOS dependencies as there is no Docker support on MacOS atm - GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" # Build and test coreml delegate PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh @@ -249,7 +224,7 @@ jobs: # build module for executorch.extension.pybindings.portable_lib BUILD_TOOL=${{ matrix.build-tool }} - EXECUTORCH_BUILD_PYBIND=ON PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + EXECUTORCH_BUILD_PYBIND=ON PYTHON_EXECUTABLE=python 
${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" # see if we can import the module successfully ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')" @@ -281,7 +256,7 @@ jobs: bash .ci/scripts/setup-conda.sh # Setup executorch - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh cmake + PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool cmake if [[ "${MODE}" == "mps" ]]; then # Install mps delegate @@ -315,7 +290,7 @@ jobs: # bash .ci/scripts/setup-conda.sh # # Setup MacOS dependencies as there is no Docker support on MacOS atm - # GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + # GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" # # install Llava requirements # ${CONDA_RUN} bash examples/models/llama/install_requirements.sh @@ -348,7 +323,7 @@ jobs: # The generic Linux job chooses to use base env, not the one setup by the image CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" @@ -370,7 +345,7 @@ jobs: bash .ci/scripts/setup-conda.sh # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh echo "Finishing installing coreml." 
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh @@ -419,7 +394,7 @@ jobs: # The generic Linux job chooses to use base env, not the one setup by the image CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake echo "::endgroup::" echo "::group::Set up Hugging Face" @@ -488,7 +463,7 @@ jobs: PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh # Setup executorch - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" # Install requirements for export_llama PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh # Test llama2 From 01c2f3646b8f2c972c13245c51b69b12ce648c4a Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Fri, 28 Feb 2025 16:58:22 -0800 Subject: [PATCH 182/584] Use locally built frameworks for Benchmark app (#8854) --- .github/workflows/apple-perf.yml | 19 +-- .github/workflows/apple.yml | 19 +-- .../Benchmark.xcodeproj/project.pbxproj | 127 +++++++++--------- .../Frameworks/download_frameworks.sh | 28 ---- 4 files changed, 64 insertions(+), 129 deletions(-) delete mode 100755 extension/benchmark/apple/Benchmark/Frameworks/download_frameworks.sh diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml index 1b0de8d7659..44aa645d16d 100644 --- a/.github/workflows/apple-perf.yml +++ b/.github/workflows/apple-perf.yml @@ -386,25 +386,8 @@ jobs: echo "::endgroup::" echo "::group::Build ExecuTorch iOS frameworks" - FRAMEWORKS=( - "executorch" - "backend_coreml" - "backend_mps" - "backend_xnnpack" - "kernels_custom" - "kernels_optimized" - "kernels_portable" - "kernels_quantized" - ) - - # Build Release iOS Frameworks PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - build/build_apple_frameworks.sh --coreml --custom --mps --optimized --portable --quantized --xnnpack - - mkdir -p extension/benchmark/apple/Benchmark/Frameworks - for FRAMEWORK in "${FRAMEWORKS[@]}"; do ( - cp -r "cmake-out/${FRAMEWORK}.xcframework" extension/benchmark/apple/Benchmark/Frameworks/ - ) done + build/build_apple_frameworks.sh --Release --Debug --coreml --custom --mps --optimized --portable --quantized --xnnpack echo "::endgroup::" # NB: Although exported models can be copied to this directory and bundled together with the diff --git a/.github/workflows/apple.yml b/.github/workflows/apple.yml index 19460a35379..e6f7759365a 100644 --- a/.github/workflows/apple.yml +++ b/.github/workflows/apple.yml @@ -312,25 +312,8 @@ jobs: echo "::endgroup::" echo "::group::Build ExecuTorch iOS frameworks" - FRAMEWORKS=( - "executorch" - "backend_coreml" - "backend_mps" - "backend_xnnpack" - "kernels_custom" - "kernels_optimized" - "kernels_portable" - "kernels_quantized" - ) - - # Build Release iOS Frameworks PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - build/build_apple_frameworks.sh --coreml --custom --mps --optimized --portable --quantized --xnnpack - - mkdir -p extension/benchmark/apple/Benchmark/Frameworks - for FRAMEWORK in "${FRAMEWORKS[@]}"; do ( - cp -r "cmake-out/${FRAMEWORK}.xcframework" extension/benchmark/apple/Benchmark/Frameworks/ - ) done + build/build_apple_frameworks.sh --Release --Debug --coreml --custom --mps --optimized --portable --quantized --xnnpack echo "::endgroup::" echo "::group::Build ExecuTorch 
benchmark app" diff --git a/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj b/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj index c43b701e885..e4aa95d8622 100644 --- a/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj +++ b/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj @@ -3,7 +3,7 @@ archiveVersion = 1; classes = { }; - objectVersion = 56; + objectVersion = 60; objects = { /* Begin PBXBuildFile section */ @@ -20,21 +20,15 @@ 03B011912CAD114E00054791 /* ResourceTestCase.m in Sources */ = {isa = PBXBuildFile; fileRef = 03B011902CAD114E00054791 /* ResourceTestCase.m */; }; 03B2D3682C8A515A0046936E /* App.swift in Sources */ = {isa = PBXBuildFile; fileRef = 03B2D3672C8A515A0046936E /* App.swift */; }; 03B2D37A2C8A515C0046936E /* GenericTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 03B2D3792C8A515C0046936E /* GenericTests.mm */; }; - 03DD00A92C8FE44600FE4619 /* backend_coreml.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 03DD00992C8FE44600FE4619 /* backend_coreml.xcframework */; }; - 03DD00AA2C8FE44600FE4619 /* kernels_custom.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 03DD009A2C8FE44600FE4619 /* kernels_custom.xcframework */; }; - 03DD00AF2C8FE44600FE4619 /* kernels_portable.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 03DD009F2C8FE44600FE4619 /* kernels_portable.xcframework */; }; - 03DD00B02C8FE44600FE4619 /* kernels_optimized.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 03DD00A02C8FE44600FE4619 /* kernels_optimized.xcframework */; }; - 03DD00B12C8FE44600FE4619 /* backend_xnnpack.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 03DD00A12C8FE44600FE4619 /* backend_xnnpack.xcframework */; }; - 03DD00B22C8FE44600FE4619 /* backend_mps.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 03DD00A22C8FE44600FE4619 /* backend_mps.xcframework */; }; - 03DD00B32C8FE44600FE4619 /* executorch.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 03DD00A32C8FE44600FE4619 /* executorch.xcframework */; settings = {ATTRIBUTES = (Required, ); }; }; - 03DD00B52C8FE44600FE4619 /* kernels_quantized.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 03DD00A52C8FE44600FE4619 /* kernels_quantized.xcframework */; }; 03E7E6792CBDCAE900205E71 /* CoreMLTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 03E7E6782CBDC1C900205E71 /* CoreMLTests.mm */; }; - 03ED6D0F2C8AAFE900F2D6EE /* libsqlite3.0.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 03ED6D0E2C8AAFE900F2D6EE /* libsqlite3.0.tbd */; }; - 03ED6D112C8AAFF200F2D6EE /* MetalPerformanceShadersGraph.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 03ED6D102C8AAFF200F2D6EE /* MetalPerformanceShadersGraph.framework */; }; - 03ED6D132C8AAFF700F2D6EE /* MetalPerformanceShaders.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 03ED6D122C8AAFF700F2D6EE /* MetalPerformanceShaders.framework */; }; - 03ED6D152C8AAFFF00F2D6EE /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 03ED6D142C8AAFFF00F2D6EE /* Metal.framework */; }; - 03ED6D172C8AB00500F2D6EE /* CoreML.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 03ED6D162C8AB00500F2D6EE /* CoreML.framework */; }; - 03ED6D192C8AB00A00F2D6EE /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 03ED6D182C8AB00A00F2D6EE /* Accelerate.framework */; }; + 03F1814E2D7262FC0058BDF9 /* backend_coreml in Frameworks */ = {isa = PBXBuildFile; 
productRef = 03F1814D2D7262FC0058BDF9 /* backend_coreml */; }; + 03F181502D7262FC0058BDF9 /* backend_mps in Frameworks */ = {isa = PBXBuildFile; productRef = 03F1814F2D7262FC0058BDF9 /* backend_mps */; }; + 03F181522D7262FC0058BDF9 /* backend_xnnpack in Frameworks */ = {isa = PBXBuildFile; productRef = 03F181512D7262FC0058BDF9 /* backend_xnnpack */; }; + 03F181542D7262FC0058BDF9 /* executorch in Frameworks */ = {isa = PBXBuildFile; productRef = 03F181532D7262FC0058BDF9 /* executorch */; }; + 03F181562D7262FC0058BDF9 /* kernels_custom in Frameworks */ = {isa = PBXBuildFile; productRef = 03F181552D7262FC0058BDF9 /* kernels_custom */; }; + 03F181582D7262FC0058BDF9 /* kernels_optimized in Frameworks */ = {isa = PBXBuildFile; productRef = 03F181572D7262FC0058BDF9 /* kernels_optimized */; }; + 03F1815A2D7262FC0058BDF9 /* kernels_portable in Frameworks */ = {isa = PBXBuildFile; productRef = 03F181592D7262FC0058BDF9 /* kernels_portable */; }; + 03F1815C2D7262FC0058BDF9 /* kernels_quantized in Frameworks */ = {isa = PBXBuildFile; productRef = 03F1815B2D7262FC0058BDF9 /* kernels_quantized */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -83,21 +77,7 @@ 03B2D3752C8A515C0046936E /* Tests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = Tests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; 03B2D3792C8A515C0046936E /* GenericTests.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = GenericTests.mm; sourceTree = ""; }; 03C7FA322C8AA24200E6E9AE /* Resources */ = {isa = PBXFileReference; lastKnownFileType = folder; path = Resources; sourceTree = SOURCE_ROOT; }; - 03DD00992C8FE44600FE4619 /* backend_coreml.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = backend_coreml.xcframework; path = Frameworks/backend_coreml.xcframework; sourceTree = ""; }; - 03DD009A2C8FE44600FE4619 /* kernels_custom.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = kernels_custom.xcframework; path = Frameworks/kernels_custom.xcframework; sourceTree = ""; }; - 03DD009F2C8FE44600FE4619 /* kernels_portable.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = kernels_portable.xcframework; path = Frameworks/kernels_portable.xcframework; sourceTree = ""; }; - 03DD00A02C8FE44600FE4619 /* kernels_optimized.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = kernels_optimized.xcframework; path = Frameworks/kernels_optimized.xcframework; sourceTree = ""; }; - 03DD00A12C8FE44600FE4619 /* backend_xnnpack.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = backend_xnnpack.xcframework; path = Frameworks/backend_xnnpack.xcframework; sourceTree = ""; }; - 03DD00A22C8FE44600FE4619 /* backend_mps.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = backend_mps.xcframework; path = Frameworks/backend_mps.xcframework; sourceTree = ""; }; - 03DD00A32C8FE44600FE4619 /* executorch.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = executorch.xcframework; path = Frameworks/executorch.xcframework; sourceTree = ""; }; - 03DD00A52C8FE44600FE4619 /* kernels_quantized.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = kernels_quantized.xcframework; path = Frameworks/kernels_quantized.xcframework; sourceTree = ""; }; 03E7E6782CBDC1C900205E71 /* 
CoreMLTests.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = CoreMLTests.mm; sourceTree = ""; }; - 03ED6D0E2C8AAFE900F2D6EE /* libsqlite3.0.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libsqlite3.0.tbd; path = Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS17.5.sdk/usr/lib/libsqlite3.0.tbd; sourceTree = DEVELOPER_DIR; }; - 03ED6D102C8AAFF200F2D6EE /* MetalPerformanceShadersGraph.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = MetalPerformanceShadersGraph.framework; path = Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS17.5.sdk/System/Library/Frameworks/MetalPerformanceShadersGraph.framework; sourceTree = DEVELOPER_DIR; }; - 03ED6D122C8AAFF700F2D6EE /* MetalPerformanceShaders.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = MetalPerformanceShaders.framework; path = Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS17.5.sdk/System/Library/Frameworks/MetalPerformanceShaders.framework; sourceTree = DEVELOPER_DIR; }; - 03ED6D142C8AAFFF00F2D6EE /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS17.5.sdk/System/Library/Frameworks/Metal.framework; sourceTree = DEVELOPER_DIR; }; - 03ED6D162C8AB00500F2D6EE /* CoreML.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreML.framework; path = Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS17.5.sdk/System/Library/Frameworks/CoreML.framework; sourceTree = DEVELOPER_DIR; }; - 03ED6D182C8AB00A00F2D6EE /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS17.5.sdk/System/Library/Frameworks/Accelerate.framework; sourceTree = DEVELOPER_DIR; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -112,20 +92,14 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( - 03ED6D192C8AB00A00F2D6EE /* Accelerate.framework in Frameworks */, - 03ED6D172C8AB00500F2D6EE /* CoreML.framework in Frameworks */, - 03ED6D152C8AAFFF00F2D6EE /* Metal.framework in Frameworks */, - 03ED6D132C8AAFF700F2D6EE /* MetalPerformanceShaders.framework in Frameworks */, - 03ED6D112C8AAFF200F2D6EE /* MetalPerformanceShadersGraph.framework in Frameworks */, - 03ED6D0F2C8AAFE900F2D6EE /* libsqlite3.0.tbd in Frameworks */, - 03DD00A92C8FE44600FE4619 /* backend_coreml.xcframework in Frameworks */, - 03DD00B22C8FE44600FE4619 /* backend_mps.xcframework in Frameworks */, - 03DD00B12C8FE44600FE4619 /* backend_xnnpack.xcframework in Frameworks */, - 03DD00B32C8FE44600FE4619 /* executorch.xcframework in Frameworks */, - 03DD00AA2C8FE44600FE4619 /* kernels_custom.xcframework in Frameworks */, - 03DD00B02C8FE44600FE4619 /* kernels_optimized.xcframework in Frameworks */, - 03DD00AF2C8FE44600FE4619 /* kernels_portable.xcframework in Frameworks */, - 03DD00B52C8FE44600FE4619 /* kernels_quantized.xcframework in Frameworks */, + 03F181542D7262FC0058BDF9 /* executorch in Frameworks */, + 03F1815C2D7262FC0058BDF9 /* kernels_quantized in Frameworks */, + 03F181502D7262FC0058BDF9 /* backend_mps in Frameworks */, + 03F1814E2D7262FC0058BDF9 /* backend_coreml in Frameworks */, + 03F181522D7262FC0058BDF9 /* backend_xnnpack in Frameworks */, + 03F181562D7262FC0058BDF9 /* kernels_custom in Frameworks */, + 
03F1815A2D7262FC0058BDF9 /* kernels_portable in Frameworks */, + 03F181582D7262FC0058BDF9 /* kernels_optimized in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -204,7 +178,6 @@ isa = PBXGroup; children = ( 03B2D3662C8A515A0046936E /* App */, - 03ED6CEB2C8AAF5300F2D6EE /* Frameworks */, 03C7FA322C8AA24200E6E9AE /* Resources */, 03B2D3782C8A515C0046936E /* Tests */, 03B0118D2CAC567900054791 /* TestUtils */, @@ -242,27 +215,6 @@ path = Tests; sourceTree = SOURCE_ROOT; }; - 03ED6CEB2C8AAF5300F2D6EE /* Frameworks */ = { - isa = PBXGroup; - children = ( - 03ED6D182C8AB00A00F2D6EE /* Accelerate.framework */, - 03ED6D162C8AB00500F2D6EE /* CoreML.framework */, - 03ED6D142C8AAFFF00F2D6EE /* Metal.framework */, - 03ED6D122C8AAFF700F2D6EE /* MetalPerformanceShaders.framework */, - 03ED6D102C8AAFF200F2D6EE /* MetalPerformanceShadersGraph.framework */, - 03ED6D0E2C8AAFE900F2D6EE /* libsqlite3.0.tbd */, - 03DD00992C8FE44600FE4619 /* backend_coreml.xcframework */, - 03DD00A22C8FE44600FE4619 /* backend_mps.xcframework */, - 03DD00A12C8FE44600FE4619 /* backend_xnnpack.xcframework */, - 03DD00A32C8FE44600FE4619 /* executorch.xcframework */, - 03DD009A2C8FE44600FE4619 /* kernels_custom.xcframework */, - 03DD00A02C8FE44600FE4619 /* kernels_optimized.xcframework */, - 03DD009F2C8FE44600FE4619 /* kernels_portable.xcframework */, - 03DD00A52C8FE44600FE4619 /* kernels_quantized.xcframework */, - ); - name = Frameworks; - sourceTree = SOURCE_ROOT; - }; /* End PBXGroup section */ /* Begin PBXNativeTarget section */ @@ -329,6 +281,9 @@ Base, ); mainGroup = 03B2D35B2C8A515A0046936E; + packageReferences = ( + 03F1814C2D7262FC0058BDF9 /* XCLocalSwiftPackageReference "../../../.." */, + ); productRefGroup = 03B2D3652C8A515A0046936E /* Products */; projectDirPath = ""; projectRoot = ""; @@ -671,6 +626,48 @@ defaultConfigurationName = Release; }; /* End XCConfigurationList section */ + +/* Begin XCLocalSwiftPackageReference section */ + 03F1814C2D7262FC0058BDF9 /* XCLocalSwiftPackageReference "../../../.." 
*/ = { + isa = XCLocalSwiftPackageReference; + relativePath = ../../../..; + }; +/* End XCLocalSwiftPackageReference section */ + +/* Begin XCSwiftPackageProductDependency section */ + 03F1814D2D7262FC0058BDF9 /* backend_coreml */ = { + isa = XCSwiftPackageProductDependency; + productName = backend_coreml; + }; + 03F1814F2D7262FC0058BDF9 /* backend_mps */ = { + isa = XCSwiftPackageProductDependency; + productName = backend_mps; + }; + 03F181512D7262FC0058BDF9 /* backend_xnnpack */ = { + isa = XCSwiftPackageProductDependency; + productName = backend_xnnpack; + }; + 03F181532D7262FC0058BDF9 /* executorch */ = { + isa = XCSwiftPackageProductDependency; + productName = executorch; + }; + 03F181552D7262FC0058BDF9 /* kernels_custom */ = { + isa = XCSwiftPackageProductDependency; + productName = kernels_custom; + }; + 03F181572D7262FC0058BDF9 /* kernels_optimized */ = { + isa = XCSwiftPackageProductDependency; + productName = kernels_optimized; + }; + 03F181592D7262FC0058BDF9 /* kernels_portable */ = { + isa = XCSwiftPackageProductDependency; + productName = kernels_portable; + }; + 03F1815B2D7262FC0058BDF9 /* kernels_quantized */ = { + isa = XCSwiftPackageProductDependency; + productName = kernels_quantized; + }; +/* End XCSwiftPackageProductDependency section */ }; rootObject = 03B2D35C2C8A515A0046936E /* Project object */; } diff --git a/extension/benchmark/apple/Benchmark/Frameworks/download_frameworks.sh b/extension/benchmark/apple/Benchmark/Frameworks/download_frameworks.sh deleted file mode 100755 index e6c39c16df7..00000000000 --- a/extension/benchmark/apple/Benchmark/Frameworks/download_frameworks.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -VERSION="0.5.0.20250228" -FRAMEWORKS=( - "backend_coreml" - "backend_mps" - "backend_xnnpack" - "executorch" - "kernels_custom" - "kernels_optimized" - "kernels_portable" - "kernels_quantized" -) - -cd "$(dirname "$0")" || exit - -for FRAMEWORK in "${FRAMEWORKS[@]}"; do - rm -f "${FRAMEWORK}-${VERSION}.zip" - rm -rf "${FRAMEWORK}.xcframework" - curl -sSLO "https://ossci-ios.s3.amazonaws.com/executorch/${FRAMEWORK}-${VERSION}.zip" && \ - unzip -q "${FRAMEWORK}-${VERSION}.zip" && \ - rm "${FRAMEWORK}-${VERSION}.zip" -done From ef9c3aa6dbfeff237c5bdbd5fbb29dc5115757a5 Mon Sep 17 00:00:00 2001 From: Anthony Shoumikhin Date: Fri, 28 Feb 2025 19:38:00 -0800 Subject: [PATCH 183/584] Update README.md (#8868) --- extension/benchmark/apple/Benchmark/README.md | 42 ++----------------- 1 file changed, 3 insertions(+), 39 deletions(-) diff --git a/extension/benchmark/apple/Benchmark/README.md b/extension/benchmark/apple/Benchmark/README.md index 79daf070e44..e993ae4f970 100644 --- a/extension/benchmark/apple/Benchmark/README.md +++ b/extension/benchmark/apple/Benchmark/README.md @@ -30,47 +30,11 @@ cd executorch This command performs a shallow clone to speed up the process. -### Set Up the Frameworks +### Build the Frameworks -The Benchmark App relies on prebuilt ExecuTorch frameworks. -You have two options: +The Benchmark App is configured to use a Swift PM package that provides the prebuilt ExecuTorch frameworks. -
-Option 1: Download Prebuilt Frameworks -
- -Run the provided script to download the prebuilt frameworks: - -```bash -./extension/benchmark/apple/Benchmark/Frameworks/download_frameworks.sh -``` -
- -
-Option 2: Build Frameworks Locally -
- -Alternatively, you can build the frameworks yourself by following the [guide](https://pytorch.org/executorch/main/apple-runtime.html#local-build). -
- -Once the frameworks are downloaded or built, verify that the `Frameworks` directory contains the necessary `.xcframework` files: - -```bash -ls extension/benchmark/apple/Benchmark/Frameworks -``` - -You should see: - -``` -backend_coreml.xcframework -backend_mps.xcframework -backend_xnnpack.xcframework -executorch.xcframework -kernels_custom.xcframework -kernels_optimized.xcframework -kernels_portable.xcframework -kernels_quantized.xcframework -``` +By default, the app relies on the package referencing locally built binaries. To ensure it functions correctly, you must first build the frameworks by following the [guide](https://pytorch.org/executorch/main/using-executorch-ios.html#building-from-source). ## Adding Models and Resources From f357169c18807b27d40bb5c0c4019e87781df16a Mon Sep 17 00:00:00 2001 From: Nathanael See Date: Fri, 28 Feb 2025 19:49:37 -0800 Subject: [PATCH 184/584] merge q_8w_linear and main functions in q_8w_linear shader Differential Revision: D70127663 Pull Request resolved: https://github.com/pytorch/executorch/pull/8704 --- .../runtime/graph/ops/glsl/q_8w_linear.glsl | 67 +++++++++---------- 1 file changed, 30 insertions(+), 37 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl b/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl index cd1a08909d0..e98d2e919b0 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl @@ -52,19 +52,26 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; #define FLOAT_T float #endif -FLOAT_T q_8w_linear(const ivec4 out_idx, const int K) { - const FLOAT_T scale = t_scales[out_idx.x]; +void main() { + const int out_bufi = int(gl_GlobalInvocationID.x); + if (out_bufi >= out_numel) { + return; + } + + const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, 0); + + const FLOAT_T scale = t_scales[out_tidx.x]; FLOAT_T outval = FLOAT_T(0.0); - // Initial mat1 tensor idx will be (0, out_idx.y, out_idx.z, 0) - int mat1_offset = out_idx.y * mat1_strides.y + out_idx.z * qmat2_strides.z; - // Initial qmat2 tensor idx wil be (0, out_idx.x, 0, 0); note that the qmat2 + // Initial mat1 tensor idx will be (0, out_tidx.y, out_tidx.z, 0) + int mat1_offset = out_tidx.y * mat1_strides.y + out_tidx.z * qmat2_strides.z; + // Initial qmat2 tensor idx wil be (0, out_tidx.x, 0, 0); note that the qmat2 // tensor is transposed - int qmat2_offset = out_idx.x * qmat2_strides.y; + int qmat2_offset = out_tidx.x * qmat2_strides.y; - // TODO(ssjia): optimize memory access pattern by traversing K in inner loop - for (int i = 0; i < K; i++) { + // TODO(ssjia): optimize memory access pattern by traversing mat1 x in inner loop + for (int i = 0; i < mat1_sizes.x; i++) { const FLOAT_T mat1_val = t_mat1[mat1_offset]; const FLOAT_T mat2_val = t_qmat2[qmat2_offset] * scale; @@ -74,33 +81,32 @@ FLOAT_T q_8w_linear(const ivec4 out_idx, const int K) { qmat2_offset++; } - return outval; -} - -void main() { - const int out_bufi = int(gl_GlobalInvocationID.x); - if (out_bufi >= out_numel) { - return; - } - - const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, 0); - - t_out[out_bufi] = q_8w_linear(out_tidx, mat1_sizes.x); + t_out[out_bufi] = outval; } #else // USING_TEXTURE #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require -VEC4_T q_8w_linear(const u16vec2 out_pos, const uint16_t K) { +void main() { + const u16vec2 out_pos = u16vec2( + gl_GlobalInvocationID.x / out_limits.y, + gl_GlobalInvocationID.x % out_limits.y); + if 
(out_pos.x >= out_limits.x) { + return; + } + const uint16_t qmat2_pos_y = out_pos.x * uint16_t(4); VEC4_T outtex = VEC4_T(0); - const u16vec3 scales_pos = u16vec3(out_pos.x, 0, 0); - const VEC4_T scales = load_texel(t_scales, scales_pos); + const VEC4_T scales = load_texel(t_scales, u16vec3(out_pos.x, 0, 0)); - for (uint16_t i = uint16_t(0), x = uint16_t(0); i < K; i += uint16_t(4), x++) { + for ( + uint16_t i = uint16_t(0), x = uint16_t(0); + i < uint16_t(mat1_sizes.x); + i += uint16_t(4), x++) + { const VEC4_T mat1_tex = load_texel(t_mat1, u16vec3(x, out_pos.y, 0)); const VEC4_T sums = VEC4_T( dot(mat1_tex, load_texel(t_qmat2, u16vec3(x, qmat2_pos_y, 0))), @@ -112,19 +118,6 @@ VEC4_T q_8w_linear(const u16vec2 out_pos, const uint16_t K) { } outtex *= scales; - - return outtex; -} - -void main() { - const u16vec2 out_pos = u16vec2( - gl_GlobalInvocationID.x / out_limits.y, - gl_GlobalInvocationID.x % out_limits.y); - if (out_pos.x >= out_limits.x) { - return; - } - - VEC4_T outtex = q_8w_linear(out_pos, uint16_t(mat1_sizes.x)); write_texel(t_out, u16vec3(out_pos, 0), outtex); } From 19a3002adb4942c958f0aebe68368e0afc66734e Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Sat, 1 Mar 2025 01:22:03 -0500 Subject: [PATCH 185/584] [build] Fix flatc (#8858) [build] Fix flatc (#8816) * [build] Fix flatc We need to install `build/pip_data_bin_init.py.in` into `/data/bin/__init__.py`. This PR rewrite the logic into a `BuiltFile` so that it works well in editable mode. Test: ``` python -c "from executorch.data.bin import flatc" ``` Will add unit test in next PR. [ghstack-poisoned] * Update on "[build] Fix flatc" Fixes #8784 We need to install `build/pip_data_bin_init.py.in` into `/data/bin/__init__.py`. This PR rewrite the logic into a `BuiltFile` so that it works well in editable mode. Since `BuiltFile` by default looks into cmake cache directory, this PR adds a placeholder `%CMAKE_CACHE_DIR%` for those are actually built by CMake and for `build/pip_data_bin_init.py.in` we don't add this placeholder. Test: ``` python -c "from executorch.data.bin import flatc" ``` Will add unit test in next PR. [ghstack-poisoned] * Update on "[build] Fix flatc" Fixes #8784 We need to install `build/pip_data_bin_init.py.in` into `/data/bin/__init__.py`. This PR rewrite the logic into a `BuiltFile` so that it works well in editable mode. Since `BuiltFile` by default looks into cmake cache directory, this PR adds a placeholder `%CMAKE_CACHE_DIR%` for those are actually built by CMake and for `build/pip_data_bin_init.py.in` we don't add this placeholder. Test: ``` python -c "from executorch.data.bin import flatc" ``` Will add unit test in next PR. [ghstack-poisoned] * Update on "[build] Fix flatc" Fixes #8784 We need to install `build/pip_data_bin_init.py.in` into `/data/bin/__init__.py`. This PR rewrite the logic into a `BuiltFile` so that it works well in editable mode. Since `BuiltFile` by default looks into cmake cache directory, this PR adds a placeholder `%CMAKE_CACHE_DIR%` for those are actually built by CMake and for `build/pip_data_bin_init.py.in` we don't add this placeholder. Test: ``` python -c "from executorch.data.bin import flatc" ``` Will add unit test in next PR. 
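A minimal version of that unit test could look something like this (a sketch only; the final name and location may differ, and it assumes the binary is copied next to the `executorch.data.bin` package as described above):

```python
# Sketch of the planned packaging test; not part of this change.
import os
import unittest


class FlatcPackagingTest(unittest.TestCase):
    def test_flatc_module_imports(self):
        # Mirrors the manual check above: the console-script target must import.
        from executorch.data.bin import flatc  # noqa: F401

    def test_flatc_binary_exists(self):
        # Assumes the BuiltFile entry copied the binary into executorch/data/bin/.
        import executorch.data.bin as bin_pkg

        binary = os.path.join(os.path.dirname(bin_pkg.__file__), "flatc")
        self.assertTrue(os.path.isfile(binary))


if __name__ == "__main__":
    unittest.main()
```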
[ghstack-poisoned] (cherry picked from commit d0b27b5471c6b5b2463dc43768d63817d27e2473) Co-authored-by: Mengwei Liu --- .github/workflows/pull.yml | 11 ++++ data/bin/README.md | 31 ++++++++++ pyproject.toml | 1 + setup.py | 121 +++++++++++++++++++++---------------- 4 files changed, 113 insertions(+), 51 deletions(-) create mode 100644 data/bin/README.md diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 2a2109bcdc1..5cc0d3c597b 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -371,6 +371,17 @@ jobs: build-tool: cmake docker-image: executorch-ubuntu-22.04-clang12 + unittest-editable: + uses: ./.github/workflows/_unittest.yml + permissions: + id-token: write + contents: read + with: + build-mode: Debug + build-tool: cmake + editable: true + docker-image: executorch-ubuntu-22.04-clang12 + unittest-buck: uses: ./.github/workflows/_unittest.yml permissions: diff --git a/data/bin/README.md b/data/bin/README.md new file mode 100644 index 00000000000..ca81882e00d --- /dev/null +++ b/data/bin/README.md @@ -0,0 +1,31 @@ +## PLEASE DO NOT REMOVE THIS DIRECTORY! + +This directory is used to host binaries installed during pip wheel build time. + +## How to add a binary into pip wheel + +1. Update `[project.scripts]` section of `pyproject.toml` file. Add the new binary name and it's corresponding module name similar to: + +``` +flatc = "executorch.data.bin:flatc" +``` + +For example, `flatc` is built during wheel packaging, we first build `flatc` through CMake and copy the file to `/data/bin/flatc` and ask `setuptools` to generate a commandline wrapper for `flatc`, then route it to `/data/bin/flatc`. + +This way after installing `executorch`, a user will be able to call `flatc` directly in commandline and it points to `/data/bin/flatc` + +2. Update `setup.py` to include the logic of building the new binary and copying the binary to this directory. + +```python +BuiltFile( + src_dir="%CMAKE_CACHE_DIR%/third-party/flatbuffers/%BUILD_TYPE%/", + src_name="flatc", + dst="executorch/data/bin/", + is_executable=True, +), +``` +This means find `flatc` in `CMAKE_CACHE_DIR` and copy it to `/data/bin`. Notice that this works for both pip wheel packaging as well as editable mode install. + +## Why we can't create this directory at wheel build time? + +The reason is without `data/bin/` present in source file, we can't tell `setuptools` to generate a module `executorch.data.bin` in editable mode, partially because we don't have a good top level module `executorch` and have to enumerate all the second level modules, including `executorch.data.bin`. diff --git a/pyproject.toml b/pyproject.toml index 43b0a8c4daf..a7244133063 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,6 +88,7 @@ flatc = "executorch.data.bin:flatc" [tool.setuptools.package-dir] "executorch.backends" = "backends" "executorch.codegen" = "codegen" +"executorch.data.bin" = "data/bin" # TODO(mnachin T180504136): Do not put examples/models # into core pip packages. Refactor out the necessary utils # or core models files into a separate package. diff --git a/setup.py b/setup.py index d5b9fea8cfc..5645708a4ca 100644 --- a/setup.py +++ b/setup.py @@ -220,37 +220,50 @@ def src_path(self, installer: "InstallerBuildExt") -> Path: """ # Share the cmake-out location with CustomBuild. 
build_cmd = installer.get_finalized_command("build") - if hasattr(build_cmd, "cmake_cache_dir"): - cmake_cache_dir = Path(build_cmd.cmake_cache_dir) + if "%CMAKE_CACHE_DIR%" in self.src: + if not hasattr(build_cmd, "cmake_cache_dir"): + raise RuntimeError( + f"Extension {self.name} has a src {self.src} that contains" + " %CMAKE_CACHE_DIR% but CMake does not run in the `build` " + "command. Please double check if the command is correct." + ) + else: + build_dir = Path(build_cmd.cmake_cache_dir) else: - # If we're in editable mode, use a default or fallback value for cmake_cache_dir - # This could be a hardcoded path, or a path derived from the current working directory - cmake_cache_dir = Path(".") + # If the src path doesn't contain %CMAKE_CACHE_DIR% placeholder, + # try to find it under the current directory. + build_dir = Path(".") + + src_path = self.src.replace("%CMAKE_CACHE_DIR%/", "") + cfg = get_build_type(installer.debug) if os.name == "nt": # Replace %BUILD_TYPE% with the current build type. - self.src = self.src.replace("%BUILD_TYPE%", cfg) + src_path = src_path.replace("%BUILD_TYPE%", cfg) else: # Remove %BUILD_TYPE% from the path. - self.src = self.src.replace("/%BUILD_TYPE%", "") + src_path = src_path.replace("/%BUILD_TYPE%", "") # Construct the full source path, resolving globs. If there are no glob # pattern characters, this will just ensure that the source file exists. - srcs = tuple(cmake_cache_dir.glob(self.src)) + srcs = tuple(build_dir.glob(src_path)) if len(srcs) != 1: raise ValueError( - f"""Expected exactly one file matching '{self.src}' in {cmake_cache_dir}; found {repr(srcs)}. - -If that file is a CMake-built extension module file, and we are installing in editable mode, please disable the corresponding build option since it's not supported yet. - -Try: - -EXECUTORCH_BUILD_FLATC=OFF EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT=OFF pip install -e . -""" + f"Expecting exactly 1 file matching {self.src} in {build_dir}, " + f"found {repr(srcs)}. Resolved src pattern: {src_path}." ) return srcs[0] + def inplace_dir(self, installer: "InstallerBuildExt") -> Path: + """Returns the path of this file to be installed to, under inplace mode. + + It will be a relative path to the project root directory. For more info + related to inplace/editable mode, please checkout this doc: + https://setuptools.pypa.io/en/latest/userguide/development_mode.html + """ + raise NotImplementedError() + class BuiltFile(_BaseExtension): """An extension that installs a single file that was built by cmake. @@ -316,6 +329,18 @@ def dst_path(self, installer: "InstallerBuildExt") -> Path: # Destination looks like a file. return dst_root / Path(self.dst) + def inplace_dir(self, installer: "InstallerBuildExt") -> Path: + """For a `BuiltFile`, we use self.dst as its inplace directory path. + Need to handle directory vs file. + """ + # HACK: get rid of the leading "executorch" in ext.dst. + # This is because we don't have a root level "executorch" module. + package_dir = self.dst.removeprefix("executorch/") + # If dst is a file, use it's directory + if not package_dir.endswith("/"): + package_dir = os.path.dirname(package_dir) + return Path(package_dir) + class BuiltExtension(_BaseExtension): """An extension that installs a python extension that was built by cmake.""" @@ -335,7 +360,7 @@ def __init__(self, src: str, modpath: str): "/" not in modpath ), f"modpath must be a dotted python module path: saw '{modpath}'" # This is a real extension, so use the modpath as the name. 
- super().__init__(src=src, dst=modpath, name=modpath) + super().__init__(src=f"%CMAKE_CACHE_DIR%/{src}", dst=modpath, name=modpath) def src_path(self, installer: "InstallerBuildExt") -> Path: """Returns the path to the source file, resolving globs. @@ -369,6 +394,15 @@ def dst_path(self, installer: "InstallerBuildExt") -> Path: # path: that's the file we're creating. return Path(installer.get_ext_fullpath(self.dst)) + def inplace_dir(self, installer: "InstallerBuildExt") -> Path: + """For BuiltExtension, deduce inplace dir path from extension name.""" + build_py = installer.get_finalized_command("build_py") + modpath = self.name.split(".") + package = ".".join(modpath[:-1]) + package_dir = os.path.abspath(build_py.get_package_dir(package)) + + return Path(package_dir) + class InstallerBuildExt(build_ext): """Installs files that were built by cmake.""" @@ -399,23 +433,15 @@ def copy_extensions_to_source(self) -> None: Returns: """ - build_py = self.get_finalized_command("build_py") for ext in self.extensions: - if isinstance(ext, BuiltExtension): - modpath = ext.name.split(".") - package = ".".join(modpath[:-1]) - package_dir = os.path.abspath(build_py.get_package_dir(package)) - else: - # HACK: get rid of the leading "executorch" in ext.dst. - # This is because we don't have a root level "executorch" module. - package_dir = ext.dst.removeprefix("executorch/") + package_dir = ext.inplace_dir(self) # Ensure that the destination directory exists. self.mkpath(os.fspath(package_dir)) regular_file = ext.src_path(self) inplace_file = os.path.join( - package_dir, os.path.basename(ext.src_path(self)) + package_dir, os.path.basename(ext.dst_path(self)) ) # Always copy, even if source is older than destination, to ensure @@ -724,20 +750,6 @@ def run(self): # Build the system. self.spawn(["cmake", "--build", cmake_cache_dir, *build_args]) - # Non-python files should live under this data directory. - data_root = os.path.join(self.build_lib, "executorch", "data") - - # Directories like bin/ and lib/ live under data/. - bin_dir = os.path.join(data_root, "bin") - - # Copy the bin wrapper so that users can run any executables under - # data/bin, as long as they are listed in the [project.scripts] section - # of pyproject.toml. - self.mkpath(bin_dir) - self.copy_file( - "build/pip_data_bin_init.py.in", - os.path.join(bin_dir, "__init__.py"), - ) # Share the cmake-out location with _BaseExtension. 
self.cmake_cache_dir = cmake_cache_dir @@ -749,13 +761,20 @@ def get_ext_modules() -> List[Extension]: """Returns the set of extension modules to build.""" ext_modules = [] if ShouldBuild.flatc(): - ext_modules.append( - BuiltFile( - src_dir="third-party/flatbuffers/%BUILD_TYPE%/", - src_name="flatc", - dst="executorch/data/bin/", - is_executable=True, - ) + ext_modules.extend( + [ + BuiltFile( + src_dir="%CMAKE_CACHE_DIR%/third-party/flatbuffers/%BUILD_TYPE%/", + src_name="flatc", + dst="executorch/data/bin/", + is_executable=True, + ), + BuiltFile( + src_dir="build/", + src_name="pip_data_bin_init.py.in", + dst="executorch/data/bin/__init__.py", + ), + ] ) if ShouldBuild.pybindings(): @@ -778,16 +797,16 @@ def get_ext_modules() -> List[Extension]: if ShouldBuild.llama_custom_ops(): ext_modules.append( BuiltFile( - src_dir="extension/llm/custom_ops/%BUILD_TYPE%/", + src_dir="%CMAKE_CACHE_DIR%/extension/llm/custom_ops/%BUILD_TYPE%/", src_name="custom_ops_aot_lib", - dst="executorch/extension/llm/custom_ops", + dst="executorch/extension/llm/custom_ops/", is_dynamic_lib=True, ) ) ext_modules.append( # Install the prebuilt library for quantized ops required by custom ops. BuiltFile( - src_dir="kernels/quantized/%BUILD_TYPE%/", + src_dir="%CMAKE_CACHE_DIR%/kernels/quantized/%BUILD_TYPE%/", src_name="quantized_ops_aot_lib", dst="executorch/kernels/quantized/", is_dynamic_lib=True, From 542480c03ebac67064ef0541daee0dd88d4b8b59 Mon Sep 17 00:00:00 2001 From: Shen Chen Xu Date: Sun, 2 Mar 2025 18:26:49 -0800 Subject: [PATCH 186/584] Fix static_llama to read some previously hardcoded options from ModelArgs Differential Revision: D70414663 Pull Request resolved: https://github.com/pytorch/executorch/pull/8846 --- examples/qualcomm/oss_scripts/llama/model/static_llama.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/qualcomm/oss_scripts/llama/model/static_llama.py b/examples/qualcomm/oss_scripts/llama/model/static_llama.py index 09cc7504224..40044db7428 100755 --- a/examples/qualcomm/oss_scripts/llama/model/static_llama.py +++ b/examples/qualcomm/oss_scripts/llama/model/static_llama.py @@ -37,7 +37,7 @@ def __init__(self, config: ModelArgs, output_new_cache_only=False): super().__init__() self.dim = config.dim self.n_heads = config.n_heads - self.head_dim = config.dim // config.n_heads + self.head_dim = config.head_dim self.n_kv_heads = config.n_kv_heads self.num_key_value_groups = config.n_heads // self.n_kv_heads self.max_seq_len = config.max_seq_len @@ -304,7 +304,7 @@ def __init__( ): super().__init__() self.dim = config.dim - self.head_dim = config.dim // config.n_heads + self.head_dim = config.head_dim self.max_batch_size = config.max_batch_size self.max_seq_len = config.max_seq_len self.n_heads = config.n_heads @@ -328,9 +328,11 @@ def __init__( self.output = nn.Linear(config.dim, config.vocab_size, bias=False) self.tok_embeddings = nn.Embedding(config.vocab_size, config.dim) freqs_cos, freqs_sin = precompute_freqs_cis( - config.dim // config.n_heads, + config.head_dim, config.max_seq_len, config.rope_freq_base, + config.use_scaled_rope, + config.rope_scale_factor, ) self.register_buffer("freqs_cos", freqs_cos, persistent=False) self.register_buffer("freqs_sin", freqs_sin, persistent=False) From ce715c51c9bc58e91a92dc908cbd69160bb0d0fb Mon Sep 17 00:00:00 2001 From: Yufeng Shi Date: Mon, 3 Mar 2025 15:33:54 +0000 Subject: [PATCH 187/584] Fix docstring of to_backend in backend_api.py (#8879) The second method overloading of to_backend() has the wrong 
signature in docstring Signed-off-by: Yufeng Shi --- exir/backend/backend_api.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/exir/backend/backend_api.py b/exir/backend/backend_api.py index 966cae5f022..519f184871a 100644 --- a/exir/backend/backend_api.py +++ b/exir/backend/backend_api.py @@ -1,5 +1,6 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. +# Copyright 2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -56,9 +57,9 @@ def to_backend( ) -> LoweredBackendModule: def to_backend( - graph_module: torch.fx.GraphModule, - partitioner: Type[TPartitioner], - ) -> torch.fx.GraphModule + edge_program: ExportedProgram, + partitioner: Partitioner, + ) -> ExportedProgram: """ pass From 9841e54b010733a5ab180ce59089b24379a7b18c Mon Sep 17 00:00:00 2001 From: Adrian Lundell <36153706+AdrianLundell@users.noreply.github.com> Date: Mon, 3 Mar 2025 16:47:02 +0100 Subject: [PATCH 188/584] Arm backend: Fix error not raised in TosaArg class (#8875) Signed-off-by: Adrian Lundell --- backends/arm/tosa_mapping.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backends/arm/tosa_mapping.py b/backends/arm/tosa_mapping.py index 9a8b6b2c35d..292d6209bb3 100644 --- a/backends/arm/tosa_mapping.py +++ b/backends/arm/tosa_mapping.py @@ -107,7 +107,10 @@ def __init__(self, argument: Any) -> None: if isinstance(argument, (int, float)): self.__process_number(argument) return + if isinstance(argument, torch.dtype): + # Dtype is parsed from fake tensor + return - RuntimeError( + raise RuntimeError( f"Unhandled node input argument: {argument}, of type {type(argument)}" ) From ff36efdb899063ff5f20cb941eaf2222650d5d2f Mon Sep 17 00:00:00 2001 From: Jacob Stevens Date: Mon, 3 Mar 2025 12:09:04 -0500 Subject: [PATCH 189/584] Address various warnings as errors (#8581) Address various warnings as errors (#8581) Summary: Pull Request resolved: https://github.com/pytorch/executorch/pull/8581 Some projects uses more restrictive build options than currently used in ET CI. This means we encountered a number of errors when enabling for a microcontroller. 
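Most of the individual changes below follow a small number of recurring patterns; here is a stand-alone sketch of those patterns (illustrative only -- the struct and function names are made up and do not appear in this diff):

```cpp
#include <cstddef>
#include <cstdint>

struct SegmentInfo {
  int type;
  std::size_t index;
  // Shadowing warning: suffix constructor parameters so they no longer shadow
  // the members they initialize.
  SegmentInfo(int type_, std::size_t index_ = 0) : type(type_), index(index_) {}
};

// Sign-compare warning: tensor-style dims are signed while container sizes are
// unsigned, so reject negatives and cast once instead of mixing the two types.
bool rank_matches(std::int64_t dim, std::size_t rank) {
  return dim >= 0 && static_cast<std::size_t>(dim) == rank;
}
```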
Reviewed By: digantdesai, swolchok Differential Revision: D69139962 --- extension/threadpool/cpuinfo_utils.cpp | 7 ++-- extension/threadpool/targets.bzl | 1 + .../portable/cpu/op__to_dim_order_copy.cpp | 6 ++- kernels/portable/cpu/op_amax.cpp | 3 +- kernels/portable/cpu/op_amin.cpp | 4 +- kernels/portable/cpu/op_argmax.cpp | 3 +- kernels/portable/cpu/op_argmin.cpp | 3 +- kernels/portable/cpu/op_expand_copy.cpp | 3 +- .../portable/cpu/util/activation_ops_util.cpp | 2 +- kernels/portable/cpu/util/broadcast_util.cpp | 6 ++- kernels/portable/cpu/util/copy_ops_util.h | 7 ++-- kernels/portable/cpu/util/functional_util.h | 8 ++-- kernels/portable/cpu/util/reduce_util.cpp | 19 ++++----- kernels/portable/cpu/util/reduce_util.h | 4 +- kernels/portable/cpu/util/repeat_util.cpp | 17 ++++---- kernels/portable/cpu/util/targets.bzl | 2 - kernels/prim_ops/et_view.cpp | 3 +- runtime/core/data_loader.h | 12 +++--- runtime/core/exec_aten/util/dim_order_util.h | 18 ++++----- .../util/tensor_shape_to_c_string.cpp | 4 +- runtime/core/exec_aten/util/tensor_util.h | 32 +++++++++------ .../exec_aten/util/tensor_util_portable.cpp | 17 ++++---- .../core/portable_type/c10/c10/util/irange.h | 2 +- runtime/core/portable_type/tensor_impl.cpp | 8 ++-- runtime/core/tensor_layout.cpp | 2 +- runtime/executor/method.cpp | 40 +++++++++++-------- runtime/executor/method_meta.cpp | 20 +++++----- runtime/executor/program.cpp | 22 +++++----- runtime/executor/targets.bzl | 4 ++ runtime/executor/tensor_parser.h | 2 +- runtime/executor/tensor_parser_exec_aten.cpp | 11 +++-- runtime/executor/tensor_parser_portable.cpp | 6 +-- runtime/kernel/operator_registry.cpp | 4 +- runtime/kernel/operator_registry.h | 4 +- runtime/platform/log.cpp | 3 +- runtime/platform/log.h | 9 +++++ runtime/platform/profiler.cpp | 6 ++- schema/extended_header.cpp | 2 - test/build_size_test.sh | 3 +- 39 files changed, 186 insertions(+), 143 deletions(-) diff --git a/extension/threadpool/cpuinfo_utils.cpp b/extension/threadpool/cpuinfo_utils.cpp index 5dc3fa7fae5..21862fbd4aa 100644 --- a/extension/threadpool/cpuinfo_utils.cpp +++ b/extension/threadpool/cpuinfo_utils.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include @@ -84,7 +85,7 @@ bool populate_available_cpu_mids() { cpu_midrs->resize(num_possible_cores); const std::string kMidrFilePathPrefix = "/sys/devices/system/cpu/cpu"; const std::string kMidrFilePathSuffix = "/regs/identification/midr_el1"; - for (int32_t i = 0; i < num_possible_cores; ++i) { + for (const auto i : c10::irange(num_possible_cores)) { std::string midr_file_path = kMidrFilePathPrefix + std::to_string(i) + kMidrFilePathSuffix; ET_LOG(Info, "Reading file %s", midr_file_path.c_str()); @@ -115,7 +116,7 @@ uint32_t _get_num_performant_cores() { ET_LOG(Info, "CPU info and manual query on # of cpus dont match."); return 0; } - for (int32_t i = 0; i < cpu_midrs->size(); ++i) { + for (const auto i : c10::irange(cpu_midrs->size())) { uint32_t masked_midr = (*cpu_midrs)[i] & RIVISION_MASK; switch (masked_midr) { case CPUINFO_ARM_MIDR_CORTEX_A520: @@ -148,7 +149,7 @@ uint32_t get_num_performant_cores() { uint32_t num_possible_cores = cpuinfo_get_processors_count(); uint32_t num_non_performant_core = 0; if (uarch_count > 1) { - for (int32_t i = 0; i < uarch_count; ++i) { + for (const auto i : c10::irange(uarch_count)) { const struct cpuinfo_uarch_info* uarch_info = cpuinfo_get_uarch(i); if (is_non_performant_core(uarch_info)) { num_non_performant_core += uarch_info->processor_count; diff --git a/extension/threadpool/targets.bzl b/extension/threadpool/targets.bzl index 4a7185ce972..8bb0398b385 100644 --- a/extension/threadpool/targets.bzl +++ b/extension/threadpool/targets.bzl @@ -23,6 +23,7 @@ def define_common_targets(): srcs = _THREADPOOL_SRCS, deps = [ "//executorch/runtime/core:core", + "//executorch/runtime/core/portable_type/c10/c10:c10", ], exported_headers = _THREADPOOL_HEADERS, exported_deps = [ diff --git a/kernels/portable/cpu/op__to_dim_order_copy.cpp b/kernels/portable/cpu/op__to_dim_order_copy.cpp index efb74e3a01f..40ce86e8fdc 100644 --- a/kernels/portable/cpu/op__to_dim_order_copy.cpp +++ b/kernels/portable/cpu/op__to_dim_order_copy.cpp @@ -6,6 +6,8 @@ * LICENSE file in the root directory of this source tree. */ +#include + #include #include #include @@ -41,7 +43,7 @@ int64_t coordinateToIndexWithDimOrder( dim_order_to_stride_nocheck( sizes.data(), dim_order.data(), sizes.size(), strides); - for (size_t i = 0; i < self.dim(); ++i) { + for (const auto i : c10::irange(self.dim())) { index += cur_indices[i] * strides[i]; } return index; @@ -59,7 +61,7 @@ void _to_dim_order_copy_impl(const Tensor& self, Tensor& out) { for (ssize_t i = 0; i < self.numel(); i++) { // Update the current indices. for (ssize_t j = self.dim() - 1; j >= 0; j--) { - if (coordinate[j] + 1 < self.size(j)) { + if (coordinate[j] + 1 < static_cast(self.size(j))) { coordinate[j]++; break; } else { diff --git a/kernels/portable/cpu/op_amax.cpp b/kernels/portable/cpu/op_amax.cpp index 9f879179ec6..d36f416c7b4 100644 --- a/kernels/portable/cpu/op_amax.cpp +++ b/kernels/portable/cpu/op_amax.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -44,7 +45,7 @@ Tensor& amax_out( ET_SWITCH_REALHBBF16_TYPES(in.scalar_type(), ctx, "amax.out", CTYPE, [&]() { CTYPE* out_data = out.mutable_data_ptr(); - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { out_data[out_ix] = reduce_over_dim_list( [](CTYPE v, CTYPE max_v) { return std::isnan(v) || v > max_v ? 
v : max_v; diff --git a/kernels/portable/cpu/op_amin.cpp b/kernels/portable/cpu/op_amin.cpp index 4f6f3ce52e5..7c4c8186e59 100644 --- a/kernels/portable/cpu/op_amin.cpp +++ b/kernels/portable/cpu/op_amin.cpp @@ -5,7 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ - +#include #include #include @@ -44,7 +44,7 @@ Tensor& amin_out( ET_SWITCH_REALHBBF16_TYPES(in.scalar_type(), ctx, "amin.out", CTYPE, [&]() { CTYPE* out_data = out.mutable_data_ptr(); - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { out_data[out_ix] = reduce_over_dim_list( [](CTYPE v, CTYPE min_v) { return std::isnan(v) || v < min_v ? v : min_v; diff --git a/kernels/portable/cpu/op_argmax.cpp b/kernels/portable/cpu/op_argmax.cpp index 5eb656d5b76..39ad0171d5d 100644 --- a/kernels/portable/cpu/op_argmax.cpp +++ b/kernels/portable/cpu/op_argmax.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -46,7 +47,7 @@ Tensor& argmax_out( ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, "argmax.out", CTYPE, [&] { long* out_data = out.mutable_data_ptr(); - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { std::tuple acc = reduce_over_dim( [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) { if (!std::isnan(acc_val) && (std::isnan(v) || v > acc_val)) { diff --git a/kernels/portable/cpu/op_argmin.cpp b/kernels/portable/cpu/op_argmin.cpp index 1c4a2572ea8..8148efa6264 100644 --- a/kernels/portable/cpu/op_argmin.cpp +++ b/kernels/portable/cpu/op_argmin.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include @@ -46,7 +47,7 @@ Tensor& argmin_out( ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, "argmin.out", CTYPE, [&] { long* out_data = out.mutable_data_ptr(); - for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { + for (const auto out_ix : c10::irange(out.numel())) { std::tuple acc = reduce_over_dim( [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) { if (!std::isnan(acc_val) && (std::isnan(v) || v < acc_val)) { diff --git a/kernels/portable/cpu/op_expand_copy.cpp b/kernels/portable/cpu/op_expand_copy.cpp index f1a7bfbf1fb..6c8685dd867 100644 --- a/kernels/portable/cpu/op_expand_copy.cpp +++ b/kernels/portable/cpu/op_expand_copy.cpp @@ -96,7 +96,8 @@ Tensor& expand_copy_out( ET_KERNEL_CHECK( ctx, - repeat_tensor(self, {repeats, repeats_size}, out) == Error::Ok, + repeat_tensor(self, makeArrayRef(repeats, repeats_size), out) == + Error::Ok, InvalidArgument, out); diff --git a/kernels/portable/cpu/util/activation_ops_util.cpp b/kernels/portable/cpu/util/activation_ops_util.cpp index fe26d4fda04..abde15f8740 100644 --- a/kernels/portable/cpu/util/activation_ops_util.cpp +++ b/kernels/portable/cpu/util/activation_ops_util.cpp @@ -31,7 +31,7 @@ bool check_glu_args(const Tensor& in, int64_t dim, Tensor& out) { ET_LOG_AND_RETURN_IF_FALSE(tensor_is_floating_type(in)); const size_t non_negative_dim = dim < 0 ? 
dim + in.dim() : dim; - const size_t dim_size = in.size(non_negative_dim); + const ssize_t dim_size = in.size(non_negative_dim); ET_CHECK_OR_RETURN_FALSE( dim_size % 2 == 0, diff --git a/kernels/portable/cpu/util/broadcast_util.cpp b/kernels/portable/cpu/util/broadcast_util.cpp index d8569d23c2f..381e07cbe30 100644 --- a/kernels/portable/cpu/util/broadcast_util.cpp +++ b/kernels/portable/cpu/util/broadcast_util.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include #include @@ -274,7 +275,7 @@ void delinearize_index( size_t* out_indexes, const size_t out_indexes_len) { ET_CHECK(shape.size() <= out_indexes_len); - for (auto i = 0; i < shape.size(); ++i) { + for (size_t i = 0; i < shape.size(); ++i) { auto dim = shape.size() - 1 - i; auto dim_size = shape[dim]; out_indexes[dim] = linear_index % dim_size; @@ -304,7 +305,8 @@ size_t linearize_access_indexes( size_t linear_index = 0; for (size_t i = 0; i < indexes_broadcast_from.size(); ++i) { // If this dimension is broadcasted, add zero to the linear address. - if (indexes_broadcast_from[i] >= broadcast_from_shape[i]) { + if (indexes_broadcast_from[i] >= + static_cast(broadcast_from_shape[i])) { ET_CHECK_MSG( broadcast_from_shape[i] == 1, "Expected dim size == 1 if broadcasted, but actual dim size is %zu", diff --git a/kernels/portable/cpu/util/copy_ops_util.h b/kernels/portable/cpu/util/copy_ops_util.h index 8efd6057dba..e7399ae0956 100644 --- a/kernels/portable/cpu/util/copy_ops_util.h +++ b/kernels/portable/cpu/util/copy_ops_util.h @@ -7,6 +7,7 @@ */ #pragma once +#include #include @@ -26,8 +27,8 @@ void _as_strided_copy( ArrayRef stride, int64_t dim) { // the last dimension, copy data - if (dim == size.size() - 1) { - for (size_t i = 0; i < size.at(dim); ++i) { + if (dim == static_cast(size.size()) - 1) { + for (const auto i : c10::irange(size.at(dim))) { output_data[i] = *input_data; input_data += stride.at(dim); } @@ -35,7 +36,7 @@ void _as_strided_copy( } size_t trailing_dims = getTrailingDims(out, dim); // recursively set data for the next dimension - for (size_t i = 0; i < size.at(dim); ++i) { + for ([[maybe_unused]] const auto i : c10::irange(size.at(dim))) { _as_strided_copy( input_data, output_data, out, size, stride, dim + 1); input_data += stride.at(dim); diff --git a/kernels/portable/cpu/util/functional_util.h b/kernels/portable/cpu/util/functional_util.h index cdf90813772..609a1a26fa5 100644 --- a/kernels/portable/cpu/util/functional_util.h +++ b/kernels/portable/cpu/util/functional_util.h @@ -8,6 +8,8 @@ #pragma once +#include + #include #include @@ -30,7 +32,7 @@ inline CTYPE apply_unary_reduce_fn( const int64_t size, const int64_t stride = 1) { CTYPE acc_val = data_in[0]; - for (size_t i = 1; i < size; i++) { + for (const auto i : c10::irange(1, size)) { acc_val = reduce_fun(data_in[i * stride], acc_val); } return acc_val; @@ -51,7 +53,7 @@ inline void apply_unary_map_fn( CTYPE_OUT* const data_out, const int64_t size, const int64_t stride = 1) { - for (size_t i = 0; i < size; i++) { + for (const auto i : c10::irange(size)) { data_out[i * stride] = map_fun(data_in[i * stride]); } } @@ -77,7 +79,7 @@ inline CTYPE_OUT apply_unary_map_reduce_fn( const int64_t size, const int64_t stride = 1) { CTYPE_OUT acc_val = map_fun(data_in[0]); - for (size_t i = 1; i < size; ++i) { + for (const auto i : c10::irange(1, size)) { acc_val = reduce_fun(map_fun(data_in[i * stride]), acc_val); } return acc_val; diff --git a/kernels/portable/cpu/util/reduce_util.cpp 
b/kernels/portable/cpu/util/reduce_util.cpp index 2902cbfc138..09ba508a31d 100644 --- a/kernels/portable/cpu/util/reduce_util.cpp +++ b/kernels/portable/cpu/util/reduce_util.cpp @@ -48,8 +48,7 @@ ET_NODISCARD bool check_dim_list_is_valid( } const size_t non_neg_d = _normalize_non_neg_d(d, in.dim()); - ET_LOG_AND_RETURN_IF_FALSE( - non_neg_d < kTensorDimensionLimit && non_neg_d >= 0); + ET_LOG_AND_RETURN_IF_FALSE(non_neg_d < kTensorDimensionLimit); ET_CHECK_OR_RETURN_FALSE( dim_exist[non_neg_d] == false, @@ -86,7 +85,7 @@ size_t get_reduced_dim_product( } size_t dim_product = 1; if (!dim.has_value()) { - for (size_t i = 0; i < in.dim(); ++i) { + for (size_t i = 0; i < static_cast(in.dim()); ++i) { dim_product *= in.size(i); } return dim_product; @@ -108,7 +107,7 @@ size_t get_reduced_dim_product( size_t dim_product = 1; const size_t in_dim = in.dim(); if (!dim_list.has_value() || dim_list.value().size() == 0) { - for (size_t i = 0; i < in.dim(); ++i) { + for (size_t i = 0; i < static_cast(in.dim()); ++i) { dim_product *= in.size(i); } return dim_product; @@ -136,7 +135,7 @@ size_t get_out_numel( ET_CHECK_VALID_DIM(dim_val, in.dim()); } const size_t non_neg_dim = _normalize_non_neg_d(dim_val, in.dim()); - for (size_t d = 0; d < in.dim(); ++d) { + for (size_t d = 0; d < static_cast(in.dim()); ++d) { if (d != non_neg_dim) { out_numel *= in.size(d); } @@ -155,7 +154,7 @@ size_t get_out_numel( dim_list) { size_t out_numel = 1; if (dim_list.has_value() && dim_list.value().size() != 0) { - for (size_t d = 0; d < in.dim(); ++d) { + for (size_t d = 0; d < static_cast(in.dim()); ++d) { if (!check_dim_in_dim_list(d, in.dim(), dim_list.value())) { out_numel *= in.size(d); } @@ -234,7 +233,7 @@ size_t compute_reduced_out_size( if (dim.has_value()) { const auto dim_val = dim.value(); const size_t non_neg_dim = _normalize_non_neg_d(dim_val, in_dim); - for (ssize_t i = 0; i < non_neg_dim; ++i) { + for (size_t i = 0; i < non_neg_dim; ++i) { sizes_arr[i] = in.size(i); } if (keepdim) { @@ -250,7 +249,7 @@ size_t compute_reduced_out_size( } } else { if (keepdim) { - for (size_t i = 0; i < in_dim; ++i) { + for (size_t i = 0; i < static_cast(in_dim); ++i) { sizes_arr[i] = 1; } } else { @@ -266,7 +265,9 @@ size_t compute_reduced_out_size( dim_list, bool keepdim, executorch::aten::SizesType* sizes_arr) { - const auto in_dim = in.dim(); + // check_dim_in_dim_list and later comparisons + // expect in_dim to be size_t, so cast it here + const size_t in_dim = static_cast(in.dim()); size_t out_dim = in_dim; if (dim_list.has_value() && dim_list.value().size() != 0) { diff --git a/kernels/portable/cpu/util/reduce_util.h b/kernels/portable/cpu/util/reduce_util.h index 25a2c0b44c4..35cfdfbaa72 100644 --- a/kernels/portable/cpu/util/reduce_util.h +++ b/kernels/portable/cpu/util/reduce_util.h @@ -50,7 +50,7 @@ void apply_on_flat_ix_with_dim_mask_and_base( const size_t start, const size_t end) { // Compute innermost dim from dim list - size_t inner_dim = in.dim() - 1; + int64_t inner_dim = in.dim() - 1; while (!dim_mask[inner_dim]) { inner_dim--; } @@ -58,7 +58,7 @@ void apply_on_flat_ix_with_dim_mask_and_base( // Initialize array of indices per dimension. This array is used to maintain // the per-dimension index of the element in `in` that is being reduced over // Only the dims that are in the dim list are relevant. 
- size_t dim_index[kTensorDimensionLimit]; + int64_t dim_index[kTensorDimensionLimit]; for (int64_t d = 0; d < in.dim(); d++) { dim_index[d] = 0; } diff --git a/kernels/portable/cpu/util/repeat_util.cpp b/kernels/portable/cpu/util/repeat_util.cpp index 925fda9f793..be7231cb621 100644 --- a/kernels/portable/cpu/util/repeat_util.cpp +++ b/kernels/portable/cpu/util/repeat_util.cpp @@ -8,6 +8,7 @@ #include +#include #include #include #include @@ -26,7 +27,7 @@ bool check_repeat_args( Tensor& out) { // Ensure the self tensors list is non-empty. ET_CHECK_OR_RETURN_FALSE( - repeats.size() >= self.dim(), + static_cast(repeats.size()) >= self.dim(), "Number of dimensions of repeat dims can not be smaller than number of dimensions of tensor"); // Repeat arrayref shall not contain negative element. @@ -39,7 +40,7 @@ bool check_repeat_args( /// Check if out.size() is legal. ET_CHECK_OR_RETURN_FALSE( - out.dim() == repeats.size(), + static_cast(out.dim()) == repeats.size(), "The dimension of out shall equal size of repeats, but now is %zd and %zd", out.dim(), repeats.size()); @@ -48,7 +49,7 @@ bool check_repeat_args( // kTensorDimensionLimit. Only check out tensor because the number of // dimension of out tensor shall have more than or equal to self tensor ET_CHECK_OR_RETURN_FALSE( - out.dim() <= kTensorDimensionLimit, + static_cast(out.dim()) <= kTensorDimensionLimit, "The dimension of input and output should not be larger than %zd", kTensorDimensionLimit); @@ -58,7 +59,7 @@ bool check_repeat_args( // repeats, and called it reformat_self_size. We then make point-to-point mul // of reformat_self_size and repeats. The result should equal out.size(). size_t reformat_self_size[kTensorDimensionLimit]; - for (size_t i = 0; i < out.dim() - self.dim(); i++) { + for (ssize_t i = 0; i < out.dim() - self.dim(); i++) { reformat_self_size[i] = 1; } @@ -131,7 +132,7 @@ void repeat_internal( // The increment along index of slot array to reach the next possible valid // value. int64_t incr[kTensorDimensionLimit]; - for (size_t i = 0; i < self_dim; i++) { + for (size_t i = 0; i < static_cast(self_dim); i++) { incr[i] = self_size[i]; } @@ -141,7 +142,7 @@ void repeat_internal( // than self). size_t index = self_dim - 1; size_t start = out.dim() - self_dim; - while (slots[0] != out.size(start)) { + while (slots[0] != static_cast(out.size(start))) { // Compute the offset (from origin) in the out tensor where this self // data will be copied to. size_t offset = compute_access_offset(slots, strides, self_dim); @@ -151,7 +152,7 @@ void repeat_internal( slots[index] += incr[index]; // If we have reached the limit in the innermost dimension, successively // increment the slot index of outer dimensions. - while (slots[index] == out.size(start + index)) { + while (slots[index] == static_cast(out.size(start + index))) { if (index == 0) { break; } @@ -227,7 +228,7 @@ Error repeat_tensor( // so we reset the upper bound of innermost dim to 1. 'in_incr' indicates // the size (in bytes) of the self data. 
int64_t limits[kTensorDimensionLimit]; - for (size_t i = 0; i < self_dim; i++) { + for (ssize_t i = 0; i < self_dim; i++) { limits[i] = self_size[i]; } diff --git a/kernels/portable/cpu/util/targets.bzl b/kernels/portable/cpu/util/targets.bzl index eef765d5eec..2b22687274f 100644 --- a/kernels/portable/cpu/util/targets.bzl +++ b/kernels/portable/cpu/util/targets.bzl @@ -61,7 +61,6 @@ def define_common_targets(): "//executorch/runtime/core/exec_aten/util:scalar_type_util", "//executorch/runtime/core/exec_aten/util:tensor_util", ], - compiler_flags = ["-Wno-missing-prototypes"], visibility = ["//executorch/kernels/portable/cpu/..."], ) @@ -71,7 +70,6 @@ def define_common_targets(): exported_headers = [ "broadcast_util.h", ], - compiler_flags = ["-Wno-missing-prototypes"], deps = [ ":repeat_util", "//executorch/runtime/kernel:kernel_includes", diff --git a/kernels/prim_ops/et_view.cpp b/kernels/prim_ops/et_view.cpp index 0f041dae00f..7f66bca1725 100644 --- a/kernels/prim_ops/et_view.cpp +++ b/kernels/prim_ops/et_view.cpp @@ -32,7 +32,8 @@ bool get_view_target_size( executorch::aten::ArrayRef size, int64_t dim, executorch::aten::SizesType* out_size) { - ET_LOG_AND_RETURN_IF_FALSE(size.size() == dim); + ET_LOG_AND_RETURN_IF_FALSE( + dim >= 0 && size.size() == static_cast(dim)); int minus1_dim = -1; int n_zero = 0; int64_t numel_without_minus_1 = 1; diff --git a/runtime/core/data_loader.h b/runtime/core/data_loader.h index 45fd1bc8189..3dda5516908 100644 --- a/runtime/core/data_loader.h +++ b/runtime/core/data_loader.h @@ -69,12 +69,12 @@ class DataLoader { SegmentInfo() = default; explicit SegmentInfo( - Type segment_type, - size_t segment_index = 0, - const char* descriptor = nullptr) - : segment_type(segment_type), - segment_index(segment_index), - descriptor(descriptor) {} + Type segment_type_, + size_t segment_index_ = 0, + const char* descriptor_ = nullptr) + : segment_type(segment_type_), + segment_index(segment_index_), + descriptor(descriptor_) {} }; virtual ~DataLoader() = default; diff --git a/runtime/core/exec_aten/util/dim_order_util.h b/runtime/core/exec_aten/util/dim_order_util.h index 7a31db9d6ad..07b3d5c2a97 100644 --- a/runtime/core/exec_aten/util/dim_order_util.h +++ b/runtime/core/exec_aten/util/dim_order_util.h @@ -23,8 +23,8 @@ namespace runtime { namespace { template bool validate_dim_order(const DimOrderType* dim_order, const size_t dims) { - for (int32_t i = 0; i < dims; ++i) { - if (dim_order[i] >= dims) { + for (size_t i = 0; i < dims; ++i) { + if (dim_order[i] >= static_cast(dims)) { return false; } } @@ -43,8 +43,8 @@ template inline bool is_contiguous_dim_order( const DimOrderType* dim_order, const size_t dims) { - for (int i = 0; i < dims; ++i) { - if (dim_order[i] != i) { + for (size_t i = 0; i < dims; ++i) { + if (dim_order[i] != static_cast(i)) { return false; } } @@ -66,7 +66,7 @@ bool is_channels_last_dim_order( return false; } // 4-dim tensor is interpreted as NCHW, 5-dim tensor is interpreted as NCHWD - size_t channels_dim = 1; + DimOrderType channels_dim = 1; // Last value in the dim order should be the channels dim if (dim_order[dims - 1] != channels_dim) { return false; @@ -75,8 +75,8 @@ bool is_channels_last_dim_order( if (dim_order[0] != 0) { return false; } - int d = 1; - while (d < dims - 1) { + DimOrderType d = 1; + while (d < static_cast(dims) - 1) { if (dim_order[d] != d + 1) { return false; } @@ -163,8 +163,8 @@ struct StrideDimOrder { StridesType stride; DimOrderType dim_order; - StrideDimOrder(StridesType stride, DimOrderType dim_order) - : 
stride(stride), dim_order(dim_order) {} + StrideDimOrder(StridesType stride_, DimOrderType dim_order_) + : stride(stride_), dim_order(dim_order_) {} StrideDimOrder() = default; bool operator>(const StrideDimOrder& other) const { // descending order diff --git a/runtime/core/exec_aten/util/tensor_shape_to_c_string.cpp b/runtime/core/exec_aten/util/tensor_shape_to_c_string.cpp index cfd416285c5..02155a4d9b4 100644 --- a/runtime/core/exec_aten/util/tensor_shape_to_c_string.cpp +++ b/runtime/core/exec_aten/util/tensor_shape_to_c_string.cpp @@ -30,7 +30,9 @@ std::array tensor_shape_to_c_string_impl( } *p++ = '('; for (const auto elem : shape) { - if (elem < 0 || elem > internal::kMaximumPrintableTensorShapeElement) { + if (elem < 0 || + static_cast(elem) > + internal::kMaximumPrintableTensorShapeElement) { static_assert( internal::kMaximumPrintableTensorShapeElement > 99999, "must have room for error string!"); diff --git a/runtime/core/exec_aten/util/tensor_util.h b/runtime/core/exec_aten/util/tensor_util.h index eb5ce10b6f3..fcc08ebf98d 100644 --- a/runtime/core/exec_aten/util/tensor_util.h +++ b/runtime/core/exec_aten/util/tensor_util.h @@ -584,7 +584,7 @@ inline bool tensors_have_same_dtype( inline bool tensor_is_rank(executorch::aten::Tensor t, size_t rank) { ET_CHECK_OR_RETURN_FALSE( - t.dim() == rank, + static_cast(t.dim()) == rank, "Expected tensor.dim() to be %zu, but got %zu", static_cast(rank), static_cast(t.dim())); @@ -596,7 +596,7 @@ inline bool tensor_has_rank_greater_or_equal_to( executorch::aten::Tensor t, size_t rank) { ET_CHECK_OR_RETURN_FALSE( - t.dim() >= rank, + static_cast(t.dim()) >= rank, "Expected tensor.dim() to be >= %zu, but got %zu", static_cast(rank), static_cast(t.dim())); @@ -608,7 +608,7 @@ inline bool tensor_has_rank_smaller_or_equal_to( executorch::aten::Tensor t, size_t rank) { ET_CHECK_OR_RETURN_FALSE( - t.dim() <= rank, + static_cast(t.dim()) <= rank, "Expected tensor.dim() to be <= %zu, but got %zu", static_cast(rank), static_cast(t.dim())); @@ -665,12 +665,12 @@ inline bool tensors_have_same_size_at_dims( executorch::aten::Tensor b, size_t dim_b) { ET_CHECK_OR_RETURN_FALSE( - dim_a < a.dim(), + dim_a < static_cast(a.dim()), "Cannot retrieve dim %zu from tensor with dim %zu", static_cast(dim_a), static_cast(a.dim())); ET_CHECK_OR_RETURN_FALSE( - dim_b < b.dim(), + dim_b < static_cast(b.dim()), "Cannot retrieve dim %zu from tensor with dim %zu", static_cast(dim_b), static_cast(b.dim())); @@ -702,7 +702,9 @@ inline bool tensors_have_same_shape( static_cast(b.numel()), static_cast(a.dim()), static_cast(b.dim())); - for (size_t d = 0; d < ET_MIN2(a.dim(), b.dim()); ++d) { + // Using [[maybe_unused]] as ET_LOG may not trigger based on verbosity + for ([[maybe_unused]] const auto d : + c10::irange(ET_MIN2(a.dim(), b.dim()))) { ET_LOG( Error, " size(%zu): (%zu, %zu)", @@ -739,7 +741,8 @@ inline bool tensors_have_same_shape( static_cast(a.dim()), static_cast(b.dim()), static_cast(c.dim())); - for (size_t d = 0; d < ET_MIN3(a.dim(), b.dim(), c.dim()); ++d) { + for ([[maybe_unused]] const auto d : + c10::irange(ET_MIN3(a.dim(), b.dim(), c.dim()))) { ET_LOG( Error, " size(%zu): (%zu, %zu, %zu)", @@ -779,7 +782,8 @@ inline bool tensor_has_expected_size( static_cast(expected_sizes.size())); size_t a_dim = static_cast(a.dim()); size_t expected_dim = static_cast(expected_sizes.size()); - for (size_t d = 0; d < ET_MIN2(a_dim, expected_dim); ++d) { + for ([[maybe_unused]] const auto d : + c10::irange(ET_MIN2(a_dim, expected_dim))) { ET_LOG( Error, " size(%zu): (%zu, 
%zu)", @@ -802,7 +806,8 @@ inline bool tensors_have_same_strides( ET_TENSOR_CHECK_PREFIX__ ": dim=(%zu, %zu)", static_cast(a.dim()), static_cast(b.dim())); - for (size_t d = 0; d < ET_MIN2(a.dim(), b.dim()); ++d) { + for ([[maybe_unused]] const auto d : + c10::irange(ET_MIN2(a.dim(), b.dim()))) { ET_LOG( Error, " stride(%zu): (%zu, %zu)", @@ -827,7 +832,8 @@ inline bool tensors_have_same_strides( static_cast(a.dim()), static_cast(b.dim()), static_cast(c.dim())); - for (size_t d = 0; d < ET_MIN3(a.dim(), b.dim(), c.dim()); ++d) { + for ([[maybe_unused]] const auto d : + c10::irange(ET_MIN3(a.dim(), b.dim(), c.dim()))) { ET_LOG( Error, " stride(%zu): (%zu, %zu, %zu)", @@ -894,7 +900,7 @@ inline size_t getLeadingDims( dim, ssize_t(tensor.dim())); size_t dims = 1; - for (size_t i = 0; i < dim; ++i) { + for (const auto i : c10::irange(dim)) { dims *= static_cast(tensor.size(i)); } return dims; @@ -911,7 +917,7 @@ inline size_t getTrailingDims( dim, ssize_t(tensor.dim())); size_t dims = 1; - for (size_t i = dim + 1; i < tensor.dim(); ++i) { + for (size_t i = dim + 1; i < static_cast(tensor.dim()); ++i) { dims *= static_cast(tensor.size(i)); } return dims; @@ -984,7 +990,7 @@ inline void indexToCoordinate( const executorch::aten::Tensor& tensor, size_t index, size_t* coordinate) { - ET_CHECK(index < tensor.numel()); + ET_CHECK(index < static_cast(tensor.numel())); for (auto i = 0; i < tensor.dim(); ++i) { auto dim = tensor.dim() - 1 - i; size_t dim_size = tensor.size(dim); diff --git a/runtime/core/exec_aten/util/tensor_util_portable.cpp b/runtime/core/exec_aten/util/tensor_util_portable.cpp index c1cbcfb6064..e4aa875aed4 100644 --- a/runtime/core/exec_aten/util/tensor_util_portable.cpp +++ b/runtime/core/exec_aten/util/tensor_util_portable.cpp @@ -8,6 +8,7 @@ #include +#include #include #include @@ -41,11 +42,11 @@ Error get_dim_order( bool tensor_has_valid_dim_order(torch::executor::Tensor t) { if (!validate_dim_order(t.dim_order().data(), t.dim_order().size())) { ET_LOG(Error, "Tensor dim order is not valid:"); - for (size_t d = 0; d < t.dim(); ++d) { + for (size_t d = 0; d < static_cast(t.dim()); ++d) { ET_LOG( Error, " dim_order(%zu): %zu", - static_cast(d), + d, static_cast(t.dim_order()[d])); } return false; @@ -62,11 +63,11 @@ bool tensor_is_default_or_channels_last_dim_order(torch::executor::Tensor t) { ET_LOG( Error, "Expected tensor to have default or channels last dim order, but got"); - for (size_t d = 0; d < t.dim(); ++d) { + for (size_t d = 0; d < static_cast(t.dim()); ++d) { ET_LOG( Error, " dim_order(%zu): %zu", - static_cast(d), + d, static_cast(t.dim_order()[d])); } } @@ -79,11 +80,11 @@ bool tensor_is_default_dim_order(torch::executor::Tensor t) { if (!ret_val) { ET_LOG(Error, "Expected tensor to have default dim order, but got"); - for (size_t d = 0; d < t.dim(); ++d) { + for (size_t d = 0; d < static_cast(t.dim()); ++d) { ET_LOG( Error, " dim_order(%zu): %zu", - static_cast(d), + d, static_cast(t.dim_order()[d])); } } @@ -96,11 +97,11 @@ bool tensor_is_channels_last_dim_order(torch::executor::Tensor t) { if (!ret_val) { ET_LOG(Error, "Expected tensor to have channels last dim order, but got"); - for (size_t d = 0; d < t.dim(); ++d) { + for (size_t d = 0; d < static_cast(t.dim()); ++d) { ET_LOG( Error, " dim_order(%zu): %zu", - static_cast(d), + d, static_cast(t.dim_order()[d])); } } diff --git a/runtime/core/portable_type/c10/c10/util/irange.h b/runtime/core/portable_type/c10/c10/util/irange.h index 3249bdfa5cf..81104d9568f 100644 --- 
a/runtime/core/portable_type/c10/c10/util/irange.h +++ b/runtime/core/portable_type/c10/c10/util/irange.h @@ -24,7 +24,7 @@ struct integer_iterator { using pointer = I*; using reference = I&; - explicit constexpr integer_iterator(I value) : value(value) {} + explicit constexpr integer_iterator(I value_) : value(value_) {} constexpr I operator*() const { return value; diff --git a/runtime/core/portable_type/tensor_impl.cpp b/runtime/core/portable_type/tensor_impl.cpp index 6366a8eac28..ede5a3d4101 100644 --- a/runtime/core/portable_type/tensor_impl.cpp +++ b/runtime/core/portable_type/tensor_impl.cpp @@ -35,8 +35,8 @@ ssize_t compute_numel(const TensorImpl::SizesType* sizes, ssize_t dim) { for (const auto i : c10::irange(dim)) { ET_CHECK_MSG( sizes[i] >= 0, - "Size must be non-negative, got %d at dimension %zd", - sizes[i], + "Size must be non-negative, got %zd at dimension %zd", + static_cast(sizes[i]), i); numel *= sizes[i]; } @@ -76,7 +76,7 @@ ssize_t TensorImpl::element_size() const { Error TensorImpl::internal_resize_contiguous(ArrayRef new_sizes) { ET_CHECK_OR_RETURN_ERROR( - new_sizes.size() == dim_, + static_cast(new_sizes.size()) == dim_, NotSupported, "Attempted to change the tensor rank which is immutable: old=%zu, new=%zu", dim_, @@ -120,7 +120,7 @@ Error TensorImpl::internal_resize_contiguous(ArrayRef new_sizes) { const auto new_numel = compute_numel(new_sizes.data(), dim_); ET_CHECK_OR_RETURN_ERROR( - new_numel <= numel_bound_, + static_cast(new_numel) <= numel_bound_, NotSupported, "Attempted to resize a bounded tensor with a maximum capacity of %zu elements to %zu elements.", numel_bound_, diff --git a/runtime/core/tensor_layout.cpp b/runtime/core/tensor_layout.cpp index f0fac442e20..2b862e6dc14 100644 --- a/runtime/core/tensor_layout.cpp +++ b/runtime/core/tensor_layout.cpp @@ -20,7 +20,7 @@ Result calculate_nbytes( const Span& sizes, const executorch::aten::ScalarType& scalar_type) { ssize_t n = 1; - for (ssize_t i = 0; i < sizes.size(); i++) { + for (const auto i : c10::irange(sizes.size())) { if (sizes[i] < 0) { return Error::InvalidArgument; } diff --git a/runtime/executor/method.cpp b/runtime/executor/method.cpp index 0857bc1c976..7da7bafd3e5 100644 --- a/runtime/executor/method.cpp +++ b/runtime/executor/method.cpp @@ -8,6 +8,7 @@ #include +#include #include #include // @donotremove #include @@ -239,10 +240,10 @@ Result gen_instruction_arguments( for (size_t i = 0; i < num_args; ++i) { int32_t arg_idx = arg_idxs[i]; ET_CHECK_OR_RETURN_ERROR( - arg_idx < num_values, + static_cast(arg_idx) < num_values, InvalidProgram, - "Arg index %d >= %" ET_PRIsize_t, - arg_idx, + "Arg index %zd >= %" ET_PRIsize_t, + static_cast(arg_idx), num_values); arg_list[i] = &values[arg_idx]; } @@ -270,7 +271,7 @@ Result parse_cond_value(const EValue& cond_value) { static_cast(cond_val.scalar_type())); const bool* cond_data = cond_val.const_data_ptr(); - for (size_t i = 0; i < cond_val.numel(); i++) { + for (size_t i = 0; i < static_cast(cond_val.numel()); i++) { if (!cond_data[i]) { return false; } @@ -481,7 +482,7 @@ Error Method::parse_values(const NamedDataMap* named_data_map) { for (size_t j = 0; j < items->size(); j++) { auto value_index = items->Get(j); ET_CHECK_OR_RETURN_ERROR( - value_index >= 0 && value_index < n_value, + value_index >= 0 && static_cast(value_index) < n_value, InvalidProgram, "Invalid value index %" PRId64 " for IntList %" ET_PRIsize_t " index %" ET_PRIsize_t, @@ -644,7 +645,7 @@ Error populate_operator_name( has_overload ? 
op->overload()->c_str() : ""); ET_CHECK_OR_RETURN_ERROR(cx >= 0, Internal, "snprintf failed: %d", cx); ET_CHECK_OR_RETURN_ERROR( - cx < operator_name_size, + static_cast(cx) < operator_name_size, Internal, "Operator name %s%s%s with length %d " "truncated to %" ET_PRIsize_t " due to internal buffer limit.", @@ -672,7 +673,8 @@ Error Method::resolve_operator( char operator_name[kTempBufferSizeForName]; const auto ops = serialization_plan_->operators(); ET_CHECK_OR_RETURN_ERROR( - ops != nullptr && op_index < ops->size(), + ops != nullptr && + static_cast(op_index) < ops->size(), InvalidProgram, "Op index %" PRIu32 " out of range", op_index); @@ -721,7 +723,11 @@ Error Method::resolve_operator( Result op_function = get_op_function_from_registry(operator_name, {meta, count}); if (!op_function.ok()) { - ET_LOG(Error, "Missing operator: [%d] %s", op_index, operator_name); + ET_LOG( + Error, + "Missing operator: [%zd] %s", + static_cast(op_index), + operator_name); return op_function.error(); } kernels[kernel_index] = op_function.get(); @@ -923,10 +929,10 @@ Error Method::init( instr_args) ->cond_value_index(); ET_CHECK_OR_RETURN_ERROR( - index >= 0 && index < n_value_, + index >= 0 && static_cast(index) < n_value_, InvalidProgram, - "Index %d negative or >= %" ET_PRIsize_t, - index, + "Index %zd negative or >= %" ET_PRIsize_t, + static_cast(index), n_value_); chain_instruction_arg_lists[instr_idx] = InstructionArgs(); } break; @@ -944,9 +950,9 @@ Error Method::init( ET_CHECK_OR_RETURN_ERROR( num_instructions_missing_op == 0, OperatorMissing, - "There are %d instructions don't have corresponding operator registered. " + "There are %zu instructions don't have corresponding operator registered. " "See logs for details", - num_instructions_missing_op); + static_cast(num_instructions_missing_op)); if (delayed_error != Error::Ok) { return delayed_error; } @@ -1315,7 +1321,7 @@ Error Method::execute_instruction() { auto delegate_idx = instruction->instr_args_as_DelegateCall()->delegate_index(); ET_CHECK_OR_RETURN_ERROR( - delegate_idx < n_delegate_, + static_cast(delegate_idx) < n_delegate_, Internal, "DELEGATE_CALL index %" PRIu32 " >= num delegates %" ET_PRIsize_t " at instruction %" ET_PRIsize_t, @@ -1609,18 +1615,18 @@ Method::~Method() { // Destroy the values. It's necessary in ATen mode, where the refcount of // Tensors needs to be decremented properly. if (values_ != nullptr) { - for (int i = 0; i < n_value_; ++i) { + for (size_t i = 0; i < n_value_; ++i) { values_[i].~EValue(); } } // Free any resources associated with delegate backends. if (delegates_ != nullptr) { - for (int i = 0; i < n_delegate_; i++) { + for (size_t i = 0; i < n_delegate_; i++) { delegates_[i].~BackendDelegate(); } } // Free resources associated with external constants. - for (int i = 0; i < n_external_constants_; i++) { + for (const auto i : c10::irange(n_external_constants_)) { external_constants_[i].buffer.~FreeableBuffer(); } // All other fields are trivially destructible. diff --git a/runtime/executor/method_meta.cpp b/runtime/executor/method_meta.cpp index bcc2390d2bd..651a815c335 100644 --- a/runtime/executor/method_meta.cpp +++ b/runtime/executor/method_meta.cpp @@ -56,7 +56,7 @@ size_t calculate_nbytes( Span sizes, executorch::aten::ScalarType scalar_type) { ssize_t n = 1; - for (ssize_t i = 0; i < sizes.size(); i++) { + for (size_t i = 0; i < sizes.size(); i++) { n *= sizes[i]; } // Use the full namespace to disambiguate from c10::elementSize. 
@@ -110,7 +110,7 @@ size_t MethodMeta::num_inputs() const { Result MethodMeta::input_tag(size_t index) const { auto num_inputs = this->num_inputs(); ET_CHECK_OR_RETURN_ERROR( - index >= 0 && index < num_inputs, + index < num_inputs, InvalidArgument, "index %zu out of range. num_inputs: %zu", index, @@ -118,10 +118,10 @@ Result MethodMeta::input_tag(size_t index) const { auto input_index = s_plan_->inputs()->Get(index); size_t num_values = s_plan_->values()->size(); ET_CHECK_OR_RETURN_ERROR( - input_index >= 0 && input_index < num_values, + input_index >= 0 && static_cast(input_index) < num_values, InvalidProgram, - "internal value index %d out of range [0,%zu) for input %zu", - input_index, + "internal value index %zd out of range [0,%zu) for input %zu", + static_cast(input_index), num_values, index); auto serialization_value = s_plan_->values()->Get(input_index); @@ -160,7 +160,7 @@ size_t MethodMeta::num_outputs() const { Result MethodMeta::output_tag(size_t index) const { auto num_outputs = this->num_outputs(); ET_CHECK_OR_RETURN_ERROR( - index >= 0 && index < num_outputs, + index < num_outputs, InvalidArgument, "index %zu out of range. num_outputs: %zu", index, @@ -168,10 +168,10 @@ Result MethodMeta::output_tag(size_t index) const { auto output_index = s_plan_->outputs()->Get(index); size_t num_values = s_plan_->values()->size(); ET_CHECK_OR_RETURN_ERROR( - output_index >= 0 && output_index < num_values, + output_index >= 0 && static_cast(output_index) < num_values, InvalidProgram, - "internal value index %d out of range [0,%zu) for output %zu", - output_index, + "internal value index %zd out of range [0,%zu) for output %zu", + static_cast(output_index), num_values, index); auto serialization_value = s_plan_->values()->Get(output_index); @@ -218,7 +218,7 @@ size_t MethodMeta::num_memory_planned_buffers() const { Result MethodMeta::memory_planned_buffer_size(size_t index) const { auto num_buffers = this->num_memory_planned_buffers(); ET_CHECK_OR_RETURN_ERROR( - index >= 0 && index < num_buffers, + index < num_buffers, InvalidArgument, "index %zu out of range. num_buffers: %zu", index, diff --git a/runtime/executor/program.cpp b/runtime/executor/program.cpp index 964b8c8bdac..67f1edd4df3 100644 --- a/runtime/executor/program.cpp +++ b/runtime/executor/program.cpp @@ -163,10 +163,10 @@ Result get_execution_plan( ET_CHECK_OR_RETURN_ERROR( constant_buffer == nullptr || constant_buffer->size() == 0, InvalidProgram, - "constant_buffer contains %u items, " - "constant_segment.offsets contains %u items. Only one should be used.", - constant_buffer->size(), - constant_segment->offsets()->size()); + "constant_buffer contains %zu items, " + "constant_segment.offsets contains %zu items. 
Only one should be used.", + static_cast(constant_buffer->size()), + static_cast(constant_segment->offsets()->size())); const auto* segments = flatbuffer_program->segments(); ET_CHECK_OR_RETURN_ERROR( segments != nullptr, InvalidProgram, "No segments in program"); @@ -176,9 +176,9 @@ Result get_execution_plan( ET_CHECK_OR_RETURN_ERROR( constant_segment->segment_index() < segments->size(), InvalidProgram, - "Constant segment index %d invalid for program segments range %d", - constant_segment->segment_index(), - segments->size()); + "Constant segment index %zu invalid for program segments range %zu", + static_cast(constant_segment->segment_index()), + static_cast(segments->size())); const executorch_flatbuffer::DataSegment* data_segment = segments->Get(constant_segment->segment_index()); @@ -347,8 +347,8 @@ Result Program::get_constant_buffer_data( ET_CHECK_OR_RETURN_ERROR( storage_size <= nbytes, InvalidArgument, - "Constant buffer size %u larger than allocated nbytes %zu", - storage_size, + "Constant buffer size %zu larger than allocated nbytes %zu", + static_cast(constant_buffer[buffer_index]->storage()->size()), nbytes); return storage->data(); @@ -479,8 +479,8 @@ Error Program::load_mutable_subsegment_into( if (segment_offsets->segment_index() >= num_segments) { ET_LOG( Error, - "Segment index %u out of range (>= %zu)", - segment_offsets->segment_index(), + "Segment index %zu out of range (>= %zu)", + static_cast(segment_offsets->segment_index()), num_segments); return Error::NotFound; } diff --git a/runtime/executor/targets.bzl b/runtime/executor/targets.bzl index c5d07448a06..8993c5dc473 100644 --- a/runtime/executor/targets.bzl +++ b/runtime/executor/targets.bzl @@ -74,6 +74,10 @@ def define_common_targets(): "program.h", "tensor_parser.h", ], + compiler_flags = select({ + "ovr_config//os:windows": [], + "DEFAULT" :["-Wno-error=deprecated-declarations"] + }), preprocessor_flags = _program_preprocessor_flags(), exported_deps = [ ":memory_manager", diff --git a/runtime/executor/tensor_parser.h b/runtime/executor/tensor_parser.h index cfd711713ac..362f0b11e20 100644 --- a/runtime/executor/tensor_parser.h +++ b/runtime/executor/tensor_parser.h @@ -91,7 +91,7 @@ parseListOptionalType( evalp_list[output_idx] = nullptr; } else { ET_CHECK_OR_RETURN_ERROR( - index >= 0 && index < values_len, + index >= 0 && static_cast(index) < values_len, InvalidProgram, "Invalid value index %" PRId32 " for ListOptional", index); diff --git a/runtime/executor/tensor_parser_exec_aten.cpp b/runtime/executor/tensor_parser_exec_aten.cpp index de809ee09cc..002c7366be6 100644 --- a/runtime/executor/tensor_parser_exec_aten.cpp +++ b/runtime/executor/tensor_parser_exec_aten.cpp @@ -64,7 +64,8 @@ ET_NODISCARD Result getMemPlannedPtr( "size_t cannot hold memory offset 0x%08" PRIx32 ".%08" PRIx32, memory_offset_high, memory_offset_low); - memory_offset |= static_cast(memory_offset_high) << 32; + memory_offset |= static_cast(memory_offset_high) + << (sizeof(size_t) - sizeof(uint32_t)); } return allocator->get_offset_address(memory_id, memory_offset, nbytes); } @@ -94,7 +95,7 @@ ET_NODISCARD Result> parseTensorList( size_t output_idx = 0; for (int32_t tensor_index : *tensor_indices) { ET_CHECK_OR_RETURN_ERROR( - tensor_index >= 0 && tensor_index < values_len, + tensor_index >= 0 && static_cast(tensor_index) < values_len, InvalidProgram, "Invalid value index %" PRId32 " for TensorList", tensor_index); @@ -123,7 +124,9 @@ ET_NODISCARD Error validateTensorLayout( static_cast(expected_layout.scalar_type())); int dim = 
s_tensor->sizes()->size(); ET_CHECK_OR_RETURN_ERROR( - dim == expected_layout.sizes().size(), + dim >= 0, InvalidExternalData, "Dim is negative: %d", dim) + ET_CHECK_OR_RETURN_ERROR( + static_cast(dim) == expected_layout.sizes().size(), InvalidExternalData, "Dim mismatch. Expected %d, got %zu.", dim, @@ -150,7 +153,7 @@ ET_NODISCARD Error validateTensorLayout( // Check if key exists in entries. If it does, return a pointer to the entry // otherwise return a nullptr. NamedData* get_data_by_key(const char* key, Span entries) { - for (int i = 0; i < entries.size(); i++) { + for (const auto i : c10::irange(entries.size())) { if (strcmp(key, entries[i].key) == 0) { return &entries[i]; } diff --git a/runtime/executor/tensor_parser_portable.cpp b/runtime/executor/tensor_parser_portable.cpp index b72fedc5eee..4b424b29f5c 100644 --- a/runtime/executor/tensor_parser_portable.cpp +++ b/runtime/executor/tensor_parser_portable.cpp @@ -107,12 +107,12 @@ Result parseTensor( // detect bad positive values, but we can reject negative values, which would // otherwise panic in the TensorImpl ctor. dim_order_to_stride() will validate // dim_order. - for (int i = 0; i < dim; i++) { + for (flatbuffers::uoffset_t i = 0; i < dim; i++) { ET_CHECK_OR_RETURN_ERROR( sizes[i] >= 0, InvalidProgram, - "Negative size[%d] %" PRId32, - i, + "Negative size[%zu] %" PRId32, + static_cast(i), sizes[i]); } diff --git a/runtime/kernel/operator_registry.cpp b/runtime/kernel/operator_registry.cpp index b51c2567f0a..85705e5b3fd 100644 --- a/runtime/kernel/operator_registry.cpp +++ b/runtime/kernel/operator_registry.cpp @@ -79,7 +79,7 @@ Error register_kernels_internal(const Span kernels) { for (const auto& kernel : kernels) { // Linear search. This is fine if the number of kernels is small. - for (int32_t i = 0; i < num_registered_kernels; i++) { + for (size_t i = 0; i < num_registered_kernels; i++) { Kernel k = registered_kernels[i]; if (strcmp(kernel.name_, k.name_) == 0 && kernel.kernel_key_ == k.kernel_key_) { @@ -188,7 +188,7 @@ Error make_kernel_key_string( buf_size -= 1; // Add dim order. - for (int j = 0; j < meta.dim_order_.size(); j++) { + for (size_t j = 0; j < meta.dim_order_.size(); j++) { n = copy_char_as_number_to_buf((int)meta.dim_order_[j], buf, buf_size); if (n < 0) { return Error::InvalidArgument; diff --git a/runtime/kernel/operator_registry.h b/runtime/kernel/operator_registry.h index 82815852e6f..8e1eaca9981 100644 --- a/runtime/kernel/operator_registry.h +++ b/runtime/kernel/operator_registry.h @@ -33,7 +33,7 @@ #define ET_LOG_TENSOR_META(meta_list) \ for (const auto& meta : meta_list) { \ ET_LOG(Error, "dtype: %d | dim order: [", int(meta.dtype_)); \ - for (int i = 0; i < meta.dim_order_.size(); i++) { \ + for (size_t i = 0; i < meta.dim_order_.size(); i++) { \ ET_LOG(Error, "%d,", static_cast(meta.dim_order_[i])); \ } \ ET_LOG(Error, "]"); \ @@ -74,7 +74,7 @@ struct TensorMeta { if (dim_order_.size() != other.dim_order_.size()) { return false; } - for (int i = 0; i < dim_order_.size(); i++) { + for (size_t i = 0; i < dim_order_.size(); i++) { if (dim_order_[i] != other.dim_order_[i]) { return false; } diff --git a/runtime/platform/log.cpp b/runtime/platform/log.cpp index c1ad6ddcc0d..6529c73b238 100644 --- a/runtime/platform/log.cpp +++ b/runtime/platform/log.cpp @@ -92,8 +92,7 @@ void vlogf( } buf[kMaxLogMessageLength - 1] = 0; - et_pal_log_level_t pal_level = - (int(level) >= 0 && level < LogLevel::NumLevels) + et_pal_log_level_t pal_level = (level < LogLevel::NumLevels) ? 
kLevelToPal[size_t(level)] : et_pal_log_level_t::kUnknown; diff --git a/runtime/platform/log.h b/runtime/platform/log.h index 9ad234b2520..72ea8528442 100644 --- a/runtime/platform/log.h +++ b/runtime/platform/log.h @@ -33,6 +33,15 @@ #define ET_LOG_ENABLED 1 #endif // !defined(ET_LOG_ENABLED) +// Even though it is supposed to be "portable" some toolchains +// do not define, so providing a definition here +#ifndef PRIu64 +#define PRIu64 "llu" +#endif +#ifndef PRId64 +#define PRId64 "lld" +#endif + namespace executorch { namespace runtime { diff --git a/runtime/platform/profiler.cpp b/runtime/platform/profiler.cpp index 2f514286aa1..21f68963c78 100644 --- a/runtime/platform/profiler.cpp +++ b/runtime/platform/profiler.cpp @@ -129,7 +129,8 @@ void track_allocation(int32_t id, uint32_t size) { uint32_t track_allocator(const char* name) { ET_CHECK_MSG( prof_header->allocator_entries < MEM_PROFILE_MAX_ALLOCATORS, - "Out of allocator tracking space, %d is needed. Increase MEM_PROFILE_MAX_ALLOCATORS and re-compile", + "Out of allocator tracking space, %" PRIu32 + " is needed. Increase MEM_PROFILE_MAX_ALLOCATORS and re-compile", prof_header->allocator_entries); size_t str_len = strlen(name); size_t num_allocators = prof_header->allocator_entries; @@ -151,7 +152,8 @@ void profiling_create_block(const char* name) { num_blocks += 1; ET_CHECK_MSG( num_blocks <= MAX_PROFILE_BLOCKS, - "Only %d blocks are supported and they've all been used up but %d is used. Increment MAX_PROFILE_BLOCKS and re-run", + "Only %d blocks are supported and they've all been used up but %" PRIu32 + " is used. Increment MAX_PROFILE_BLOCKS and re-run", MAX_PROFILE_BLOCKS, num_blocks); } diff --git a/schema/extended_header.cpp b/schema/extended_header.cpp index fdc463207ba..3236b040c49 100644 --- a/schema/extended_header.cpp +++ b/schema/extended_header.cpp @@ -14,8 +14,6 @@ #include #include -#pragma clang diagnostic ignored "-Wdeprecated" - namespace executorch { namespace runtime { diff --git a/test/build_size_test.sh b/test/build_size_test.sh index 823b399fe34..09c0188ff9b 100644 --- a/test/build_size_test.sh +++ b/test/build_size_test.sh @@ -11,9 +11,8 @@ set -e # shellcheck source=/dev/null source "$(dirname "${BASH_SOURCE[0]}")/../.ci/scripts/utils.sh" -# TODO(#8149): Remove -Wno-sign-compare # TODO(#8357): Remove -Wno-int-in-bool-context -COMMON_CXXFLAGS="-fno-exceptions -fno-rtti -Wall -Werror -Wno-sign-compare -Wno-unknown-pragmas -Wno-int-in-bool-context" +COMMON_CXXFLAGS="-fno-exceptions -fno-rtti -Wall -Werror -Wno-int-in-bool-context" cmake_install_executorch_lib() { echo "Installing libexecutorch.a" From 9227cdcade720c14cba5909769d6644b6ea42ec9 Mon Sep 17 00:00:00 2001 From: Itamar Oren Date: Mon, 3 Mar 2025 09:09:09 -0800 Subject: [PATCH 190/584] Migrate pybind11 external deps to regular third-party deps Differential Revision: D70261352 Pull Request resolved: https://github.com/pytorch/executorch/pull/8837 --- backends/apple/coreml/TARGETS | 18 ++++++++---------- backends/qualcomm/aot/python/targets.bzl | 8 +++----- exir/verification/TARGETS | 4 +--- extension/pytree/TARGETS | 8 ++------ extension/training/pybindings/TARGETS | 6 ++---- 5 files changed, 16 insertions(+), 28 deletions(-) diff --git a/backends/apple/coreml/TARGETS b/backends/apple/coreml/TARGETS index d77e33679ab..df1165dd74e 100644 --- a/backends/apple/coreml/TARGETS +++ b/backends/apple/coreml/TARGETS @@ -14,10 +14,10 @@ runtime.python_library( "@EXECUTORCH_CLIENTS", ], deps = [ + "fbsource//third-party/pypi/coremltools:coremltools", 
":executorchcoreml", "//executorch/exir/backend:backend_details", "//executorch/exir/backend:compile_spec_schema", - "fbsource//third-party/pypi/coremltools:coremltools", ], ) @@ -30,13 +30,13 @@ runtime.python_library( "@EXECUTORCH_CLIENTS", ], deps = [ + "fbsource//third-party/pypi/coremltools:coremltools", ":backend", "//caffe2:torch", "//executorch/exir:lib", "//executorch/exir/backend:compile_spec_schema", "//executorch/exir/backend:partitioner", "//executorch/exir/backend:utils", - "fbsource//third-party/pypi/coremltools:coremltools", ], ) @@ -64,25 +64,23 @@ runtime.cxx_python_extension( headers = glob([ "runtime/inmemoryfs/**/*.hpp", ]), + base_module = "", + compiler_flags = [ + "-std=c++17", + ], preprocessor_flags = [ "-Iexecutorch/backends/apple/coreml/runtime/util", ], types = [ "executorchcoreml.pyi", ], - compiler_flags = [ - "-std=c++17", - ], - base_module = "", visibility = [ "//executorch/examples/apple/coreml/...", "@EXECUTORCH_CLIENTS", ], - external_deps = [ - "pybind11", - ], deps = [ "fbsource//third-party/nlohmann-json:nlohmann-json", + "fbsource//third-party/pybind11:pybind11", ], ) @@ -92,10 +90,10 @@ runtime.python_test( "test/*.py", ]), deps = [ + "fbsource//third-party/pypi/pytest:pytest", ":partitioner", ":quantizer", "//caffe2:torch", "//pytorch/vision:torchvision", - "fbsource//third-party/pypi/pytest:pytest", ], ) diff --git a/backends/qualcomm/aot/python/targets.bzl b/backends/qualcomm/aot/python/targets.bzl index e1f5a6a8fc5..f29c02aa593 100644 --- a/backends/qualcomm/aot/python/targets.bzl +++ b/backends/qualcomm/aot/python/targets.bzl @@ -33,10 +33,10 @@ def define_common_targets(): "//executorch/backends/qualcomm:schema", "//executorch/backends/qualcomm/aot/ir:qcir_utils", "//executorch/backends/qualcomm/runtime:runtime", + "fbsource//third-party/pybind11:pybind11", "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()), ], external_deps = [ - "pybind11", "libtorch_python", ], use_static_deps = True, @@ -66,10 +66,10 @@ def define_common_targets(): "//executorch/backends/qualcomm:schema", "//executorch/backends/qualcomm/aot/ir:qcir_utils", "//executorch/backends/qualcomm/runtime:runtime", + "fbsource//third-party/pybind11:pybind11", "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()), ], external_deps = [ - "pybind11", "libtorch_python", ], use_static_deps = True, @@ -93,9 +93,7 @@ def define_common_targets(): "//executorch/backends/qualcomm:schema", "//executorch/backends/qualcomm/aot/ir:qcir_utils", "//executorch/backends/qualcomm/runtime:runtime", + "fbsource//third-party/pybind11:pybind11", "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()), ], - external_deps = [ - "pybind11", - ], ) diff --git a/exir/verification/TARGETS b/exir/verification/TARGETS index 8ee9e5546e3..092b48658df 100644 --- a/exir/verification/TARGETS +++ b/exir/verification/TARGETS @@ -10,13 +10,11 @@ cpp_python_extension( "bindings.cpp", ], deps = [ + "fbsource//third-party/pybind11:pybind11", "//caffe2:torch-cpp-cpu", "//caffe2:torch_extension", "//caffe2/c10:c10", ], - external_deps = [ - "pybind11", - ], ) python_library( diff --git a/extension/pytree/TARGETS b/extension/pytree/TARGETS index 400a5b9504c..005c5c9c2d7 100644 --- a/extension/pytree/TARGETS +++ b/extension/pytree/TARGETS @@ -16,11 +16,9 @@ cpp_python_extension( ], base_module = "executorch.extension.pytree", deps = [ + "fbsource//third-party/pybind11:pybind11", ":pytree", ], - external_deps = [ - "pybind11", - ], ) 
cpp_python_extension( @@ -30,11 +28,9 @@ cpp_python_extension( ], base_module = "executorch.extension.pytree", deps = [ + "fbsource//third-party/pybind11:pybind11", ":pytree", ], - external_deps = [ - "pybind11", - ], ) python_library( diff --git a/extension/training/pybindings/TARGETS b/extension/training/pybindings/TARGETS index 6aa11ea6726..19b54961493 100644 --- a/extension/training/pybindings/TARGETS +++ b/extension/training/pybindings/TARGETS @@ -17,13 +17,11 @@ runtime.cxx_python_extension( types = ["_training_lib.pyi"], visibility = ["//executorch/extension/training/..."], deps = [ + "fbsource//third-party/pybind11:pybind11", "//executorch/extension/aten_util:aten_bridge", "//executorch/extension/training/optimizer:sgd", ], - external_deps = [ - "pybind11", - "libtorch_python", - ], + external_deps = ["libtorch_python"], ) runtime.python_library( From 1ce7ed7c8169fec7eae5568ebedcee72d6d4edcb Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Mon, 3 Mar 2025 13:00:53 -0500 Subject: [PATCH 191/584] Add missing dep to executorch/extensin/pybindings:portable_lib (#8885) This file clearly imports executorch.exir._warnings and thus it should have the dep. Differential Revision: [D70451304](https://our.internmc.facebook.com/intern/diff/D70451304/) ghstack-source-id: 269199558 Pull Request resolved: https://github.com/pytorch/executorch/pull/8871 Co-authored-by: Scott Wolchok --- extension/pybindings/TARGETS | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/extension/pybindings/TARGETS b/extension/pybindings/TARGETS index 17ccbb2477c..2e77127bf56 100644 --- a/extension/pybindings/TARGETS +++ b/extension/pybindings/TARGETS @@ -70,5 +70,8 @@ runtime.python_library( "//executorch/runtime/...", "@EXECUTORCH_CLIENTS", ], - deps = [":_portable_lib"], + deps = [ + ":_portable_lib", + "//executorch/exir:_warnings", + ], ) From 40e80e4ed1eaa6802b5ae8e079d1ab5c2b652304 Mon Sep 17 00:00:00 2001 From: Jack <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 3 Mar 2025 10:05:13 -0800 Subject: [PATCH 192/584] Basic performance logging for Llama python runner (#8862) Add basic performance metrics to native llama runner --- examples/models/llama/runner/generation.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/examples/models/llama/runner/generation.py b/examples/models/llama/runner/generation.py index 3e9ceb34af5..4ba645ffd87 100644 --- a/examples/models/llama/runner/generation.py +++ b/examples/models/llama/runner/generation.py @@ -4,6 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
+import time from abc import ABC, abstractmethod from typing import List, Optional @@ -97,6 +98,7 @@ def generate( # noqa: C901 pos_base: int = 0, ) -> List[int]: # Prefill + prefill_start = time.time() logits = self.forward( tokens=torch.tensor([prompt_tokens], dtype=torch.long, device=self.device), input_pos=( @@ -105,11 +107,13 @@ def generate( # noqa: C901 else None ), ) + prefill_time = time.time() - prefill_start current_token = next_token(logits, temperature, top_p) print(f"{self.tokenizer.decode_token(current_token)}", end="", flush=True) tokens = prompt_tokens + [current_token] + generate_start = time.time() while len(tokens) < max_seq_len: if self.use_kv_cache: logits = self.forward( @@ -140,6 +144,10 @@ def generate( # noqa: C901 print(f"{self.tokenizer.decode_token(current_token)}", end="", flush=True) print("\n") + generate_time = time.time() - generate_start + print(f"Prefill time: {prefill_time}") + print(f"Generation tok/s: {len(tokens) / generate_time}") + return tokens if echo else tokens[len(prompt_tokens) :] def text_completion( From a3bc2f185d2fb555a427d0968fae660b1762d9df Mon Sep 17 00:00:00 2001 From: Mergen Nachin Date: Mon, 3 Mar 2025 14:37:47 -0500 Subject: [PATCH 193/584] Add editable mode in Building from source file (#8882) * Add editable mode in Building from source file * Update using-executorch-building-from-source.md --- docs/source/using-executorch-building-from-source.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/source/using-executorch-building-from-source.md b/docs/source/using-executorch-building-from-source.md index eae7fbabf57..8196c7d39df 100644 --- a/docs/source/using-executorch-building-from-source.md +++ b/docs/source/using-executorch-building-from-source.md @@ -80,6 +80,14 @@ portability details. ./install_executorch.sh --pybind off ``` + For development, install the package in `--editable` mode, which allows to modify Python source code and see changes reflected immediately. + ``` + ./install_executorch.sh --editable [--pybind xnnpack] + + # Or you can directly do the following if dependencies are already installed. + pip install -e . + ``` + > **_NOTE:_** Cleaning the build system > > When fetching a new version of the upstream repo (via `git fetch` or `git From 9aca1fae64e27ba34d98da179bde2911ca3557e0 Mon Sep 17 00:00:00 2001 From: lucylq Date: Mon, 3 Mar 2025 11:40:38 -0800 Subject: [PATCH 194/584] add merge function for NamedDataStore Differential Revision: D70409078 Pull Request resolved: https://github.com/pytorch/executorch/pull/8850 --- exir/_serialize/_named_data_store.py | 27 +++++++++ exir/_serialize/test/test_named_data_store.py | 59 +++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/exir/_serialize/_named_data_store.py b/exir/_serialize/_named_data_store.py index 999913a4bb0..2c2d975937e 100644 --- a/exir/_serialize/_named_data_store.py +++ b/exir/_serialize/_named_data_store.py @@ -181,3 +181,30 @@ def get_named_data_store_output(self) -> NamedDataStoreOutput: # Clean up empty maps inside self.external_data self.external_data = {k: v for k, v in self.external_data.items() if len(v) > 0} return NamedDataStoreOutput(self.buffers, self.pte_data, self.external_data) + + def merge_named_data_store(self, other: NamedDataStoreOutput) -> None: + """ + Merge another NamedDataStore into this one. + Args: + other (NamedDataStore): the other NamedDataStore to merge. + Raises: + ValueError: when the key exists in both stores, and corresponding + data is different between them. + """ + # Merge the pte_data. 
+ for key, buffer_idx in other.pte_data.items(): + self.add_named_data( + key, + other.buffers[buffer_idx].buffer, + other.buffers[buffer_idx].alignment, + ) + + # Merge the external_data. + for filename, key_to_buffer_idx in other.external_data.items(): + for key, buffer_idx in key_to_buffer_idx.items(): + self.add_named_data( + key, + other.buffers[buffer_idx].buffer, + other.buffers[buffer_idx].alignment, + external_tag=filename, + ) diff --git a/exir/_serialize/test/test_named_data_store.py b/exir/_serialize/test/test_named_data_store.py index d5355f6d7bf..ffe6f2ddce7 100644 --- a/exir/_serialize/test/test_named_data_store.py +++ b/exir/_serialize/test/test_named_data_store.py @@ -83,3 +83,62 @@ def test_add_duplicate_key_fail(self) -> None: self.assertEqual(len(output.pte_data), 1) self.assertEqual(output.pte_data["key"], 0) self.assertEqual(len(output.external_data), 0) + + def test_merge(self) -> None: + store1 = NamedDataStore() + store1.add_named_data("key1", b"data1", None, None) + store1.add_named_data("key2", b"data2", 16, "file1") + + # Check items in the store1. + output = store1.get_named_data_store_output() + self.assertEqual(len(output.buffers), 2) + self.assertEqual(len(output.pte_data), 1) + self.assertEqual(len(output.external_data), 1) + self.assertEqual(len(output.external_data["file1"]), 1) + + store2 = NamedDataStore() + store2.add_named_data("key1", b"data1", None, None) + store2.add_named_data("key3", b"data3", None, None) + store2.add_named_data("key4", b"data4", 16, "file1") + store2.add_named_data("key5", b"data5", 16, "file2") + + # Check items in store2. + output2 = store2.get_named_data_store_output() + self.assertEqual(len(output2.buffers), 4) + self.assertEqual(len(output2.pte_data), 2) + self.assertEqual(len(output2.external_data), 2) + self.assertEqual(len(output2.external_data["file1"]), 1) + self.assertEqual(len(output2.external_data["file2"]), 1) + + # Merge store2 into store1. + store1.merge_named_data_store(output2) + + # Check items in store2 are merged into store1. + output = store1.get_named_data_store_output() + # key1, data1 exist in both store1 and store2, so we only have one copy of it. + self.assertEqual(len(output.buffers), 5) + self.assertEqual(len(output.pte_data), 2) + self.assertEqual(len(output.external_data), 2) + self.assertEqual(len(output.external_data["file1"]), 2) + self.assertEqual(len(output.external_data["file2"]), 1) + + def test_merge_duplicate_error(self) -> None: + store1 = NamedDataStore() + store1.add_named_data("key1", b"data1", None, None) + + # Check items in the store1. + output = store1.get_named_data_store_output() + self.assertEqual(len(output.buffers), 1) + self.assertEqual(len(output.pte_data), 1) + + store2 = NamedDataStore() + store2.add_named_data("key1", b"data2", None, None) + + # Check items in store2. + output2 = store2.get_named_data_store_output() + self.assertEqual(len(output2.buffers), 1) + self.assertEqual(len(output2.pte_data), 1) + + # Merge store2 into store1 raises error as key1 is already in store1 + # with different data. 
+ self.assertRaises(ValueError, store1.merge_named_data_store, output2) From 160421a7c7a6486eea34bcf0cee3f4bc4fbd38bb Mon Sep 17 00:00:00 2001 From: Jack <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 3 Mar 2025 11:46:46 -0800 Subject: [PATCH 195/584] Allow non tensor checkpoint values (#8845) --- examples/models/checkpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/models/checkpoint.py b/examples/models/checkpoint.py index ee3fb560429..c84a689b951 100644 --- a/examples/models/checkpoint.py +++ b/examples/models/checkpoint.py @@ -64,7 +64,7 @@ def get_checkpoint_dtype(checkpoint: Dict[str, Any]) -> Optional[str]: mismatched_dtypes = [ (key, value.dtype) for key, value in checkpoint.items() - if value.dtype != dtype + if hasattr(value, "dtype") and value.dtype != dtype ] if len(mismatched_dtypes) > 0: print( From e83d0f2329d6b7ebf4ddbcc6a72119ac25cc395d Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Mon, 3 Mar 2025 12:12:11 -0800 Subject: [PATCH 196/584] Update Demo Scripts To Use .ptd (retry) Differential Revision: D70498387 Pull Request resolved: https://github.com/pytorch/executorch/pull/8886 --- CMakeLists.txt | 9 ++--- extension/training/CMakeLists.txt | 2 +- .../training/examples/XOR/export_model.py | 28 ++++++++++----- extension/training/examples/XOR/train.cpp | 34 ++++++++++++++++--- 4 files changed, 55 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6bdcda2f19c..de941663a88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -248,14 +248,15 @@ cmake_dependent_option( "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF ) -if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR) +if(EXECUTORCH_BUILD_EXTENSION_TRAINING) set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON) + set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON) + set(EXECUTORCH_BUILD_EXTENSION_MODULE ON) + set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON) endif() -if(EXECUTORCH_BUILD_EXTENSION_TRAINING) - set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON) +if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR) set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON) - set(EXECUTORCH_BUILD_EXTENSION_MODULE ON) endif() if(EXECUTORCH_BUILD_EXTENSION_MODULE) diff --git a/extension/training/CMakeLists.txt b/extension/training/CMakeLists.txt index e50bb3c71eb..97e75955837 100644 --- a/extension/training/CMakeLists.txt +++ b/extension/training/CMakeLists.txt @@ -26,7 +26,7 @@ target_include_directories( target_include_directories(extension_training PUBLIC ${EXECUTORCH_ROOT}/..) 
target_compile_options(extension_training PUBLIC ${_common_compile_options}) target_link_libraries(extension_training executorch_core - extension_data_loader extension_module extension_tensor) + extension_data_loader extension_module extension_tensor extension_flat_tensor) list(TRANSFORM _train_xor__srcs PREPEND "${EXECUTORCH_ROOT}/") diff --git a/extension/training/examples/XOR/export_model.py b/extension/training/examples/XOR/export_model.py index bfbe0ce2138..98e04f09a2f 100644 --- a/extension/training/examples/XOR/export_model.py +++ b/extension/training/examples/XOR/export_model.py @@ -11,14 +11,14 @@ import os import torch -from executorch.exir import to_edge +from executorch.exir import ExecutorchBackendConfig, to_edge from executorch.extension.training.examples.XOR.model import Net, TrainingNet from torch.export import export from torch.export.experimental import _export_forward_backward -def _export_model(): +def _export_model(external_mutable_weights: bool = False): net = TrainingNet(Net()) x = torch.randn(1, 2) @@ -30,7 +30,11 @@ def _export_model(): # Lower the graph to edge dialect. ep = to_edge(ep) # Lower the graph to executorch. - ep = ep.to_executorch() + ep = ep.to_executorch( + config=ExecutorchBackendConfig( + external_mutable_weights=external_mutable_weights + ) + ) return ep @@ -44,19 +48,27 @@ def main() -> None: "--outdir", type=str, required=True, - help="Path to the directory to write xor.pte files to", + help="Path to the directory to write xor.pte and xor.ptd files to", + ) + parser.add_argument( + "--external", + action="store_true", + help="Export the model with external weights", ) args = parser.parse_args() - ep = _export_model() + ep = _export_model(args.external) # Write out the .pte file. os.makedirs(args.outdir, exist_ok=True) outfile = os.path.join(args.outdir, "xor.pte") with open(outfile, "wb") as fp: - fp.write( - ep.buffer, - ) + ep.write_to_file(fp) + + if args.external: + # current infra doesnt easily allow renaming this file, so just hackily do it here. 
+ ep._tensor_data["xor"] = ep._tensor_data.pop("_default_external_constant") + ep.write_tensor_data_to_file(args.outdir) if __name__ == "__main__": diff --git a/extension/training/examples/XOR/train.cpp b/extension/training/examples/XOR/train.cpp index 746daebbf1b..af1c37a6a50 100644 --- a/extension/training/examples/XOR/train.cpp +++ b/extension/training/examples/XOR/train.cpp @@ -23,12 +23,18 @@ using executorch::extension::training::optimizer::SGDOptions; using executorch::runtime::Error; using executorch::runtime::Result; DEFINE_string(model_path, "xor.pte", "Model serialized in flatbuffer format."); +DEFINE_string(ptd_path, "", "Model weights serialized in flatbuffer format."); int main(int argc, char** argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); - if (argc != 1) { + if (argc == 0) { + ET_LOG(Error, "Please provide a model path."); + return 1; + } else if (argc > 2) { std::string msg = "Extra commandline args: "; - for (int i = 1 /* skip argv[0] (program name) */; i < argc; i++) { + for (int i = 2 /* skip argv[0] (pte path) and argv[1] (ptd path) */; + i < argc; + i++) { msg += argv[i]; } ET_LOG(Error, "%s", msg.c_str()); @@ -46,7 +52,21 @@ int main(int argc, char** argv) { auto loader = std::make_unique( std::move(loader_res.get())); - auto mod = executorch::extension::training::TrainingModule(std::move(loader)); + std::unique_ptr ptd_loader = nullptr; + if (!FLAGS_ptd_path.empty()) { + executorch::runtime::Result + ptd_loader_res = + executorch::extension::FileDataLoader::from(FLAGS_ptd_path.c_str()); + if (ptd_loader_res.error() != Error::Ok) { + ET_LOG(Error, "Failed to open ptd file: %s", FLAGS_ptd_path.c_str()); + return 1; + } + ptd_loader = std::make_unique( + std::move(ptd_loader_res.get())); + } + + auto mod = executorch::extension::training::TrainingModule( + std::move(loader), nullptr, nullptr, nullptr, std::move(ptd_loader)); // Create full data set of input and labels. 
std::vector(param_res.error())); return 1; } @@ -112,5 +135,6 @@ int main(int argc, char** argv) { std::string(param.first.data()), param.second}); } - executorch::extension::flat_tensor::save_ptd("xor.ptd", param_map, 16); + executorch::extension::flat_tensor::save_ptd( + "trained_xor.ptd", param_map, 16); } From e8d20090364ce22f6ec81bebae2828a605d4e8d5 Mon Sep 17 00:00:00 2001 From: Jack <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 3 Mar 2025 13:14:26 -0800 Subject: [PATCH 197/584] Add previous logging for to_edge model in export_llama (#8889) --- extension/llm/export/builder.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/extension/llm/export/builder.py b/extension/llm/export/builder.py index 47ad30e9390..eb8dd462378 100644 --- a/extension/llm/export/builder.py +++ b/extension/llm/export/builder.py @@ -448,6 +448,8 @@ def to_edge_transform_and_lower( compile_config=edge_config, constant_methods=self.metadata, ) + if self.verbose: + logging.info(f"Exported graph:\n{self.edge_manager.exported_program()}") return self def to_executorch( From 24671a96d6936836461810649839aa0fd1058cc3 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 4 Mar 2025 06:54:55 +0900 Subject: [PATCH 198/584] [Benchmark] fail test if model artifact does not exist (#8482) * replace fetcher * replace fetcher * replace fetcher * replace fetcher * replace fetcher * replace fetcher --- .github/workflows/android-perf.yml | 120 +++++++++++++++-------------- .github/workflows/apple-perf.yml | 119 ++++++++++++++-------------- 2 files changed, 125 insertions(+), 114 deletions(-) diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index f21ed849d03..8c0ba752259 100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -96,63 +96,6 @@ jobs: PYTHONPATH="${PWD}" python .ci/scripts/gather_benchmark_configs.py $ARGS - prepare-test-specs: - runs-on: linux.2xlarge - needs: set-parameters - strategy: - matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }} - fail-fast: false - steps: - - uses: actions/checkout@v3 - - - name: Prepare the spec - id: prepare - shell: bash - env: - BENCHMARK_CONFIG: ${{ toJSON(matrix) }} - working-directory: extension/benchmark/android/benchmark - run: | - set -eux - - # The model will be exported in the next step to this S3 path - MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip" - # We could write a script to properly use jinja here, but there is only one variable, - # so let's just sed it - sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2 - - BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g') - # The config for this benchmark runs, we save it in the test spec so that it can be fetched - # later by the upload script - sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' android-llm-device-farm-test-spec.yml.j2 - - cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml - # Just print the test spec for debugging - cat android-llm-device-farm-test-spec.yml - - # Save the benchmark configs so that we can use it later in the dashboard - echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json" - echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT - - - name: Upload the spec - uses: seemethere/upload-artifact-s3@v5 - with: - s3-bucket: 
gha-artifacts - s3-prefix: | - ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }} - retention-days: 1 - if-no-files-found: error - path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml - - - name: Update the benchmark configs - uses: seemethere/upload-artifact-s3@v5 - with: - s3-bucket: gha-artifacts - s3-prefix: | - ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/ - retention-days: 1 - if-no-files-found: error - path: extension/benchmark/android/benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json - export-models: name: export-models uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main @@ -335,6 +278,69 @@ jobs: fi echo "::endgroup::" + prepare-test-specs: + runs-on: linux.2xlarge + needs: + - set-parameters + - export-models + strategy: + matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }} + fail-fast: false + steps: + - uses: actions/checkout@v3 + + - name: Prepare the spec + id: prepare + shell: bash + env: + BENCHMARK_CONFIG: ${{ toJSON(matrix) }} + working-directory: extension/benchmark/android/benchmark + run: | + set -eux + + # The model will be exported in the next step to this S3 path + MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip" + + # Check if the model artifact exists, fail this step skip generating test-spec. + curl -s --head -f ${MODEL_PATH} + + # We could write a script to properly use jinja here, but there is only one variable, + # so let's just sed it + sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2 + + BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g') + # The config for this benchmark runs, we save it in the test spec so that it can be fetched + # later by the upload script + sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' android-llm-device-farm-test-spec.yml.j2 + + cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml + # Just print the test spec for debugging + cat android-llm-device-farm-test-spec.yml + + # Save the benchmark configs so that we can use it later in the dashboard + echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json" + echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT + + - name: Upload the spec + uses: seemethere/upload-artifact-s3@v5 + with: + s3-bucket: gha-artifacts + s3-prefix: | + ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }} + retention-days: 1 + if-no-files-found: error + path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml + + - name: Update the benchmark configs + uses: seemethere/upload-artifact-s3@v5 + with: + s3-bucket: gha-artifacts + s3-prefix: | + ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/ + retention-days: 1 + if-no-files-found: error + path: extension/benchmark/android/benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json + build-benchmark-app: name: build-benchmark-app uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml index 44aa645d16d..cc7f85e9386 100644 --- a/.github/workflows/apple-perf.yml +++ b/.github/workflows/apple-perf.yml @@ -98,63 +98,6 @@ jobs: echo "benchmark_configs is: ${{ 
steps.set-parameters.outputs.benchmark_configs }}" - prepare-test-specs: - runs-on: linux.2xlarge - needs: set-parameters - strategy: - matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }} - fail-fast: false - steps: - - uses: actions/checkout@v3 - - - name: Prepare the spec - id: prepare - shell: bash - env: - BENCHMARK_CONFIG: ${{ toJSON(matrix) }} - working-directory: extension/benchmark/apple/Benchmark - run: | - set -eux - - # The model will be exported in the next step to this S3 path - MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip" - # We could write a script to properly use jinja here, but there is only one variable, - # so let's just sed it - sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2 - - BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g') - # The config for this benchmark runs, we save it in the test spec so that it can be fetched - # later by the upload script - sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' default-ios-device-farm-appium-test-spec.yml.j2 - - cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml - # Just print the test spec for debugging - cat default-ios-device-farm-appium-test-spec.yml - - # Save the benchmark configs so that we can use it later in the dashboard - echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json" - echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT - - - name: Upload the spec - uses: seemethere/upload-artifact-s3@v5 - with: - s3-bucket: gha-artifacts - s3-prefix: | - ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }} - retention-days: 1 - if-no-files-found: error - path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml - - - name: Update the benchmark configs - uses: seemethere/upload-artifact-s3@v5 - with: - s3-bucket: gha-artifacts - s3-prefix: | - ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/ - retention-days: 1 - if-no-files-found: error - path: extension/benchmark/apple/Benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json - export-models: name: export-models uses: pytorch/test-infra/.github/workflows/macos_job.yml@main @@ -344,6 +287,68 @@ jobs: fi echo "::endgroup::" + prepare-test-specs: + runs-on: linux.2xlarge + needs: + - set-parameters + - export-models + strategy: + matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }} + fail-fast: false + steps: + - uses: actions/checkout@v3 + + - name: Prepare the spec + id: prepare + shell: bash + env: + BENCHMARK_CONFIG: ${{ toJSON(matrix) }} + working-directory: extension/benchmark/apple/Benchmark + run: | + set -eux + + # The model will be exported in the next step to this S3 path + MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip" + # Check if the model artifact exists, fail this step skip generating test-spec. 
+ curl -s --head -f ${MODEL_PATH} + # We could write a script to properly use jinja here, but there is only one variable, + # so let's just sed it + sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2 + + BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g') + # The config for this benchmark runs, we save it in the test spec so that it can be fetched + # later by the upload script + sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' default-ios-device-farm-appium-test-spec.yml.j2 + + cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml + # Just print the test spec for debugging + cat default-ios-device-farm-appium-test-spec.yml + + # Save the benchmark configs so that we can use it later in the dashboard + echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json" + echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT + + - name: Upload the spec + uses: seemethere/upload-artifact-s3@v5 + with: + s3-bucket: gha-artifacts + s3-prefix: | + ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }} + retention-days: 1 + if-no-files-found: error + path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml + + - name: Update the benchmark configs + uses: seemethere/upload-artifact-s3@v5 + with: + s3-bucket: gha-artifacts + s3-prefix: | + ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/ + retention-days: 1 + if-no-files-found: error + path: extension/benchmark/apple/Benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json + + build-benchmark-app: name: build-benchmark-app uses: pytorch/test-infra/.github/workflows/macos_job.yml@main From b2bee8c67800e8387378fe4004d927e234212ac6 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 3 Mar 2025 18:00:36 -0800 Subject: [PATCH 199/584] Fix broken tests Differential Revision: D70503708 Pull Request resolved: https://github.com/pytorch/executorch/pull/8888 --- exir/program/_program.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/exir/program/_program.py b/exir/program/_program.py index 5a9c101a06a..8295907d090 100644 --- a/exir/program/_program.py +++ b/exir/program/_program.py @@ -978,6 +978,18 @@ def _remove_invalid_ops_for_not_decompose( ) -> List[torch._ops.OpOverload]: # To address https://github.com/pytorch/executorch/issues/8781 def keep(op): + # Explicit allow list + allow_list = [] + try: + # Ops in torch.ops.quant are not always loaded, so we use try/except + # Aliases output, but we need to allow it for XNNPACK + allow_list.append(torch.ops.quant.choose_qparams_affine.default) + except: + pass + + if op in allow_list: + return True + schema = op._schema native_schema = _pybind_schema_to_native_schema(schema) if native_schema.is_mutable: From e792757100f4e7ea5ac680f8c18ad8f77a104e61 Mon Sep 17 00:00:00 2001 From: Gasoonjia Date: Mon, 3 Mar 2025 21:43:49 -0800 Subject: [PATCH 200/584] fix wrong error msg Differential Revision: D70528853 Pull Request resolved: https://github.com/pytorch/executorch/pull/8908 --- devtools/etdump/etdump_flatcc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devtools/etdump/etdump_flatcc.cpp b/devtools/etdump/etdump_flatcc.cpp index a5242c8ed4b..8c20bb4ad89 100644 --- a/devtools/etdump/etdump_flatcc.cpp +++ b/devtools/etdump/etdump_flatcc.cpp @@ -503,7 +503,7 @@ 
void ETDumpGen::set_debug_buffer(Span<uint8_t> buffer) { Result<BufferDataSink> bds_ret = BufferDataSink::create(buffer); ET_CHECK_MSG( bds_ret.ok(), - "Failed to write tensor with error 0x%" PRIx32, + "Failed to create data sink from debug buffer with error 0x%" PRIx32, static_cast<uint32_t>(bds_ret.error())); buffer_data_sink_ = std::move(bds_ret.get()); From 2ee3ffa6004e597f4963fcd4a2e48f58e2b428b9 Mon Sep 17 00:00:00 2001 From: Naveen Suda <99509021+navsud@users.noreply.github.com> Date: Mon, 3 Mar 2025 22:03:30 -0800 Subject: [PATCH 201/584] fix head_dim in metadata Differential Revision: D70538475 Pull Request resolved: https://github.com/pytorch/executorch/pull/8918 --- examples/qualcomm/oss_scripts/llama/model/static_llama.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/qualcomm/oss_scripts/llama/model/static_llama.py b/examples/qualcomm/oss_scripts/llama/model/static_llama.py index 40044db7428..ea8e2f5d319 100755 --- a/examples/qualcomm/oss_scripts/llama/model/static_llama.py +++ b/examples/qualcomm/oss_scripts/llama/model/static_llama.py @@ -461,7 +461,7 @@ def get_metadata(self): "get_bos_id": 1, "get_eos_id": 2, "get_dim": self.dim, - "get_head_dim": self.dim // self.n_heads, + "get_head_dim": self.head_dim, "get_max_batch_size": self.max_batch_size, "get_max_seq_len": self.max_seq_len, "get_n_bos": 1, From d92384bfb2641a2622ee2a7b14bdb903c44906e6 Mon Sep 17 00:00:00 2001 From: winskuo-quic <143469905+winskuo-quic@users.noreply.github.com> Date: Tue, 4 Mar 2025 14:29:10 +0800 Subject: [PATCH 202/584] Qualcomm AI Engine Direct - Meta CI for Mobilebert , W2L, and Llama (#8616) * Qualcomm AI Engine Direct - Meta CI for Mobilebert and W2L * variable update --- .ci/scripts/test_model.sh | 9 +++++++- .github/workflows/trunk.yml | 2 +- backends/qualcomm/tests/test_qnn_delegate.py | 22 +++++++++---------- examples/qualcomm/oss_scripts/llama/llama.py | 5 ++++- .../oss_scripts/llama/qnn_llama_runner.cpp | 5 +++++ .../oss_scripts/llama/runner/runner.cpp | 15 +++++++++---- .../oss_scripts/llama/runner/runner.h | 2 ++ .../qualcomm/scripts/mobilebert_fine_tune.py | 9 +++++++- examples/qualcomm/scripts/wav2letter.py | 22 ++++++++++++++----- 9 files changed, 66 insertions(+), 25 deletions(-) diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh index 054ac02bc07..8143f9ea9a4 100755 --- a/.ci/scripts/test_model.sh +++ b/.ci/scripts/test_model.sh @@ -164,6 +164,7 @@ test_model_with_qnn() { export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/ export PYTHONPATH=$EXECUTORCH_ROOT/..
+ EXTRA_FLAGS="" if [[ "${MODEL_NAME}" == "dl3" ]]; then EXPORT_SCRIPT=deeplab_v3 elif [[ "${MODEL_NAME}" == "mv3" ]]; then @@ -176,6 +177,12 @@ test_model_with_qnn() { EXPORT_SCRIPT=inception_v3 elif [[ "${MODEL_NAME}" == "vit" ]]; then EXPORT_SCRIPT=torchvision_vit + elif [[ "${MODEL_NAME}" == "mb" ]]; then + EXPORT_SCRIPT=mobilebert_fine_tune + EXTRA_FLAGS="--num_epochs 1" + pip install scikit-learn + elif [[ "${MODEL_NAME}" == "w2l" ]]; then + EXPORT_SCRIPT=wav2letter elif [[ "${MODEL_NAME}" == "edsr" ]]; then EXPORT_SCRIPT=edsr # Additional deps for edsr @@ -189,7 +196,7 @@ test_model_with_qnn() { # TODO(guangyang): Make QNN chipset matches the target device QNN_CHIPSET=SM8450 - "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only + "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only $EXTRA_FLAGS EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit) } diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 410e95d9a84..49fd08591a7 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -311,7 +311,7 @@ jobs: strategy: matrix: dtype: [fp32] - model: [dl3, mv3, mv2, ic4, ic3, vit] + model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l] fail-fast: false with: runner: linux.2xlarge diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index ad00d58fb85..986243d7a9c 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -73,7 +73,7 @@ from executorch.examples.models.mobilenet_v3 import MV3Model from executorch.examples.models.torchvision_vit.model import TorchVisionViTModel -# from executorch.examples.models.wav2letter import Wav2LetterModel +from executorch.examples.models.wav2letter import Wav2LetterModel from executorch.exir import to_edge from executorch.exir.backend.backend_api import disable_validation from executorch.exir.passes import PassManager @@ -907,8 +907,7 @@ def test_qnn_backend_example_models(self): # Fail during lowering Reopen once resolved # MobileBertModelExample(), # TorchVisionViTModel(), - # Encountered undefined symbol in mainline. Reopen once resolved. - # Wav2LetterModel(), + Wav2LetterModel(), ] expected_partitions = [ 1, @@ -917,8 +916,8 @@ def test_qnn_backend_example_models(self): 1, 1, 1, - 1, - 1, + # 1, + # 1, 1, ] # TODO: Due to trigger maximum recursion depth exceeded, need to check it. @@ -1962,12 +1961,11 @@ def test_qnn_backend_example_models(self): QCOM_ANNOTATION: (), QCOM_QUANT_DTYPE: QuantDtype.use_8a8w, }, - # Encountered undefined symbol in mainline. Reopen once resolved. - # { - # QCOM_MODULE: Wav2LetterModel(), - # QCOM_ANNOTATION: (), - # QCOM_QUANT_DTYPE: QuantDtype.use_8a8w, - # }, + { + QCOM_MODULE: Wav2LetterModel(), + QCOM_ANNOTATION: (), + QCOM_QUANT_DTYPE: QuantDtype.use_8a8w, + }, ] expected_partitions = [ 1, @@ -1979,7 +1977,7 @@ def test_qnn_backend_example_models(self): # For MobileBertModelExample # 1, 1, - # 1, For Wav2LetterModel + 1, ] # TODO: Due to trigger maximum recursion depth exceeded, need to check it. 
disable_validation() diff --git a/examples/qualcomm/oss_scripts/llama/llama.py b/examples/qualcomm/oss_scripts/llama/llama.py index 9cad2499730..0829d99d57a 100755 --- a/examples/qualcomm/oss_scripts/llama/llama.py +++ b/examples/qualcomm/oss_scripts/llama/llama.py @@ -843,6 +843,7 @@ def post_process(): ) runner_cmd = "" + performance_output_path = "outputs/inference_speed.txt" if args.enable_x86_64: # x86 emulator is intended for CI and not performance. Check only the first few tokens. seq_len = min(seq_len, 16) @@ -862,6 +863,7 @@ def post_process(): f"--model_path {pte_path}", f"--seq_len {seq_len}", f"--output_path {args.artifact}/outputs/outputs.txt", + f"--performance_output_path {performance_output_path}", f"--kv_updater ShiftPointer", runner_args, ] @@ -882,6 +884,7 @@ def post_process(): f"--model_path {pte_filename}.pte", f"--seq_len {seq_len}", "--output_path outputs/outputs.txt", + f"--performance_output_path {performance_output_path}", f"--kv_updater {'SmartMask' if args.kv_updater == smart_mask_updater else 'ShiftPointer'}", runner_args, ] @@ -905,7 +908,7 @@ def post_process(): adb.pull(output_path=args.artifact, callback=post_process) if args.ip and args.port != -1: inference_speed = 0 - with open(f"{args.artifact}/outputs/inference_speed.txt", "r") as f: + with open(f"{args.artifact}/{performance_output_path}", "r") as f: inference_speed = float(f.read()) pte_size = os.path.getsize(pte_path) diff --git a/examples/qualcomm/oss_scripts/llama/qnn_llama_runner.cpp b/examples/qualcomm/oss_scripts/llama/qnn_llama_runner.cpp index 0a1635223e6..ab717aba9f8 100644 --- a/examples/qualcomm/oss_scripts/llama/qnn_llama_runner.cpp +++ b/examples/qualcomm/oss_scripts/llama/qnn_llama_runner.cpp @@ -30,6 +30,10 @@ DEFINE_string( output_path, "outputs.txt", "Executorch inference data output path."); +DEFINE_string( + performance_output_path, + "inference_speed.txt", + "Records inference speed. 
For CI purpose."); DEFINE_string(tokenizer_path, "tokenizer.bin", "Tokenizer stuff."); DEFINE_string(prompt, "The answer to the ultimate question is", "Prompt."); DEFINE_string( @@ -63,6 +67,7 @@ int main(int argc, char** argv) { example::Runner runner( {FLAGS_model_path}, FLAGS_tokenizer_path.c_str(), + FLAGS_performance_output_path.c_str(), FLAGS_logits_scale, FLAGS_logits_offset, FLAGS_temperature, diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp index da1997a5060..b9be77ce4db 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp @@ -34,13 +34,16 @@ namespace example { namespace { static constexpr auto kTopp = 0.9f; -void printReport(const Runner::Stats& stats); +void printReport( + const Runner::Stats& stats, + const std::string& performance_output_path); std::string statsToJsonString(const Runner::Stats& stats); } // namespace Runner::Runner( const std::vector& models_path, const std::string& tokenizer_path, + const std::string& performance_output_path, const float logits_scale, const int32_t logits_offset, const float temperature, @@ -49,6 +52,7 @@ Runner::Runner( : n_bos_(1), n_eos_(1), tokenizer_path_(tokenizer_path), + performance_output_path_(performance_output_path), logits_scale_(logits_scale), logits_offset_(logits_offset), temperature_(temperature), @@ -437,7 +441,7 @@ Error Runner::generate( stats_.num_prompt_tokens = num_prompt_tokens; stats_.num_generated_tokens = pos - num_prompt_tokens; - printReport(stats_); + printReport(stats_, performance_output_path_); if (stats_callback) { stats_callback(stats_); } @@ -446,7 +450,9 @@ Error Runner::generate( } namespace { -void printReport(const Runner::Stats& stats) { +void printReport( + const Runner::Stats& stats, + const std::string& performance_output_path) { printf("PyTorchObserver %s\n", statsToJsonString(stats).c_str()); ET_LOG( @@ -507,7 +513,8 @@ void printReport(const Runner::Stats& stats) { // For now, we just print the total inference time for CI, can save more info // in future if needed. 
- std::ofstream outfile("outputs/inference_speed.txt"); + + std::ofstream outfile(performance_output_path.c_str()); if (outfile.is_open()) { double num_tok = (stats.num_generated_tokens) / (double)(stats.inference_end_ms - stats.inference_start_ms) * diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.h b/examples/qualcomm/oss_scripts/llama/runner/runner.h index e659ac55164..713a1d840ad 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/runner.h +++ b/examples/qualcomm/oss_scripts/llama/runner/runner.h @@ -29,6 +29,7 @@ class Runner { explicit Runner( const std::vector& models_path, const std::string& tokenizer_path, + const std::string& performance_output_path_, const float logits_scale, const int32_t logits_offset, const float temperature, @@ -101,6 +102,7 @@ class Runner { const int32_t n_eos_; std::vector> modules_; std::string tokenizer_path_; + std::string performance_output_path_; float logits_scale_; int32_t logits_offset_; float temperature_; diff --git a/examples/qualcomm/scripts/mobilebert_fine_tune.py b/examples/qualcomm/scripts/mobilebert_fine_tune.py index 4ecdaf3583f..47a489f6d52 100755 --- a/examples/qualcomm/scripts/mobilebert_fine_tune.py +++ b/examples/qualcomm/scripts/mobilebert_fine_tune.py @@ -169,7 +169,7 @@ def get_fine_tuned_mobilebert(artifacts_dir, pretrained_weight, batch_size): dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train) dataset_val = TensorDataset(input_ids_val, attention_masks_val, labels_val) - epochs = 5 + epochs = args.num_epochs dataloader_train = DataLoader( dataset_train, sampler=RandomSampler(dataset_train), @@ -366,6 +366,13 @@ def calibrator(gm): type=str, ) + parser.add_argument( + "--num_epochs", + help="If no pretrained weights are provided, set number of epochs to train the model", + default=5, + type=int, + ) + parser.add_argument( "-F", "--use_fp16", diff --git a/examples/qualcomm/scripts/wav2letter.py b/examples/qualcomm/scripts/wav2letter.py index e377c6d7e90..7f30d1865b8 100644 --- a/examples/qualcomm/scripts/wav2letter.py +++ b/examples/qualcomm/scripts/wav2letter.py @@ -5,6 +5,7 @@ # LICENSE file in the root directory of this source tree. import json +import logging import os import sys from multiprocessing.connection import Client @@ -111,7 +112,12 @@ def main(args): # target labels " abcdefghijklmnopqrstuvwxyz'*" instance.vocab_size = 29 model = instance.get_eager_model().eval() - model.load_state_dict(torch.load(args.pretrained_weight, weights_only=True)) + if args.pretrained_weight: + model.load_state_dict(torch.load(args.pretrained_weight, weights_only=True)) + else: + logging.warning( + "It is strongly recommended to provide pretrained weights, otherwise accuracy will be bad. This option is here mainly for CI purpose to ensure compile is successful." + ) # convert conv1d to conv2d in nn.Module level will only introduce 2 permute # nodes around input & output, which is more quantization friendly. @@ -128,9 +134,15 @@ def main(args): # retrieve dataset, will take some time to download data_num = 100 - inputs, targets, input_list = get_dataset( - data_size=data_num, artifact_dir=args.artifact - ) + if args.compile_only: + inputs = [(torch.rand(1, 1, 700, 1),)] + logging.warning( + "With compile_only, accuracy will be bad due to insufficient datasets for quantization." 
+ ) + else: + inputs, targets, input_list = get_dataset( + data_size=data_num, artifact_dir=args.artifact + ) pte_filename = "w2l_qnn" build_executorch_binary( model, @@ -212,7 +224,7 @@ def main(args): ), default=None, type=str, - required=True, + required=False, ) args = parser.parse_args() From e2201c5fdfe1819682ec00ef8d0b17a6a2cfd803 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Lindstr=C3=B6m?= <33344797+martinlsm@users.noreply.github.com> Date: Tue, 4 Mar 2025 08:19:29 +0100 Subject: [PATCH 203/584] Arm backend: Enable test_w2l_u85_BI (#8880) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TOSA compiler previously had a bug that caused a segmentation fault when loading the Wav2Letter model on Ethos-U85. This issue has now been fixed. Enable the test that previously failed due to this bug. Co-authored-by: Martin Lindström --- backends/arm/test/models/test_w2l_arm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backends/arm/test/models/test_w2l_arm.py b/backends/arm/test/models/test_w2l_arm.py index 184216e0ef8..6192f1d7cd8 100644 --- a/backends/arm/test/models/test_w2l_arm.py +++ b/backends/arm/test/models/test_w2l_arm.py @@ -131,7 +131,6 @@ def test_w2l_u55_BI(self): @pytest.mark.slow @pytest.mark.corstone_fvp - @unittest.skip("Blocked by MLBEDSW-10420") @conftest.expectedFailureOnFVP # TODO: MLBEDSW-10093 def test_w2l_u85_BI(self): tester = self._test_w2l_ethos_BI_pipeline( From efd1a06a42adbb01c1328d8065e1d772d3031edf Mon Sep 17 00:00:00 2001 From: Mergen Nachin Date: Tue, 4 Mar 2025 10:32:01 -0500 Subject: [PATCH 204/584] Update using-executorch-building-from-source.md (#8925) * Update using-executorch-building-from-source.md * Update using-executorch-building-from-source.md --- docs/source/using-executorch-building-from-source.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/source/using-executorch-building-from-source.md b/docs/source/using-executorch-building-from-source.md index 8196c7d39df..842c00b842c 100644 --- a/docs/source/using-executorch-building-from-source.md +++ b/docs/source/using-executorch-building-from-source.md @@ -81,10 +81,11 @@ portability details. ``` For development, install the package in `--editable` mode, which allows to modify Python source code and see changes reflected immediately. - ``` + ```bash ./install_executorch.sh --editable [--pybind xnnpack] - # Or you can directly do the following if dependencies are already installed. + # Or you can directly do the following if dependencies are already installed + # either via a previous invocation of `./install_executorch.sh` or by explicitly installing requirements via `./install_requirements.sh` first. pip install -e . ``` From 2051a15e34f9af6c5a874f7489f779be91720ed2 Mon Sep 17 00:00:00 2001 From: Adrian Lundell <36153706+AdrianLundell@users.noreply.github.com> Date: Tue, 4 Mar 2025 18:01:02 +0100 Subject: [PATCH 205/584] Arm backend: Update fuse_batchnorm_pass to create new placeholders (#8411) * [ARM backend] Update fuse_batchnorm_pass to create new placeholders - This allows to fuse bn+convs with multiple users of the same weights - Adds new util functions create/delete_const_placeholders to take care of updating the GraphSignature and state_dict/constants dict when handling constant placholders. 
- Adds and updates related tests Change-Id: I8e550614d9741de840786d9dca9f30af9eb95a64 * Move create/delete_constant_node utils to shared folder Change-Id: I3a82f58f9796e421bd205f030f7d79d72a2f7ed9 * Add buck dependency * Fix bazel build --------- Co-authored-by: Digant Desai --- backends/arm/_passes/TARGETS | 1 + backends/arm/_passes/arm_pass_utils.py | 2 +- backends/arm/_passes/fuse_batchnorm2d_pass.py | 130 ++++++++++------- .../test/passes/test_fuse_batchnorm_pass.py | 8 +- backends/transforms/targets.bzl | 3 + ...test_create_delete_constant_placeholder.py | 123 ++++++++++++++++ backends/transforms/utils.py | 136 +++++++++++++++++- 7 files changed, 349 insertions(+), 54 deletions(-) create mode 100644 backends/transforms/test/test_create_delete_constant_placeholder.py diff --git a/backends/arm/_passes/TARGETS b/backends/arm/_passes/TARGETS index f8bf9c0d208..c56eaca8d4b 100644 --- a/backends/arm/_passes/TARGETS +++ b/backends/arm/_passes/TARGETS @@ -9,5 +9,6 @@ python_library( "//executorch/backends/transforms:replace_scalar_with_tensor", "//executorch/backends/xnnpack/_passes:xnnpack_passes", "//executorch/exir:lib", + "//executorch/backends/transforms:utils", ], ) diff --git a/backends/arm/_passes/arm_pass_utils.py b/backends/arm/_passes/arm_pass_utils.py index cb43acc7fdb..3445886ffa7 100644 --- a/backends/arm/_passes/arm_pass_utils.py +++ b/backends/arm/_passes/arm_pass_utils.py @@ -1,6 +1,6 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. -# Copyright 2024-2025 Arm Limited and/or its affiliates. # All rights reserved. +# Copyright 2024-2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. diff --git a/backends/arm/_passes/fuse_batchnorm2d_pass.py b/backends/arm/_passes/fuse_batchnorm2d_pass.py index 6cb7548a70c..9eb74aca145 100644 --- a/backends/arm/_passes/fuse_batchnorm2d_pass.py +++ b/backends/arm/_passes/fuse_batchnorm2d_pass.py @@ -6,10 +6,15 @@ # pyre-unsafe import torch +from executorch.backends.transforms.utils import ( + create_constant_placeholder, + delete_constant_placeholder, +) from executorch.exir import ExportedProgram from executorch.exir.dialects._ops import ops as exir_ops from executorch.exir.pass_base import ExportPass, PassResult from torch._export.utils import get_buffer, get_param +from torch.export.graph_signature import InputKind from torch.fx import Node from torch.nn.utils.fusion import fuse_conv_bn_weights @@ -23,7 +28,7 @@ def __init__(self, exported_program: ExportedProgram): self.exported_program = exported_program super().__init__() - def is_fuseable_conv_bn(self, node: Node): + def is_fuseable_conv_bn(self, node: Node) -> bool: """Returns True if node is a batchnorm that can be fused into a parent convolution.""" if node.op != "call_function": @@ -44,15 +49,19 @@ def is_fuseable_conv_bn(self, node: Node): # Since we change the output of the conv, fuse only if it has single user. if len(conv.users) > 1: return False - # For similar reasons, only fuse if conv parameters have single user. 
- if len(conv.all_input_nodes[1].users) > 1: - return False - if len(conv.all_input_nodes) > 2 and len(conv.all_input_nodes[2].users) > 1: - return False return True + def get_bias_name(self, conv_weight_node: Node, conv_bias_node: Node) -> str: + if conv_bias_node: + return conv_bias_node.name + "_fused_bn" + elif "weight" in conv_weight_node.name: + return conv_weight_node.name.replace("weight", "bias") + "_fused_bn" + else: + return conv_weight_node.name + "_bias_fused_bn" + def call(self, graph_module: torch.fx.GraphModule) -> PassResult: # noqa: C901 modified = False + constant_placeholders_to_delete = set() for node in graph_module.graph.nodes: if not self.is_fuseable_conv_bn(node): continue @@ -64,68 +73,93 @@ def get_param_or_none(arg) -> torch.nn.Parameter | None: ) # Get weight, bias, mean, var and epsilon from the batchnorm - bn = node - conv, bn_weight_node, bn_bias_node, bn_mean_node, bn_var_node = bn.args[0:5] - bn_weight = get_param_or_none(bn_weight_node) - bn_bias = get_param_or_none(bn_bias_node) - - running_mean = get_buffer(self.exported_program, bn_mean_node) - running_var = get_buffer(self.exported_program, bn_var_node) - if running_mean is None or running_var is None: + bn_node = node + conv, bn_weight_node, bn_bias_node, bn_mean_node, bn_var_node = ( + bn_node.args[0:5] + ) + bn_weight_tensor = get_param_or_none(bn_weight_node) + bn_bias_tensor = get_param_or_none(bn_bias_node) + bn_mean_tensor = get_buffer(self.exported_program, bn_mean_node) + bn_var_tensor = get_buffer(self.exported_program, bn_var_node) + if bn_mean_tensor is None or bn_var_tensor is None: raise ValueError( "Parameters running_mean and running_var of batchnorm can't be None." ) - epsilon = bn.args[-1] + epsilon = bn_node.args[-1] # Get weight and bias from conv conv_weight_node, conv_bias_node = conv.args[1:3] - conv_weight = get_param(self.exported_program, conv_weight_node) - conv_bias = get_param_or_none(conv_bias_node) - if conv_weight is None: + conv_weight_tensor = get_param(self.exported_program, conv_weight_node) + conv_bias_tensor = get_param_or_none(conv_bias_node) + if conv_weight_tensor is None: raise ValueError("Parameter weight of convolution can't be None.") # Compute conv parameters folded with batchnorm fused_conv_weight, fused_conv_bias = fuse_conv_bn_weights( - conv_weight, - conv_bias, - running_mean, - running_var, + conv_weight_tensor, + conv_bias_tensor, + bn_mean_tensor, + bn_var_tensor, epsilon, - bn_weight, - bn_bias, + bn_weight_tensor, + bn_bias_tensor, ) - # Set the conv parameters to fused value - def try_set_param( - param_node: Node | None, param_value: torch.nn.Parameter - ) -> bool: - """set_param but check if param_node is None first. 
Return True if param was set successfully, otherwise False."""
-            if param_node is not None:
-                param_name = (
-                    self.exported_program.graph_signature.inputs_to_parameters[
-                        param_node.name
-                    ]
+            # Create fused weight and bias for conv and replace conv args
+            with graph_module.graph.inserting_before(conv_weight_node):
+                fused_conv_weight_node = create_constant_placeholder(
+                    exp_program=self.exported_program,
+                    graph=graph_module.graph,
+                    kind=InputKind.PARAMETER,
+                    name=conv_weight_node.name + "_fused_bn",
+                    data=fused_conv_weight,
                 )
-                self.exported_program.state_dict[param_name] = param_value
-                return True
-            return False
+
+                if fused_conv_bias is not None:
+                    fused_conv_bias_node = create_constant_placeholder(
+                        exp_program=self.exported_program,
+                        graph=graph_module.graph,
+                        kind=InputKind.PARAMETER,
+                        name=self.get_bias_name(conv_weight_node, conv_bias_node),
+                        data=fused_conv_bias,
+                    )
+                else:
+                    fused_conv_bias_node = None
+
+                conv.args = (
+                    conv.args[0],
+                    fused_conv_weight_node,
+                    fused_conv_bias_node,
+                    *conv.args[3:],
+                )
 
-        try_set_param(conv_weight_node, fused_conv_weight)
-        if not try_set_param(conv_bias_node, fused_conv_bias) and try_set_param(
-            bn_bias_node, fused_conv_bias
-        ):
-            # pyre-ignore[60]
-            # Conv didn't have bias but batchnorm did, steal bias from batchnorm.
-            conv_args = (*conv.args[0:2], bn_bias_node, *conv.args[3:])
-            conv.args = conv_args
-
-        # Erasing nodes is handled by dead-code elimination.
-        for user in bn.users:
+            # Erasing batch-norm nodes is handled by dead-code elimination. After that, we may remove their constant placeholder inputs.
+            for user in bn_node.users:
                 user.replace_all_uses_with(conv)
+
+            constant_placeholders_to_delete.update(
+                [
+                    bn_weight_node,
+                    bn_bias_node,
+                    bn_mean_node,
+                    bn_var_node,
+                    conv_weight_node,
+                    conv_bias_node,
+                ]
+            )
             modified = True
 
         if modified:
             graph_module.graph.eliminate_dead_code()
+            for constant_placeholder in constant_placeholders_to_delete:
+                if (constant_placeholder is not None) and (
+                    len(constant_placeholder.users) == 0
+                ):
+                    delete_constant_placeholder(
+                        self.exported_program, constant_placeholder
+                    )
+
             graph_module.recompile()
             graph_module = super().call(graph_module).graph_module
+
         return PassResult(graph_module=graph_module, modified=modified)
diff --git a/backends/arm/test/passes/test_fuse_batchnorm_pass.py b/backends/arm/test/passes/test_fuse_batchnorm_pass.py
index 45b3253f848..415aa9f6132 100644
--- a/backends/arm/test/passes/test_fuse_batchnorm_pass.py
+++ b/backends/arm/test/passes/test_fuse_batchnorm_pass.py
@@ -85,13 +85,13 @@ def forward(self, x):
         return x
 
 
-class MergeNoBN(torch.nn.Module):
+class MergeMultipleUsersBN(torch.nn.Module):
     ops_before_pass = {
         "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default": 2,
         "executorch_exir_dialects_edge__ops_aten_convolution_default": 3,
     }
     ops_after_pass = {
-        "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default": 2,
+        "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default": 1,
         "executorch_exir_dialects_edge__ops_aten_convolution_default": 3,
     }
 
@@ -122,7 +122,7 @@ def forward(self, x):
         z = self.conv2d2(x)
         a = self.batch_norm2d(
             y
-        )  # Can't be fused since paramters of conv2d2 have multiple users.
+        )  # Can be fused despite parameters of conv2d2 having multiple users.
return z, a @@ -131,7 +131,7 @@ def forward(self, x): "merge_one_of_two_bn_affine": MergeOneOfTwoBN(True), "merge_one_of_two_bn": MergeOneOfTwoBN(False), "merge_two_of_two_bn_affine": MergeTwosOfTwoBN(True), - "merge_no_bn_affine": MergeNoBN(True), + "merge_multiple_users_bn_affine": MergeMultipleUsersBN(True), } diff --git a/backends/transforms/targets.bzl b/backends/transforms/targets.bzl index ec4e1412862..66ff9111f52 100644 --- a/backends/transforms/targets.bzl +++ b/backends/transforms/targets.bzl @@ -149,6 +149,9 @@ def define_common_targets(): runtime.python_library( name = "utils", srcs = ["utils.py"], + visibility = [ + "//executorch/backends/...", + ], deps = [ "//caffe2:torch", "//executorch/exir:lib", diff --git a/backends/transforms/test/test_create_delete_constant_placeholder.py b/backends/transforms/test/test_create_delete_constant_placeholder.py new file mode 100644 index 00000000000..0e1f5224b44 --- /dev/null +++ b/backends/transforms/test/test_create_delete_constant_placeholder.py @@ -0,0 +1,123 @@ +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import torch +from executorch.backends.transforms.utils import ( + create_constant_placeholder, + delete_constant_placeholder, +) +from executorch.exir import to_edge +from executorch.exir.dialects._ops import ops as exir_ops +from torch.export import export +from torch.export.graph_signature import InputKind + + +class EmptyNetwork(torch.nn.Module): + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return x + + test_data: torch.Tensor = (torch.zeros(1),) + + +def _test_create_delete(kind: InputKind, persistent_buffer: bool = None): + """ + Tests the utility functions create_constant_placeholder and delete_constant_placeholder + """ + + # Toy network with two nodes, input and output + # The result should be 0 = 0 + module = EmptyNetwork() + exported_program = export(module, args=module.test_data) + exported_program = to_edge(exported_program).exported_program() + graph = exported_program.graph_module.graph + assert len(graph.nodes) == 2 + assert exported_program.module()(torch.zeros(1)) == 0 + assert len(exported_program.graph_signature.input_specs) == 1 + assert len(exported_program.state_dict) == 0 + assert len(exported_program.constants) == 0 + + const_name = "test_node" + + # Create one const node with value 1 and add it to the input + input_node = list(graph.nodes)[0] + with graph.inserting_before(input_node): + const_node = create_constant_placeholder( + exp_program=exported_program, + graph=graph, + kind=kind, + name=const_name, + data=torch.ones(1), + persistent_buffer=persistent_buffer, + ) + assert "val" in const_node.meta + + with graph.inserting_after(input_node): + add_node = graph.create_node( + "call_function", + exir_ops.edge.aten.add.Tensor, + args=(input_node, const_node), + kwargs={}, + ) + + output_node = list(graph.nodes)[-1] + output_node.replace_input_with(input_node, add_node) + + # We should now have four nodes: test_node, input, add, output + # The result should be 0 + 1 = 1 + assert exported_program.module()(torch.zeros(1)) == 1 + assert len(graph.nodes) == 4 + + if kind == InputKind.PARAMETER: + assert const_name in exported_program.graph_signature.inputs_to_parameters + assert const_name in exported_program.state_dict + assert len(exported_program.constants) == 0 + elif kind == InputKind.BUFFER and persistent_buffer: + assert const_name in 
exported_program.graph_signature.inputs_to_buffers + assert const_name in exported_program.state_dict + assert len(exported_program.constants) == 0 + elif kind == InputKind.BUFFER and not persistent_buffer: + assert const_name in exported_program.graph_signature.inputs_to_buffers + assert len(exported_program.state_dict) == 0 + assert const_name in exported_program.constants + elif kind == InputKind.CONSTANT_TENSOR: + assert ( + const_name + in exported_program.graph_signature.inputs_to_lifted_tensor_constants + ) + assert len(exported_program.state_dict) == 0 + assert const_name in exported_program.constants + else: + raise RuntimeError("Wrong input kind") + + # Replacing the add op and using eliminate_dead_code() deletes the add op but not the input op + output_node.replace_input_with(add_node, input_node) + graph.eliminate_dead_code() + assert len(graph.nodes) == 3 + + # Delete the input op manually + # The result should again be 0 = 0 + delete_constant_placeholder(exported_program, const_node) + assert exported_program.module()(torch.zeros(1)) == 0 + assert len(graph.nodes) == 2 + assert len(exported_program.graph_signature.input_specs) == 1 + assert len(exported_program.state_dict) == 0 + assert len(exported_program.constants) == 0 + + +def test_create_delete_parameter(): + _test_create_delete(InputKind.PARAMETER) + + +def test_create_delete_persistent_buffer(): + _test_create_delete(InputKind.BUFFER, True) + + +def test_create_delete_non_persistent_buffer(): + _test_create_delete(InputKind.BUFFER, False) + + +def test_create_delete_constant_tensor(): + _test_create_delete(InputKind.CONSTANT_TENSOR) diff --git a/backends/transforms/utils.py b/backends/transforms/utils.py index 03c48039b93..4e451928ee4 100644 --- a/backends/transforms/utils.py +++ b/backends/transforms/utils.py @@ -1,5 +1,6 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. +# Copyright 2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -8,7 +9,6 @@ import torch from executorch.exir import ExportedProgram - from torch._export.utils import ( get_buffer, get_lifted_tensor_constant, @@ -17,6 +17,13 @@ is_lifted_tensor_constant, is_param, ) +from torch._subclasses.fake_tensor import FakeTensorConverter +from torch.export.graph_signature import ( + ExportGraphSignature, + InputKind, + InputSpec, + TensorArgument, +) def is_get_attr_node(node: torch.fx.Node) -> bool: @@ -53,3 +60,130 @@ def get_param_tensor( except AttributeError: return getattr(exp_prog.graph_module, node.target) raise RuntimeError(f"unsupported param type, {node.op}.") + + +def create_constant_placeholder( + exp_program: ExportedProgram, + graph: torch.fx.Graph, + name: str, + kind: InputKind, + data: torch.Tensor, + persistent_buffer: Optional[bool] = None, +) -> torch.fx.Node: + """ + Creates and returns a constant placeholder node, meaning that it is of type parameter, buffer, + or lifted constant tensor. graph.inserting_before/after() should be used before the call to + decide where to insert the node, at an insertion point before the first input node. + """ + + target = name + + # Add data to state_dict/ constants + match kind: + case InputKind.PARAMETER: + exp_program.state_dict[target] = torch.nn.Parameter( + data, requires_grad=False + ) + case InputKind.BUFFER: + if persistent_buffer is None: + raise RuntimeError( + "Must set persistent_buffer when creating a new buffer." 
+ ) + elif persistent_buffer: + exp_program.state_dict[target] = data + else: + exp_program.constants[target] = data + case InputKind.CONSTANT_TENSOR: + exp_program.constants[target] = data + case _: + raise RuntimeError("Can only create constant input nodes.") + + # Create fake tensor using the same fake_mode as the other fake tensors in the graph + example_node = list(graph.nodes)[0] + if isinstance( + example_node.meta["val"], (tuple, torch.fx.immutable_collections.immutable_list) + ): + example_fake_tensor = example_node.meta["val"][0] + else: + example_fake_tensor = example_node.meta["val"] + fake_tensor = FakeTensorConverter().from_real_tensor( + example_fake_tensor.fake_mode, t=data + ) + + # Create node + node = graph.create_node(op="placeholder", name=name, target=name) + node.meta["val"] = fake_tensor + + # Add tensor to graph_signature in the same order as nodes in the graph + node_names = [n.name for n in graph.nodes if n.op == "placeholder"] + node_index = node_names.index(name) + + input_specs = exp_program.graph_signature.input_specs + user_input_indices = [ + i for i, spec in enumerate(input_specs) if spec.kind == InputKind.USER_INPUT + ] + if not all( + (user_input_index >= node_index for user_input_index in user_input_indices) + ): + raise RuntimeError( + f"Failed to insert {name}; Const placeholder nodes must be inserted before user input nodes in the graph." + ) + + arg_spec = TensorArgument(name) + input_spec = InputSpec(kind, arg_spec, target, persistent_buffer) + input_specs.insert(node_index, input_spec) + + new_graph_signature = ExportGraphSignature( + input_specs, exp_program.graph_signature.output_specs + ) + exp_program._graph_signature = new_graph_signature + + return node + + +def delete_constant_placeholder(exp_program: ExportedProgram, node: torch.fx.Node): + """ + Deletes a node of type parameter, buffer, or lifted constant tensor and its related + graph signature and state_dict/constant entries. The node may not have any users. + """ + if not len(node.users) == 0: + raise RuntimeError( + f"Cannot delete input node {node.name} since it has users in the graph." + ) + + # Remove tensor from state_dict/ constants + if node.name in exp_program.graph_signature.inputs_to_parameters: + target = exp_program.graph_signature.inputs_to_parameters[node.name] + del exp_program.state_dict[target] + + elif node.name in exp_program.graph_signature.inputs_to_buffers: + target = exp_program.graph_signature.inputs_to_buffers[node.name] + + if target in exp_program.graph_signature.non_persistent_buffers: + del exp_program.constants[target] + else: + del exp_program.state_dict[target] + + elif node.name in exp_program.graph_signature.inputs_to_lifted_tensor_constants: + target = exp_program.graph_signature.inputs_to_lifted_tensor_constants[ + node.name + ] + del exp_program.constants[target] + else: + raise RuntimeError( + f"Cannot delete input node {node.name} since it is not a parameter, a buffer, nor a lifted tensor constant." 
+ ) + + # Remove input from graph signature + input_specs = [ + spec + for spec in exp_program.graph_signature.input_specs + if spec.arg.name != node.name + ] + new_graph_signature = ExportGraphSignature( + input_specs, exp_program.graph_signature.output_specs + ) + exp_program._graph_signature = new_graph_signature + + # Remove node from graph + node.graph.erase_node(node) From 09ad20af3ef1e00310eaccc65c78b8ebc24e395c Mon Sep 17 00:00:00 2001 From: Hansong <107070759+kirklandsign@users.noreply.github.com> Date: Tue, 4 Mar 2025 09:42:28 -0800 Subject: [PATCH 206/584] [minibench] Drop outliers from benchmark result (#8919) Currently the result has large variance from outliers, so only use 80% samples in the middle (trimmean 0.2) --- .../java/org/pytorch/minibench/BenchmarkActivity.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java index c0856f3e4fe..4b2ba56099e 100644 --- a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java +++ b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.stream.Collectors; import org.pytorch.executorch.Module; @@ -80,11 +81,18 @@ protected void onPostExecute(Void aVoid) { final List results = new ArrayList<>(); // The list of metrics we have atm includes: // Avg inference latency after N iterations + // Currently the result has large variance from outliers, so only use + // 80% samples in the middle (trimmean 0.2) + Collections.sort(stats.latency); + int resultSize = stats.latency.size(); + List usedLatencyResults = + stats.latency.subList(resultSize / 10, resultSize * 9 / 10); + results.add( new BenchmarkMetric( benchmarkModel, "avg_inference_latency(ms)", - stats.latency.stream().mapToDouble(l -> l).average().orElse(0.0f), + usedLatencyResults.stream().mapToDouble(l -> l).average().orElse(0.0f), 0.0f)); // Model load time results.add( From 7aa6494ea5da0d83ca8c5867bda6643caa69381e Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Tue, 4 Mar 2025 10:22:13 -0800 Subject: [PATCH 207/584] Fix ANE llama export (#8904) * up * up * up * up * up * up * up * up * up * up * up * up * up --- .ci/scripts/test_ane_static_llama.sh | 27 +++++++++++++++++++ .github/workflows/trunk.yml | 22 +++++++++++++++ examples/apple/coreml/llama/export.py | 1 + .../apple/coreml/llama/llama_transformer.py | 6 +++-- 4 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 .ci/scripts/test_ane_static_llama.sh diff --git a/.ci/scripts/test_ane_static_llama.sh b/.ci/scripts/test_ane_static_llama.sh new file mode 100644 index 00000000000..c83c522d629 --- /dev/null +++ b/.ci/scripts/test_ane_static_llama.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +set -exu + +source "$(dirname "${BASH_SOURCE[0]}")/utils.sh" + +export EXECUTORCH_ROOT="$(dirname "${BASH_SOURCE[0]}")/../.." 
+ +if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then + PYTHON_EXECUTABLE=python3 +fi + +which "${PYTHON_EXECUTABLE}" + +pushd $EXECUTORCH_ROOT/examples/apple/coreml/llama + +# Download stories llama110m artifacts +download_stories_model_artifacts + +python export.py -n model.pte -p params.json -c stories110M.pt --seq_length 32 --max_seq_length 64 --dtype fp16 --coreml-quantize c4w + +popd diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 49fd08591a7..d5ca93a98ad 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -229,6 +229,28 @@ jobs: # see if we can import the module successfully ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')" + test-static-llama-ane: + name: test-static-llama-ane + uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + with: + runner: macos-m1-stable + python-version: '3.11' + submodules: 'true' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + script: | + set -eux + bash .ci/scripts/setup-conda.sh + eval "$(conda shell.bash hook)" + + # Install requirements + sh install_requirements.sh + sh backends/apple/coreml/scripts/install_requirements.sh + python install_executorch.py --pybind coreml + sh examples/models/llama/install_requirements.sh + + # Test ANE llama + sh .ci/scripts/test_ane_static_llama.sh + test-llama-runner-macos: name: test-llama-runner-mac uses: pytorch/test-infra/.github/workflows/macos_job.yml@main diff --git a/examples/apple/coreml/llama/export.py b/examples/apple/coreml/llama/export.py index c0f60529895..f440dc878d4 100644 --- a/examples/apple/coreml/llama/export.py +++ b/examples/apple/coreml/llama/export.py @@ -203,6 +203,7 @@ def main() -> None: torch.ops.aten.scaled_dot_product_attention.default, # preserve norm op for numerical stability torch.ops.aten.linalg_vector_norm.default, + torch.ops.aten.reciprocal.default, ], compile_config=EdgeCompileConfig( _check_ir_validity=False, diff --git a/examples/apple/coreml/llama/llama_transformer.py b/examples/apple/coreml/llama/llama_transformer.py index 2ce4c1d2b5b..3c371da4c00 100644 --- a/examples/apple/coreml/llama/llama_transformer.py +++ b/examples/apple/coreml/llama/llama_transformer.py @@ -134,8 +134,10 @@ def _norm(self, x): # We have yet to do large scale evaluations on the numeric stability of this solution, but note that # it appears better than what exists currently (removing FP32 casts and using FP16) rms_norm_eps0 = ( - x * torch.sqrt(torch.tensor(self.dim, dtype=x.dtype)) - ) / torch.linalg.vector_norm(x, dim=-1, keepdim=True) + x + * torch.sqrt(torch.tensor(self.dim, dtype=x.dtype)) + * torch.reciprocal(torch.linalg.vector_norm(x, dim=-1, keepdim=True)) + ) return rms_norm_eps0 def forward(self, x): From 2eae802c360df27907ffb118a861f44468d1b755 Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Tue, 4 Mar 2025 13:39:43 -0500 Subject: [PATCH 208/584] [ExecuTorch][XNNPACK] Don't partition per_tensor weights with qd8 (#8927) This is not supported, so we shouldn't partition it. 
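
For reference, the unsupported combination is easy to express with the
quantizer helper the XNNPACK tests use; a minimal sketch (import path assumed
to be the torch.ao XNNPACKQuantizer helper, config arguments taken from the
new test below):

```python
# Sketch: the two dynamic-quantization flavors relevant to this change.
# XNNPACK's qd8 GEMM kernels require per-channel (or per-channel-group)
# quantized weights, so the per-tensor variant must not be partitioned.
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    get_symmetric_quantization_config,
)

# Unsupported: dynamic activations with per-tensor weight quantization.
unsupported_config = get_symmetric_quantization_config(
    is_per_channel=False,
    is_dynamic=True,
)

# Supported: dynamic activations with per-channel weight quantization.
supported_config = get_symmetric_quantization_config(
    is_per_channel=True,
    is_dynamic=True,
)
```

With this change, a linear quantized with the per-tensor config is left
unpartitioned by XnnpackPartitioner instead of being delegated.
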
Add an expectedFailure test to indicate that this is not supported Differential Revision: [D70343584](https://our.internmc.facebook.com/intern/diff/D70343584/) ghstack-source-id: 269356867 Pull Request resolved: https://github.com/pytorch/executorch/pull/8891 Co-authored-by: Digant Desai --- .../xnnpack/partition/config/gemm_configs.py | 37 +++++++--- backends/xnnpack/test/ops/test_linear.py | 74 +++++++++++++++++++ backends/xnnpack/utils/quant_utils.py | 9 +++ 3 files changed, 111 insertions(+), 9 deletions(-) diff --git a/backends/xnnpack/partition/config/gemm_configs.py b/backends/xnnpack/partition/config/gemm_configs.py index 872ba355c70..9b10c3be530 100644 --- a/backends/xnnpack/partition/config/gemm_configs.py +++ b/backends/xnnpack/partition/config/gemm_configs.py @@ -21,6 +21,7 @@ is_dynamic_qdq, is_per_channel, is_per_channel_group, + is_per_tensor, is_qparam, is_quant, ) @@ -66,8 +67,6 @@ def check_constraints(self, node: torch.fx.Node, ep: ExportedProgram) -> bool: return False is_valid, _ = self.get_deps(node, ep) - if not is_valid: - why(node, "Failed to get valid dependent nodes.") return is_valid def get_node_and_deps( @@ -123,6 +122,7 @@ def get_deps( precision = self._detect_precision(node) if precision not in self.supported_precision_types(): # detected precision but it is either disabled or not supported + why(node, f"Unsupported precision type {precision}") return (False, []) _, precision = self._overwrite_precision(node) valid_bias, bias_deps = self._get_bias_deps(node, ep, precision) @@ -143,7 +143,8 @@ def _get_weight_deps( # First find the weight weight_node = get_input_node(node, self.weight_idx) if not is_param_node(ep, weight_node): - return (False, []) # weight must be a static param + why(node, "Expected weight to be a static param") + return (False, []) gemm_deps.append(weight_node) return (True, gemm_deps) @@ -151,19 +152,33 @@ def _get_weight_deps( # Quantized Weight deps dequant_node = get_input_node(node, self.weight_idx) if not is_dequant(dequant_node): + why(node, "Expected weight to have a dequantized node") return False, [] gemm_deps.append(dequant_node) weight = get_input_node(dequant_node, 0) if not is_param_node(ep, weight): + why(node, "Expected weight to be a static param") return False, [] gemm_deps.append(weight) + if ( + is_per_tensor(dequant_node) + and precision == ConfigPrecisionType.DYNAMIC_QUANT + ): + why( + node, + "XNNPACK does not support per tensor quantized weights for dynamic quantization of activations", + ) + return False, [] + if is_per_channel(dequant_node) or is_per_channel_group(dequant_node): if len(dequant_node.all_input_nodes) < 2: # Expected channel quantized to have scale/zp nodes + why(node, "Expected channel quantized to have scale/zp nodes") return False, [] gemm_deps.extend(dequant_node.all_input_nodes[1:3]) + return (True, gemm_deps) def _get_output_deps( @@ -174,7 +189,7 @@ def _get_output_deps( # Look for fused activations and tail end quant node node_users = list(node.users.keys()) if len(node_users) != 1: - # Expect quantized node to have a single output (fused act or dequant) + why(node, "Expected quantized node to have a single output") return False, [] # Check if the quantized pattern has a fused activation @@ -190,6 +205,7 @@ def _get_output_deps( if not is_quant(n_output): # Expected gemm_node --> fused_act (optional) --> dequant + why(node, "Expected output node to have a dequantized node") return (False, []) gemm_deps.append(n_output) elif precision == ConfigPrecisionType.FP32: @@ -219,7 +235,8 @@ def 
_get_bias_deps( bias_node = get_input_node(node, self.bias_idx) if bias_node: if not is_param_node(ep, bias_node): - return (False, []) # bias node must be a static param + why(node, "Expected bias to be a static param") + return (False, []) gemm_deps.append(bias_node) return (True, gemm_deps) @@ -233,7 +250,7 @@ def _get_act_deps( else: dq_input = get_input_node(node, self.act_idx) if not is_dequant(dq_input): - # Expected static quant input to be dequant node + why(node, "Expected act input to be dequant node") return False, [] gemm_deps.append(dq_input) if precision == ConfigPrecisionType.STATIC_QUANT: @@ -243,6 +260,7 @@ def _get_act_deps( # q input node q_input = get_input_node(dq_input, 0) if not is_quant(q_input): + why(node, "Expected dequant input to be quant node") return (False, []) gemm_deps.append(q_input) @@ -250,20 +268,20 @@ def _get_act_deps( if is_affine_qdq(q_input): q_input_args = extract_qdq_affine_op_args_for_decomposed_ops(q_input) if not (is_node(q_input_args[1]) and is_node(q_input_args[2])): - # expected to find getitem node from choose qparam + why(node, "expected to find getitem node from choose qparam") return (False, []) getitem1 = q_input_args[1] getitem2 = q_input_args[2] if not (is_getitem(getitem1) and is_getitem(getitem2)): - # expected getitem node from choose qparam + why(node, "expected getitem node from choose qparam") return (False, []) gemm_deps.extend([getitem1, getitem2]) choose_qparam = get_input_node(getitem1, 0) if not is_qparam(choose_qparam): - # expected to find choose_qparam node + why(node, "expected to find choose_qparam node") return (False, []) gemm_deps.append(choose_qparam) return (True, gemm_deps) @@ -471,6 +489,7 @@ def find_partition_args(input_node): # there can only be a single output node in partition or len(src_partition.output_nodes) != 1 ): + why(node, "invalid source partition") return (False, []) # map addmm's args to the source partition linear's inputs and users diff --git a/backends/xnnpack/test/ops/test_linear.py b/backends/xnnpack/test/ops/test_linear.py index 30bb4f0aba2..b56a746651c 100644 --- a/backends/xnnpack/test/ops/test_linear.py +++ b/backends/xnnpack/test/ops/test_linear.py @@ -539,6 +539,66 @@ def _test_qd8_per_channel_linear(self, dtype: torch.dtype = torch.float): uses_bias=uses_bias, ) + def _test_qd8_linear_per_tensor_unsupported(self, dtype: torch.dtype = torch.float): + for uses_bias in (False, True): + module = BaseLinear( + in_size=8, + input_channels=13, + output_channels=17, + dtype=dtype, + use_bias=uses_bias, + ) + inputs = module.get_inputs() + dynamic_shapes = ({1: torch.export.Dim("batch", max=100)},) + + quant_config = get_symmetric_quantization_config( + is_per_channel=False, + is_dynamic=True, + ) + + for legacy_partitioner in (True, False): + for per_op_mode in (True, False): + # Every combination should fail to partition Linear or [add]mm. 
+                    DynamicallyQuantizedPartitioner = XnnpackPartitioner(
+                        config_precisions=ConfigPrecisionType.DYNAMIC_QUANT,
+                        per_op_mode=per_op_mode,
+                    )
+
+                    tester = Tester(module, inputs, dynamic_shapes=dynamic_shapes)
+                    tester.quantize(Quantize(quantization_config=quant_config))
+                    tester.export()
+
+                    if legacy_partitioner:
+                        tester.to_edge()
+                        tester.partition(
+                            Partition(DynamicallyQuantizedPartitioner)
+                        ).dump_artifact()
+                        # should have [add]mm node
+                        if uses_bias:
+                            tester.check(
+                                [
+                                    "executorch_exir_dialects_edge__ops_aten_addmm_default",
+                                ]
+                            )
+                        else:
+                            tester.check(
+                                [
+                                    "executorch_exir_dialects_edge__ops_aten_mm_default",
+                                ]
+                            )
+                    else:
+                        tester.to_edge_transform_and_lower(
+                            ToEdgeTransformAndLower([DynamicallyQuantizedPartitioner])
+                        ).dump_artifact()
+                        # should not have a delegate node
+                        tester.check_not(
+                            [
+                                "torch.ops.higher_order.executorch_call_delegate",
+                            ]
+                        )
+                    # No need to run the model, since it should fail to partition.
+                    return
+
     def _test_qd8_per_channel_4w_linear(self, dtype: torch.dtype = torch.float):
         qconfig = self._get_4b_dqconfig()
         input_channels = [2, 63]
@@ -697,10 +757,24 @@ def test_qs8_linear(self):
     def test_qd8_f16_per_channel_linear(self):
         self._test_qd8_per_channel_linear(dtype=torch.half)
 
+    def test_qd8_f16_per_tensor_linear(self):
+        """
+        XNNPACK doesn't support per_tensor quantized weights for dynamic quantized linear op.
+        This test is to verify that we can't lower per_tensor quantized weights to per_channel quantized weights.
+        """
+        self._test_qd8_linear_per_tensor_unsupported(dtype=torch.half)
+
     # Tests for q[dp]8-f32-qc8w
     def test_qd8_f32_per_channel_linear(self):
         self._test_qd8_per_channel_linear(dtype=torch.float)
 
+    def test_qd8_f32_per_tensor_linear(self):
+        """
+        XNNPACK doesn't support per_tensor quantized weights for dynamic quantized linear op.
+        This test is to verify that we can't lower per_tensor quantized weights to per_channel quantized weights.
+        """
+        self._test_qd8_linear_per_tensor_unsupported(dtype=torch.float)
+
     # Tests for q[dp]8-f16-qc4w
     def test_linear_qd8_f16_per_channel_int4(self):
         self._test_qd8_per_channel_4w_linear(dtype=torch.half)
diff --git a/backends/xnnpack/utils/quant_utils.py b/backends/xnnpack/utils/quant_utils.py
index 7c035757a6f..49c5a963161 100644
--- a/backends/xnnpack/utils/quant_utils.py
+++ b/backends/xnnpack/utils/quant_utils.py
@@ -89,6 +89,15 @@ def is_per_channel(node: torch.fx.Node) -> bool:
     return is_per_channel or is_affine_per_channel_group
 
 
+def is_per_tensor(node: torch.fx.Node) -> bool:
+    if not (is_quant(node) or is_dequant(node)):
+        return False
+
+    is_per_tensor = "per_tensor" in node.target.__name__  # pyre-ignore
+
+    return is_per_tensor and not (is_per_channel(node))
+
+
 def is_affine_qdq(node: torch.fx.Node) -> bool:
     if not (is_quant(node) or is_dequant(node)):
         return False

From 6caefc32645003da1a1c707013be80278717bb9c Mon Sep 17 00:00:00 2001
From: Scott Wolchok
Date: Tue, 4 Mar 2025 11:17:45 -0800
Subject: [PATCH 209/584] Link xnn_executor_runner with optimized op library
 (#8901)

Doesn't seem to be any reason not to allow optimized ops for this one.
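
The gist of the change is a configure-time switch: the runner links the
optimized kernel library when it is built, and falls back to the portable ops
library otherwise. A sketch of the resulting CMake logic (it mirrors the hunk
below):

```cmake
# Choose the kernel library for xnn_executor_runner at configure time
# rather than unconditionally linking portable_ops_lib.
if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
  list(APPEND xnn_executor_runner_libs optimized_native_cpu_ops_lib)
else()
  list(APPEND xnn_executor_runner_libs portable_ops_lib)
endif()
```
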
---
 backends/xnnpack/CMakeLists.txt | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/backends/xnnpack/CMakeLists.txt b/backends/xnnpack/CMakeLists.txt
index a703d67c1b2..02cd0b6d988 100644
--- a/backends/xnnpack/CMakeLists.txt
+++ b/backends/xnnpack/CMakeLists.txt
@@ -33,14 +33,14 @@ if(NOT PYTHON_EXECUTABLE)
   resolve_python_executable()
 endif()
 
-# NB: Enabling this will serialize execution of delegate instances
-# Keeping this OFF by default to maintain existing behavior, to be revisited.
+# NB: Enabling this will serialize execution of delegate instances. Keeping
+# this OFF by default to maintain existing behavior, to be revisited.
 option(EXECUTORCH_XNNPACK_SHARED_WORKSPACE
-       "Enable workspace sharing across different delegate instances" ON)
-# Keeping this OFF by default due to regressions in decode
-# and model load with kleidi kernels
-option(EXECUTORCH_XNNPACK_ENABLE_KLEIDI
-       "Enable Arm Kleidi kernels" OFF)
+       "Enable workspace sharing across different delegate instances" ON
+)
+# Keeping this OFF by default due to regressions in decode and model load with
+# kleidi kernels
+option(EXECUTORCH_XNNPACK_ENABLE_KLEIDI "Enable Arm Kleidi kernels" OFF)
 if(EXECUTORCH_XNNPACK_SHARED_WORKSPACE)
   add_definitions(-DENABLE_XNNPACK_SHARED_WORKSPACE)
 endif()
@@ -100,8 +100,7 @@ include(cmake/Dependencies.cmake)
 list(TRANSFORM _xnnpack_backend__srcs PREPEND "${EXECUTORCH_ROOT}/")
 add_library(xnnpack_backend STATIC ${_xnnpack_backend__srcs})
 target_link_libraries(
-  xnnpack_backend PRIVATE ${xnnpack_third_party} executorch_core
-  xnnpack_schema
+  xnnpack_backend PRIVATE ${xnnpack_third_party} executorch_core xnnpack_schema
 )
 
 target_include_directories(
@@ -119,6 +118,12 @@ target_include_directories(
 target_compile_options(xnnpack_backend PUBLIC ${_common_compile_options})
 target_link_options_shared_lib(xnnpack_backend)
 
+if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
+  list(APPEND xnn_executor_runner_libs optimized_native_cpu_ops_lib)
+else()
+  list(APPEND xnn_executor_runner_libs portable_ops_lib)
+endif()
+
 list(APPEND xnn_executor_runner_libs xnnpack_backend executorch)
 
 # ios can only build library but not binary
@@ -134,13 +139,14 @@ if(NOT CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\.toolchain)\.cmake$")
     if(EXECUTORCH_BUILD_DEVTOOLS)
       list(APPEND xnn_executor_runner_libs etdump)
     else()
-      message(SEND_ERROR "Use of 'EXECUTORCH_ENABLE_EVENT_TRACER' requires 'EXECUTORCH_BUILD_DEVTOOLS' to be enabled.")
+      message(
+        SEND_ERROR
+          "Use of 'EXECUTORCH_ENABLE_EVENT_TRACER' requires 'EXECUTORCH_BUILD_DEVTOOLS' to be enabled."
+      )
     endif()
   endif()
 
-  target_link_libraries(
-    xnn_executor_runner gflags portable_ops_lib ${xnn_executor_runner_libs}
-  )
+  target_link_libraries(xnn_executor_runner gflags ${xnn_executor_runner_libs})
   target_compile_options(xnn_executor_runner PUBLIC ${_common_compile_options})
 endif()
 
 install(

From d897e73c4c0a0a220d073a1b38c65f2b0d58a192 Mon Sep 17 00:00:00 2001
From: Sam Gondelman
Date: Tue, 4 Mar 2025 11:24:37 -0800
Subject: [PATCH 210/584] [Windows] [Tensor.cpp] add #include <algorithm> (#8912)

[Tensor.cpp] add the <algorithm> include to get stable_sort and iter_swap on
Windows
---
 backends/vulkan/runtime/api/containers/Tensor.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/backends/vulkan/runtime/api/containers/Tensor.cpp b/backends/vulkan/runtime/api/containers/Tensor.cpp
index 856ff4d618a..b0722dfca06 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.cpp
+++ b/backends/vulkan/runtime/api/containers/Tensor.cpp
@@ -7,6 +7,7 @@
  */
 
 #include
+#include <algorithm>
 
 #include
 #include

From 6e09ea2b0ce92a180b66fb188f42120da4607de3 Mon Sep 17 00:00:00 2001
From: Scott Wolchok
Date: Tue, 4 Mar 2025 13:00:16 -0800
Subject: [PATCH 211/584] Add cpu_thread setting logic to xnn_executor_runner
 (#8902)

---
 backends/xnnpack/CMakeLists.txt               |  4 ++++
 .../executor_runner/executor_runner.cpp       | 21 +++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/backends/xnnpack/CMakeLists.txt b/backends/xnnpack/CMakeLists.txt
index 02cd0b6d988..dc42a52a234 100644
--- a/backends/xnnpack/CMakeLists.txt
+++ b/backends/xnnpack/CMakeLists.txt
@@ -148,6 +148,10 @@ if(NOT CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\.toolchain)\.cmake$")
 
   target_link_libraries(xnn_executor_runner gflags ${xnn_executor_runner_libs})
   target_compile_options(xnn_executor_runner PUBLIC ${_common_compile_options})
+  if(EXECUTORCH_BUILD_PTHREADPOOL)
+    target_link_libraries(xnn_executor_runner extension_threadpool pthreadpool)
+    target_compile_definitions(xnn_executor_runner PRIVATE ET_USE_THREADPOOL)
+  endif()
 endif()
 
 install(
diff --git a/examples/portable/executor_runner/executor_runner.cpp b/examples/portable/executor_runner/executor_runner.cpp
index f7702fae3de..187d6f34489 100644
--- a/examples/portable/executor_runner/executor_runner.cpp
+++ b/examples/portable/executor_runner/executor_runner.cpp
@@ -35,6 +35,11 @@
 #include
 #endif // ET_EVENT_TRACER_ENABLED
 
+#if defined(ET_USE_THREADPOOL)
+#include <executorch/extension/threadpool/cpuinfo_utils.h>
+#include <executorch/extension/threadpool/threadpool.h>
+#endif
+
 static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4 MB
 static uint8_t temp_allocator_pool[1024U * 1024U];
 
@@ -47,6 +52,10 @@ DEFINE_uint32(num_executions, 1, "Number of times to run the model.");
 #ifdef ET_EVENT_TRACER_ENABLED
 DEFINE_string(etdump_path, "model.etdump", "Write ETDump data to this path.");
 #endif // ET_EVENT_TRACER_ENABLED
+DEFINE_int32(
+    cpu_threads,
+    -1,
+    "Number of CPU threads for inference. Defaults to -1, which implies we'll use a heuristic to derive the # of performant cores for a specific device.");
 
 using executorch::extension::FileDataLoader;
 using executorch::runtime::Error;
@@ -124,6 +133,18 @@ int main(int argc, char** argv) {
     return 1;
   }
 
+  auto cpu_threads = FLAGS_cpu_threads;
+#if defined(ET_USE_THREADPOOL)
+  uint32_t num_performant_cores = cpu_threads == -1
+      ? 
::executorch::extension::cpuinfo::get_num_performant_cores() + : static_cast(cpu_threads); + ET_LOG( + Info, "Resetting threadpool with num threads = %d", num_performant_cores); + if (num_performant_cores > 0) { + ::executorch::extension::threadpool::get_threadpool() + ->_unsafe_reset_threadpool(num_performant_cores); + } +#endif // ET_USE_THREADPOOL // Create a loader to get the data of the program file. There are other // DataLoaders that use mmap() or point to data that's already in memory, and // users can create their own DataLoaders to load from arbitrary sources. From ef73540094cc083e57083b82d039a25560492871 Mon Sep 17 00:00:00 2001 From: Val Tarasyuk Date: Tue, 4 Mar 2025 13:08:15 -0800 Subject: [PATCH 212/584] Add a pass to remove certain redundant branched quant/dequant nodes Differential Revision: D69947096 Pull Request resolved: https://github.com/pytorch/executorch/pull/8896 --- backends/cadence/aot/pass_utils.py | 10 +++ backends/cadence/aot/remove_ops.py | 65 ++++++++++++++++++- .../aot/tests/test_fusion_ops_passes.py | 5 +- .../aot/tests/test_remove_ops_passes.py | 34 +++++++++- 4 files changed, 109 insertions(+), 5 deletions(-) diff --git a/backends/cadence/aot/pass_utils.py b/backends/cadence/aot/pass_utils.py index 3d73e7f8c1e..9f556135dfb 100644 --- a/backends/cadence/aot/pass_utils.py +++ b/backends/cadence/aot/pass_utils.py @@ -104,6 +104,16 @@ def count_node(graph_module: torch.fx.GraphModule, target: torch.fx.node.Target) return total +def op_counts_match( + graph_module: torch.fx.GraphModule, + expected_op_counts: dict[EdgeOpOverload, int], +) -> bool: + for op, count in expected_op_counts.items(): + if count_node(graph_module, op) != count: + return False + return True + + # Testing utils # Return the compute/function nodes in the graph def get_compute_nodes_in_gm(graph_module: torch.fx.GraphModule) -> List[torch.fx.Node]: diff --git a/backends/cadence/aot/remove_ops.py b/backends/cadence/aot/remove_ops.py index 942f6d55533..3cac7514fff 100644 --- a/backends/cadence/aot/remove_ops.py +++ b/backends/cadence/aot/remove_ops.py @@ -33,7 +33,7 @@ from executorch.backends.cadence.aot.utils import get_edge_overload_packet from executorch.backends.transforms.remove_clone_ops import RemoveCloneOpsTransform from executorch.exir.dialects._ops import ops as exir_ops -from executorch.exir.dialects.edge._ops import EdgeOpOverload +from executorch.exir.dialects.edge._ops import EdgeOpOverload, EdgeOpOverloadPacket from executorch.exir.pass_base import ExportPass, NodeMetadata, PassResult, ProxyValue from executorch.exir.pass_manager import PassManager, PassType from executorch.exir.passes import dead_code_elimination_pass @@ -745,6 +745,68 @@ def permute_shape( return [shape[p] for p in permute_dims] +@register_cadence_pass(CadencePassAttribute(opt_level=1)) +class RemoveBranchedQuantDequant(ExportPass): + """ + This pass looks for adjacent quant and dequant nodes with identical + parameters, where the quant node has other users in addition to the + dequant. The quant and dequant pair would be removed by the + FuseQuantDequantToRequantizePass if not for the multiple users. 
This pass + removes just the dequant node by connecting it to the quant's parent node + """ + + quantize_op_packets: set[EdgeOpOverloadPacket] = { + exir_ops.edge.cadence.quantize_per_tensor, + exir_ops.edge.quantized_decomposed.quantize_per_tensor, + } + dequantize_op_packets: set[EdgeOpOverloadPacket] = { + exir_ops.edge.cadence.dequantize_per_tensor, + exir_ops.edge.quantized_decomposed.dequantize_per_tensor, + } + + def call(self, graph_module: torch.fx.GraphModule) -> PassResult: + self.remove_branched( + graph_module, self.quantize_op_packets, self.dequantize_op_packets + ) + self.remove_branched( + graph_module, self.dequantize_op_packets, self.quantize_op_packets + ) + + graph_module.graph.eliminate_dead_code() + result = super().call(graph_module) + return result + + def remove_branched( + self, + graph_module: torch.fx.GraphModule, + producer_pkts: set[EdgeOpOverloadPacket], + consumer_pkts: set[EdgeOpOverloadPacket], + ) -> None: + for node in graph_module.graph.nodes: + if ( + node.op != "call_function" + or not isinstance(node.target, EdgeOpOverload) + or get_edge_overload_packet(node.target) not in producer_pkts + ): + continue + + if len(node.users) < 2: + continue + + for user in node.users: + if ( + not isinstance(user.target, EdgeOpOverload) + or get_edge_overload_packet(user.target) not in consumer_pkts + ): + continue + + # check qparams match + if node.args[1:] != user.args[1:]: + continue + + user.replace_all_uses_with(node.args[0]) + + # The following class consolidates functions to remove ops that are redundant # in Jarvis. Currently, each function in this class iterates over each node of # the graph module once. In future, we could consolidate them into a monolithic @@ -765,4 +827,5 @@ class CadenceRemoveNops: RemoveNopMulOpPass, RemoveNopAddOpPass, RemoveNopLinalgVectorNormOpPass, + RemoveBranchedQuantDequant, ] diff --git a/backends/cadence/aot/tests/test_fusion_ops_passes.py b/backends/cadence/aot/tests/test_fusion_ops_passes.py index 792a6ee4166..4af3eafb72a 100644 --- a/backends/cadence/aot/tests/test_fusion_ops_passes.py +++ b/backends/cadence/aot/tests/test_fusion_ops_passes.py @@ -20,7 +20,7 @@ FuseTransposeOpPairsPass, ) from executorch.backends.cadence.aot.graph_builder import GraphBuilder -from executorch.backends.cadence.aot.pass_utils import count_node +from executorch.backends.cadence.aot.pass_utils import count_node, op_counts_match from executorch.exir.dialects._ops import ops as exir_ops from executorch.exir.dialects.edge._ops import EdgeOpOverload from torch import nn @@ -32,8 +32,7 @@ def check_op_counts( graph_module: torch.fx.GraphModule, expected_op_counts: dict[EdgeOpOverload, int], ) -> None: - for op, count in expected_op_counts.items(): - self.assertEqual(count_node(graph_module, op), count) + self.assertTrue(op_counts_match(graph_module, expected_op_counts)) class TestFusionPasses(TestFusionPassesBase): diff --git a/backends/cadence/aot/tests/test_remove_ops_passes.py b/backends/cadence/aot/tests/test_remove_ops_passes.py index 348e0b5de83..0c802f9cbf5 100644 --- a/backends/cadence/aot/tests/test_remove_ops_passes.py +++ b/backends/cadence/aot/tests/test_remove_ops_passes.py @@ -17,10 +17,11 @@ from executorch.backends.cadence.aot import compiler from executorch.backends.cadence.aot.compiler import export_to_edge -from executorch.backends.cadence.aot.pass_utils import count_node +from executorch.backends.cadence.aot.pass_utils import count_node, op_counts_match from executorch.backends.cadence.aot.quantizer.quantizer import 
CadenceDefaultQuantizer from executorch.backends.cadence.aot.remove_ops import ( RemoveAliasCopyOpPass, + RemoveBranchedQuantDequant, RemoveCloneOpPass, RemoveContiguousOpPass, RemoveDetachCopyPass, @@ -709,3 +710,34 @@ def forward(self, x): self.assertEqual( count_node(graph_module, exir_ops.edge.aten.permute_copy.default), 2 ) + + def test_remove_dequant_on_branch(self): + class M(torch.nn.Module): + def forward(self, x): + x = torch.abs(x) + x0 = torch.ops.quantized_decomposed.quantize_per_tensor( + x, 1.2, 3, 0, 127, torch.int8 + ) + x1 = torch.abs(x0) + y0 = torch.ops.quantized_decomposed.dequantize_per_tensor( + x0, 1.2, 3, 0, 127, torch.int8 + ) + y1 = y0.view(-1) + return x1, y1 + + inputs = torch.rand(1, 8, 4, 6) + model = M() + graph_module = export_to_edge(model, (inputs,)).exported_program().graph_module + + graph_module = RemoveBranchedQuantDequant()(graph_module).graph_module + self.assertTrue( + op_counts_match( + graph_module, + expected_op_counts={ + exir_ops.edge.quantized_decomposed.quantize_per_tensor.default: 1, + # we expect the pass to remove the dequantize node + exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default: 0, + exir_ops.edge.aten.abs.default: 2, + }, + ) + ) From 1a9a59bd86bf167653edbd03501f5d54272821a2 Mon Sep 17 00:00:00 2001 From: Hansong <107070759+kirklandsign@users.noreply.github.com> Date: Tue, 4 Mar 2025 14:58:03 -0800 Subject: [PATCH 213/584] [ExecuTorch][XNNPACK] Rename linear weight partitioning flag for clarity Pull Request resolved: https://github.com/pytorch/executorch/pull/8892 Differential Revision: [D70372220](https://our.internmc.facebook.com/intern/diff/D70372220/) ghstack-source-id: 269599293 Co-authored-by: Digant Desai --- .../xnnpack/partition/config/gemm_configs.py | 38 ++++++++++++------- .../partition/config/xnnpack_config.py | 4 +- backends/xnnpack/test/ops/test_linear.py | 4 +- backends/xnnpack/test/ops/test_lstm.py | 8 ++-- 4 files changed, 35 insertions(+), 19 deletions(-) diff --git a/backends/xnnpack/partition/config/gemm_configs.py b/backends/xnnpack/partition/config/gemm_configs.py index 9b10c3be530..8712c2709ac 100644 --- a/backends/xnnpack/partition/config/gemm_configs.py +++ b/backends/xnnpack/partition/config/gemm_configs.py @@ -96,9 +96,9 @@ def _detect_precision(self, node: torch.fx.Node) -> ConfigPrecisionType: def _overwrite_precision(self, node: torch.fx.Node): precision = self._detect_precision(node) if precision not in self.enabled_precision_types: - # detected precision is not enabled, lets try to partition it as fp32 + # detected precision is not enabled, try to partition it as fp32 if self.enabled_precision_types == [ConfigPrecisionType.FP32]: - # if only fp32 is enabled, then we can still partition fp32 gemms + # when only fp32 is enabled, then we can still partition fp32 gemms # even with in a quantized graph if precision in [ ConfigPrecisionType.STATIC_QUANT, @@ -107,6 +107,7 @@ def _overwrite_precision(self, node: torch.fx.Node): precision = ConfigPrecisionType.FP32 logging.info(f"Overwriting precision, partitioning {node} as FP32") return True, precision + return False, precision def get_deps( @@ -226,8 +227,11 @@ def _get_bias_deps( self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType ) -> Tuple[bool, List[torch.fx.Node]]: gemm_deps = [] - if precision == ConfigPrecisionType.FP32 and self.force_fp32_dynamic_linear: - # if force force_fp32_dynamic_linear is enabled, then we + if ( + precision == ConfigPrecisionType.FP32 + and 
self.force_non_static_weights_for_f32_linear + ): + # if force_non_static_weights_for_f32_linear is enabled, then we # do not partition the weight node return (True, gemm_deps) @@ -305,8 +309,11 @@ def get_original_aten(self) -> Optional[torch._ops.OpOverload]: def _get_weight_deps( self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType ) -> Tuple[bool, List[torch.fx.Node]]: - if precision == ConfigPrecisionType.FP32 and self.force_fp32_dynamic_linear: - # if force fp32_dynamic_linear is enabled, then we + if ( + precision == ConfigPrecisionType.FP32 + and self.force_non_static_weights_for_f32_linear + ): + # if force_non_static_weights_for_f32_linear is enabled, then we # do not partition the weight node return (True, []) @@ -412,9 +419,11 @@ def __init__(self, **kwargs): def _get_weight_deps( self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType ) -> Tuple[bool, List[torch.fx.Node]]: - # TODO(maxren, T210537195): - if precision == ConfigPrecisionType.FP32 and self.force_fp32_dynamic_linear: - # if force fp32_dynamic_linear is on and we detected this as fp32, then we + if ( + precision == ConfigPrecisionType.FP32 + and self.force_non_static_weights_for_f32_linear + ): + # if force_non_static_weights_for_f32_linear is on and we detected this as fp32, then we # do not partition the weight node return (True, []) @@ -501,11 +510,11 @@ def find_partition_args(input_node): node.args = old_args node.users = old_users - # When using force_fp32_dynamic_linear, we want to get_deps to overwrite the source partition nodes. + # When using force_non_static_weights_for_f32_linear, we want to get_deps to overwrite the source partition nodes. # Else we want to be greedy. ret_deps = ( list(set(deps) & set(src_partition.nodes)) - if self.force_fp32_dynamic_linear + if self.force_non_static_weights_for_f32_linear else list(set(deps) | set(src_partition.nodes)) ) @@ -531,8 +540,11 @@ def __init__(self, **kwargs): def _get_weight_deps( self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType ) -> Tuple[bool, List[torch.fx.Node]]: - if precision == ConfigPrecisionType.FP32 and self.force_fp32_dynamic_linear: - # if force fp32_dynamic_linear is on and we detected this as fp32, then we + if ( + precision == ConfigPrecisionType.FP32 + and self.force_non_static_weights_for_f32_linear + ): + # if force_non_static_weights_for_f32_linear is on and we detected this as fp32, then we # do not partition the weight node return (True, []) diff --git a/backends/xnnpack/partition/config/xnnpack_config.py b/backends/xnnpack/partition/config/xnnpack_config.py index d261416a76f..20018610fce 100644 --- a/backends/xnnpack/partition/config/xnnpack_config.py +++ b/backends/xnnpack/partition/config/xnnpack_config.py @@ -41,7 +41,9 @@ def __init__(self, **kwargs): super().__init__() self.enabled_precision_types = self.supported_precision_types() # Flag used in GEMMConfig() - self.force_fp32_dynamic_linear = kwargs.get("force_fp32_dynamic_linear", False) + self.force_non_static_weights_for_f32_linear = kwargs.get( + "force_non_static_weights_for_f32_linear", False + ) def get_partition( self, node: torch.fx.Node, ep: ExportedProgram diff --git a/backends/xnnpack/test/ops/test_linear.py b/backends/xnnpack/test/ops/test_linear.py index b56a746651c..690a1109a17 100644 --- a/backends/xnnpack/test/ops/test_linear.py +++ b/backends/xnnpack/test/ops/test_linear.py @@ -948,7 +948,7 @@ def test_linear_qd8_as_fp32(self): }, ) - def test_linear_fp32_with_force_as_mm(self): + 
def test_linear_with_force_non_static_weights_for_f32_linear(self):
         def check_signature(
             signature: ExportGraphSignature,
             force_flag: bool,
@@ -981,7 +981,7 @@ def check_signature(
                 inputs = module.get_inputs()
                 tester = Tester(module, inputs).export()
                 partitioner = XnnpackPartitioner(
-                    force_fp32_dynamic_linear=force_flag
+                    force_non_static_weights_for_f32_linear=force_flag
                 )
                 if legacy_mode:
                     tester.to_edge()
diff --git a/backends/xnnpack/test/ops/test_lstm.py b/backends/xnnpack/test/ops/test_lstm.py
index be209082b37..6c174b16f33 100644
--- a/backends/xnnpack/test/ops/test_lstm.py
+++ b/backends/xnnpack/test/ops/test_lstm.py
@@ -43,18 +43,20 @@ def test_fp32_lstm(self):
             .run_method_and_compare_outputs()
         )
 
-    def test_fp32_lstm_force_dynamic_linear(self):
+    def test_lstm_with_force_non_static_weights_for_f32_linear(self):
         (
             Tester(self.LSTMLinear(32, 32, 10), (torch.rand(1, 32, 32),))
             .export()
             .to_edge_transform_and_lower(
                 ToEdgeTransformAndLower(
-                    partitioners=[XnnpackPartitioner(force_fp32_dynamic_linear=True)]
+                    partitioners=[
+                        XnnpackPartitioner(force_non_static_weights_for_f32_linear=True)
+                    ]
                 )
             )
             .check_not(["executorch_exir_dialects_edge__ops_aten_addmm_default"])
             # Weights are supplied as input to linears
-            # Biases are not owned by delegates when force_fp32_dynamic_linear is set
+            # Biases are not owned by delegates when force_non_static_weights_for_f32_linear is set
             .check(["p_lstm_weight_hh_l0", "p_lstm_weight_ih_l0", "p_lstm_bias"])
             .to_executorch()
             .serialize()

From 5814a3b553fac66f6eb6b8cec03a1f64ac158796 Mon Sep 17 00:00:00 2001
From: Scott Wolchok
Date: Tue, 4 Mar 2025 15:08:11 -0800
Subject: [PATCH 214/584] portable arg{max,min}: optimize update check (#8863)

We don't need a second isnan; see code comment. (This is a small
optimization.)
---
 kernels/portable/cpu/op_argmax.cpp |  5 ++++-
 kernels/portable/cpu/op_argmin.cpp | 12 +++++++++++-
 kernels/test/op_argmax_test.cpp    | 13 +++++++++++++
 kernels/test/op_argmin_test.cpp    | 13 +++++++++++++
 4 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/kernels/portable/cpu/op_argmax.cpp b/kernels/portable/cpu/op_argmax.cpp
index 39ad0171d5d..a272d4405a8 100644
--- a/kernels/portable/cpu/op_argmax.cpp
+++ b/kernels/portable/cpu/op_argmax.cpp
@@ -50,7 +50,10 @@ Tensor& argmax_out(
   for (const auto out_ix : c10::irange(out.numel())) {
     std::tuple<CTYPE, long> acc = reduce_over_dim<CTYPE>(
         [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) {
-          if (!std::isnan(acc_val) && (std::isnan(v) || v > acc_val)) {
+          // the below condition as written is equivalent to
+          // !isnan(acc_val) && (isnan(v) || v > acc_val). See
+          // argument in op_argmin.cpp.
+          if (!std::isnan(acc_val) && !(v <= acc_val)) {
             acc_val = v;
             acc_ix = ix;
           }
diff --git a/kernels/portable/cpu/op_argmin.cpp b/kernels/portable/cpu/op_argmin.cpp
index 8148efa6264..a0ee82d2612 100644
--- a/kernels/portable/cpu/op_argmin.cpp
+++ b/kernels/portable/cpu/op_argmin.cpp
@@ -50,7 +50,17 @@ Tensor& argmin_out(
   for (const auto out_ix : c10::irange(out.numel())) {
     std::tuple<CTYPE, long> acc = reduce_over_dim<CTYPE>(
         [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) {
-          if (!std::isnan(acc_val) && (std::isnan(v) || v < acc_val)) {
+          // the below condition as written is equivalent to !isnan(acc_val) &&
+          // (isnan(v) || v < acc_val). Cases:
+          // - if neither acc_val nor v is NaN, !(v >= acc_val) is
+          //   trivially equivalent to v < acc_val.
+          // - if acc_val is NaN, the whole thing is trivially false.
+          // - if acc_val is not NaN and v is NaN, then v >= acc_val
+          //   is false because all comparisons involving NaN are
+          //   false, so the result is true. The result is trivially
+          //   true for the above condition that uses isnan(v) as
+          //   well.
+          if (!std::isnan(acc_val) && !(v >= acc_val)) {
             acc_val = v;
             acc_ix = ix;
           }
diff --git a/kernels/test/op_argmax_test.cpp b/kernels/test/op_argmax_test.cpp
index 66c79cefff7..4d68dfe88be 100644
--- a/kernels/test/op_argmax_test.cpp
+++ b/kernels/test/op_argmax_test.cpp
@@ -90,3 +90,16 @@ TEST_F(OpArgmaxTest, SanityCheckNullDim) {
   EXPECT_TENSOR_EQ(out, expected);
   // clang-format on
 }
+
+TEST_F(OpArgmaxTest, FirstNaNWins) {
+  TensorFactory<ScalarType::Float> tf_float;
+  Tensor in = tf_float.make({4}, {1, NAN, -4, NAN});
+
+  TensorFactory<ScalarType::Long> tf_long;
+  Tensor out = tf_long.zeros({});
+  Tensor expected = tf_long.make({}, {1});
+
+  Tensor ret = op_argmax_out(in, {}, false, out);
+  EXPECT_TENSOR_EQ(out, ret);
+  EXPECT_TENSOR_EQ(out, expected);
+}
diff --git a/kernels/test/op_argmin_test.cpp b/kernels/test/op_argmin_test.cpp
index 250fe4f7e1e..a0b2699a28f 100644
--- a/kernels/test/op_argmin_test.cpp
+++ b/kernels/test/op_argmin_test.cpp
@@ -90,3 +90,16 @@ TEST_F(OpArgminTest, SanityCheckNullDim) {
   EXPECT_TENSOR_EQ(out, expected);
   // clang-format on
 }
+
+TEST_F(OpArgminTest, FirstNaNWins) {
+  TensorFactory<ScalarType::Float> tf_float;
+  Tensor in = tf_float.make({4}, {1, NAN, -4, NAN});
+
+  TensorFactory<ScalarType::Long> tf_long;
+  Tensor out = tf_long.zeros({});
+  Tensor expected = tf_long.make({}, {1});
+
+  Tensor ret = op_argmin_out(in, {}, false, out);
+  EXPECT_TENSOR_EQ(out, ret);
+  EXPECT_TENSOR_EQ(out, expected);
+}

From 5dd96c3ca9b7484449d39f3efb57a5e5a11cb90f Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Tue, 4 Mar 2025 15:55:06 -0800
Subject: [PATCH 215/584] Fix trunk.yml (#8949)

init
---
 .github/workflows/trunk.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index d5ca93a98ad..ee2cf867997 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -229,7 +229,7 @@ jobs:
       # see if we can import the module successfully
       ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
 
-  test-static-llama-ane: 
+  test-static-llama-ane:
     name: test-static-llama-ane
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:

From 6059fde746d3eda56c8cbba5f30401a25e194b8c Mon Sep 17 00:00:00 2001
From: Hansong <107070759+kirklandsign@users.noreply.github.com>
Date: Tue, 4 Mar 2025 19:18:44 -0800
Subject: [PATCH 216/584] [Android demo] Decouple pte file from assets and
 remove unused

Differential Revision: D70528015

Pull Request resolved: https://github.com/pytorch/executorch/pull/8906
---
 build/build_android_library.sh                |    4 +-
 .../android/ExecuTorchDemo/README.md          |  139 +--
 .../ExecuTorchDemo/app/build.gradle.kts       |    8 +-
 .../android/ExecuTorchDemo/app/src/main/BUCK  |    2 -
 .../app/src/main/assets/corgi2.jpg            | Bin 24926 -> 0 bytes
 .../app/src/main/assets/test1.png             | Bin 166268 -> 0 bytes
 .../app/src/main/assets/test2.jpg             | Bin 209222 -> 0 bytes
 .../app/src/main/assets/test3.png             | Bin 719654 -> 0 bytes
 .../ClassificationActivity.java               |  120 --
 .../executorchdemo/ImageNetClasses.java       | 1021 -----------------
 .../example/executorchdemo/MainActivity.java  |   10 +-
 .../ExecuTorchDemo/settings.gradle.kts        |    2 -
 .../demo-apps/android/ExecuTorchDemo/setup.sh |   35 +-
 13 files changed, 48 insertions(+), 1293 deletions(-)
 delete mode 100644 
examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/corgi2.jpg delete mode 100644 examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/test1.png delete mode 100644 examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/test2.jpg delete mode 100644 examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/test3.png delete mode 100644 examples/demo-apps/android/ExecuTorchDemo/app/src/main/java/com/example/executorchdemo/ClassificationActivity.java delete mode 100644 examples/demo-apps/android/ExecuTorchDemo/app/src/main/java/com/example/executorchdemo/ImageNetClasses.java diff --git a/build/build_android_library.sh b/build/build_android_library.sh index cb2d47fdeb3..9a60595b222 100644 --- a/build/build_android_library.sh +++ b/build/build_android_library.sh @@ -178,7 +178,9 @@ collect_artifacts_to_be_uploaded() { } main() { - BUILD_AAR_DIR="$(mktemp -d)" + if [[ -z "${BUILD_AAR_DIR:-}" ]]; then + BUILD_AAR_DIR="$(mktemp -d)" + fi export BUILD_AAR_DIR if [ -z "$ANDROID_ABIS" ]; then ANDROID_ABIS=("arm64-v8a" "x86_64") diff --git a/examples/demo-apps/android/ExecuTorchDemo/README.md b/examples/demo-apps/android/ExecuTorchDemo/README.md index 09045ceb7a7..e9142759d70 100644 --- a/examples/demo-apps/android/ExecuTorchDemo/README.md +++ b/examples/demo-apps/android/ExecuTorchDemo/README.md @@ -39,9 +39,14 @@ We generate the model file for the ExecuTorch runtime in Android Demo App. For delegating DeepLab v3 to XNNPACK backend, please do the following to export the model: ```bash +cd executorch # go to executorch root python3 -m examples.xnnpack.aot_compiler --model_name="dl3" --delegate -mkdir -p examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/ -cp dl3_xnnpack_fp32.pte examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/ +``` + +Then push the pte file to Android device: + +```bash +adb push dl3_xnnpack_fp32.pte /data/local/tmp/dl3_xnnpack_fp32.pte ``` For more detailed tutorial of lowering to XNNPACK, please see [XNNPACK backend](backends-xnnpack.md). @@ -50,135 +55,63 @@ For more detailed tutorial of lowering to XNNPACK, please see [XNNPACK backend]( For delegating to Qualcomm Hexagon NPU, please follow the tutorial [here](backends-qualcomm.md). -After generating the model, copy the model to `assets` directory. - ```bash python -m examples.qualcomm.scripts.deeplab_v3 -b build-android -m SM8450 -s -cp deeplab_v3/dlv3_qnn.pte examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/ +``` + +Then push the pte file to Android device: + +```bash +adb push deeplab_v3/dlv3_qnn.pte /data/local/tmp/dlv3_qnn.pte ``` ### Runtime -We build the required ExecuTorch runtime library to run the model. +We build the required ExecuTorch runtime library (AAR) to run the model. #### XNNPACK -1. Build the CMake target for the library with XNNPACK backend: - ```bash +# go to ExecuTorch repo root export ANDROID_NDK= -export ANDROID_ABI=arm64-v8a +export ANDROID_ABIS=arm64-v8a # Run the following lines from the `executorch/` folder ./install_executorch.sh --clean -mkdir cmake-android-out - -# Build the core executorch library -cmake . 
-DCMAKE_INSTALL_PREFIX=cmake-android-out \ - -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \ - -DANDROID_ABI="${ANDROID_ABI}" \ - -DEXECUTORCH_BUILD_XNNPACK=ON \ - -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ - -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ - -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ - -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ - -Bcmake-android-out - -cmake --build cmake-android-out -j16 --target install -``` - -When we set `EXECUTORCH_BUILD_XNNPACK=ON`, we will build the target [`xnnpack_backend`](https://github.com/pytorch/executorch/blob/main/backends/xnnpack/CMakeLists.txt) which in turn is linked into libexecutorch_jni via [CMake](https://github.com/pytorch/executorch/blob/main/examples/demo-apps/android/jni/CMakeLists.txt). - -2. Build the Android extension: - -```bash -# Build the android extension -cmake extension/android \ - -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}"/build/cmake/android.toolchain.cmake \ - -DANDROID_ABI="${ANDROID_ABI}" \ - -DCMAKE_INSTALL_PREFIX=cmake-android-out \ - -Bcmake-android-out/extension/android +# Create a new directory `app/libs` for the AAR to live +pushd examples/demo-apps/android/ExecuTorchDemo +mkdir -p app/libs +popd -cmake --build cmake-android-out/extension/android -j16 +# Build the AAR. It will include XNNPACK backend by default. +export BUILD_AAR_DIR=$(realpath examples/demo-apps/android/ExecuTorchDemo/app/libs) +sh build/build_android_library.sh ``` -`libexecutorch_jni.so` wraps up the required XNNPACK Backend runtime library from `xnnpack_backend`, and adds an additional JNI layer using fbjni. This is later exposed to Java app. - #### Qualcomm Hexagon NPU -1. Build the CMake target for the library with Qualcomm Hexagon NPU (HTP) backend (XNNPACK also included): - ```bash +# go to ExecuTorch repo root export ANDROID_NDK= -export ANDROID_ABI=arm64-v8a -export QNN_SDK_ROOT= +export ANDROID_ABIS=arm64-v8a +export QNN_SDK_ROOT= +# Run the following lines from the `executorch/` folder ./install_executorch.sh --clean -mkdir cmake-android-out -cmake . -DCMAKE_INSTALL_PREFIX=cmake-android-out \ - -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \ - -DANDROID_ABI="${ANDROID_ABI}" \ - -DEXECUTORCH_BUILD_XNNPACK=ON \ - -DEXECUTORCH_BUILD_QNN=ON \ - -DQNN_SDK_ROOT="${QNN_SDK_ROOT}" \ - -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ - -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ - -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ - -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ - -Bcmake-android-out - -cmake --build cmake-android-out -j16 --target install -``` -Similar to the XNNPACK library, with this setup, we compile `libexecutorch_jni.so` but it adds an additional static library `qnn_executorch_backend` which wraps up Qualcomm HTP runtime library and registers the Qualcomm HTP backend. This is later exposed to Java app. - -`qnn_executorch_backend` is built when we turn on CMake option `EXECUTORCH_BUILD_QNN`. It will include the [CMakeLists.txt](https://github.com/pytorch/executorch/blob/main/backends/qualcomm/CMakeLists.txt) from backends/qualcomm where we `add_library(qnn_executorch_backend STATIC)`. -2. 
Build the Android extension: +# Create a new directory `app/libs` for the AAR to live +pushd examples/demo-apps/android/ExecuTorchDemo +mkdir -p app/libs +popd  -```bash -cmake extension/android \ -    -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}"/build/cmake/android.toolchain.cmake \ -    -DANDROID_ABI="${ANDROID_ABI}" \ -    -DCMAKE_INSTALL_PREFIX=cmake-android-out \ -    -Bcmake-android-out/extension/android - -cmake --build cmake-android-out/extension/android -j16 -``` - -## Deploying on Device via Demo App - -### Steps for Deploying Model via XNNPACK - -```bash -mkdir -p examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a -cp cmake-android-out/extension/android/libexecutorch_jni.so \ -   examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a/libexecutorch.so +# Build the AAR. It will include XNNPACK backend by default. +export BUILD_AAR_DIR=$(realpath examples/demo-apps/android/ExecuTorchDemo/app/libs) +sh build/build_android_library.sh ``` -This allows the Android app to load ExecuTorch runtime with XNNPACK backend as a JNI library. Later, this shared library will be loaded by `NativePeer.java` in Java code. - -### Steps for Deploying Model via Qualcomm's AI Engine Direct - -```bash -mkdir -p ../examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a -``` - -We need to push some additional Qualcomm HTP backend libraries to the app. Please refer to [Qualcomm docs](backends-qualcomm.md) here. - -```bash -cp ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtp.so ${QNN_SDK_ROOT}/lib/hexagon-v69/unsigned/libQnnHtpV69Skel.so ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV69Stub.so ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnSystem.so \ -   examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a -``` - -Copy the core libraries: - -```bash -cp cmake-android-out/extension/android/libexecutorch_jni.so \ -   examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a/libexecutorch.so -cp cmake-android-out/lib/libqnn_executorch_backend.so \ -   examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a/libqnn_executorch_backend.so -``` +This is very similar to XNNPACK setup, but users now need to define `QNN_SDK_ROOT` so that +QNN backend is built into the AAR.
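As a quick sanity check before opening the project in Android Studio (an illustrative sketch, not part of the original setup scripts: it assumes the default `BUILD_AAR_DIR` used above and the `libs/executorch.aar` name that `app/build.gradle.kts` expects), you can confirm that both the model pushed to the device and the AAR consumed by Gradle are in place:

```bash
# Illustrative check only; paths assume the defaults used in this README.
adb shell ls -l /data/local/tmp/dl3_xnnpack_fp32.pte   # or /data/local/tmp/dlv3_qnn.pte for the QNN flow
ls -l examples/demo-apps/android/ExecuTorchDemo/app/libs/executorch.aar
```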
## Running the App diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/build.gradle.kts b/examples/demo-apps/android/ExecuTorchDemo/app/build.gradle.kts index 615fee860f8..ca06671f328 100644 --- a/examples/demo-apps/android/ExecuTorchDemo/app/build.gradle.kts +++ b/examples/demo-apps/android/ExecuTorchDemo/app/build.gradle.kts @@ -57,9 +57,7 @@ dependencies { implementation("androidx.constraintlayout:constraintlayout:2.2.0-alpha12") implementation("com.facebook.soloader:soloader:0.10.5") implementation("com.facebook.fbjni:fbjni:0.5.1") - implementation("org.pytorch.executorch:executorch") { - exclude("com.facebook.fbjni", "fbjni-java-only") - } + implementation(files("libs/executorch.aar")) testImplementation("junit:junit:4.13.2") androidTestImplementation("androidx.test.ext:junit:1.1.5") androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1") @@ -72,8 +70,8 @@ dependencies { tasks.register("setup") { doFirst { exec { - commandLine("sh", "examples/demo-apps/android/LlamaDemo/setup.sh") - workingDir("../../../../../") + commandLine("sh", "setup.sh") + workingDir("../") } } } diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/BUCK b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/BUCK index 2b33cef732a..371c991ce88 100644 --- a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/BUCK +++ b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/BUCK @@ -30,8 +30,6 @@ fb_android_resource( fb_android_library( name = "app_lib", srcs = [ - "java/com/example/executorchdemo/ClassificationActivity.java", - "java/com/example/executorchdemo/ImageNetClasses.java", "java/com/example/executorchdemo/MainActivity.java", "java/com/example/executorchdemo/TensorImageUtils.java", ], diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/corgi2.jpg b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/corgi2.jpg deleted file mode 100644 index 42d7c8cce873a0a8719234ed6b48492e2c735fc9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 24926 zcmcG#cT`hP*Drn&ilG{cbRj^1NDp9;-b*M_Ec8w&Lg*kMVCcO!=~6`nq=|w^??^|w z(xlnwiir3-`hDK#Ex&vJxa+Qa*2-BYXJ+=yZlBqEcJh1f_X+^lQpc(T5bzR&2m-+G zRUlB!-`N2GuvmTo1^@sApoTyJWAKRu{QSApZSJ}uY~1V-wyrjIE(i}C-qy?A1&6qc zbMtBnxd-tyJU2$QMFA=nql$405 zn24B|5ZFV=JHXAy#$U+IoBI!me`u)UyzRW4J$#(q-4K6h+St1L`p9u{{fn@Nle>?* zx0AcazsdhMUjN>FVaGphphm#9zXEr3cJTRc^V{3~yT6C8m+RkI?d?Qx zt~fl-&Bq(;iw39qN4g%azTW@t4f5vejr;TJ?-*Bm2N7_>f6@NefIobw`GX?*m$?6z ze1RQA#6(4}35kjciAnu?GLV>|v!j#Gf1C0DZ#iCn`+%eVEnaCA(CVDQN&m_P@sD6c zoNxHJ+qwMh#D(}D5|dW)_Qv^ORpq$QXesfl7>u~M)E}PzwE$@&ob3%C8=t=t`G*&8 z(2)PwjFya(srke?-eC|KA7RGSrd(V~_tk9saeEf9uc@|DQG?^nb5MS&{$7#GkeN58nUL z)PL*s7l8ii^S>a_A7K0Y4n#K4Hbnjnh2WQegDK7pM9{l`pz`+@pi2Kg016>4EDBsx z#_AFi|AqZzHmE)T|DPtr2K5Jw|6ggG4eACk{qH;!_`L|I0OS`gz%G!I!(cE9a&ii4 zCO9<}74_vy3^YvK96U%a4lYh!0Z9?wD-u^YxkQ!F5>hhq^71^ws#q0SO-VU4FIKq5Ya$>_x#x}iT-S?f2H@&4FV-1 zCLsmenEy0@*Z*w$y#P=WK>#S62o7$&-vl3`iV1yx=n$d+019r#1Um>NDZmK;9o7f{ zPEdq0#X*25D3Vx_UcZls34F^GB?^J5Q7VFD003qU-u}=62)0Oo3`H!aE(!q>1}yZU zJ)$UUF-2P_0c<3NG667B0N^A5Fk3j#p$sq)02QVPMS4yGOf{jHUKHLzPQybdz~7OY zfd&YqAdwDmoZ^L?u}7I>j~=xK1_z)C0A)H!V=)2fgMtes)`akYYbS!LHZa<&y#?qI zOtk)I_X19qpU!4KoytROWtR*w!H9_f+0kIx5DRB~KI*3K#5yn6i8gAUNIois8`9ZQqgcpNPQ_17(?08mg0A4c2m zk1ann8S{wgd(~n$_i2zos}D;-mPDYLhCba?P1En;j3G8A)DZxJH7Ha75(>_1-Xgbo z`(U&07kmD%!X*MECXKK;+IBBGF#UQC|2n11#HX>Dosk$-N*pLhQYbSCWom-8B3!JG z2<#f6PpOOoKnX}4J}X>ZZ1C@Td$F(?xASRiJMi!oybmf>{=-tPsjtN~QNv+>u;6F# ziSH+jy(lW&AO_@yQU%@tsG>r7!p!Y6bP9hCQ*yYl@`xeb6J5Q{DC8 
z>)!ck4b?gg{+>0`@4T0`wFhg z092iIE;1%ty3Q|V%_j!DU?`ivP&9QZZ?QnSvh|~mvvX^9-u&awerqsLEVl3tGI*bo zA{RDWn>Gd_P@)gSpdKQvxuq;<@tUy)emWLtwz=8V;i_HPrgafZ-dpviFArW69Nw#_ zcr(fHCeyi5@U`WG<3Mj^_z*k}PN3|AvQ$1PQzdhPAalH>tBPUPoC!b*I%_AF8-Qvt zF%KWg%9uoDX8D;^`HWog_f6#aI9ht6$RzCsR=qJUdw#bYj}tO3e}z(p!~m*UnmuSr zQUvnNI1T_k2q2>2h9oEgqJV7DLqAOYXlT+;aL{Q z*5af*GFCK8Ti7m~pV#N;&i?wF#7@~-e2uen zueBUrv?{v3n4_m4a~ybh6nLPFB10moHmF=TvhUb;m9rQ_w-XdO>AAFslt3p$gD8Zi z1m{zM5c{{v9QOO|k0kCt&A##Ug&_LzR&&*8#HZ;glMj6pUjnb)iUHK^0OLf#A^rt` zm<(kt3Qr2K-+rpe#5Q-?-20+~a4@O;j4!>`|$IN(-){eNmundwJaX9k) z7$vc2m@XA?*lsq_9c&MWslW%(*Gy+{5dg8H&>U<$mH3t@ zgc4jGY<={R^x?_zr=8J>X>Gp%IrL^m+m;-@z|bNlwnTLFr}V+sPj6`W=sg|?UqBDp zV@Yx;Q}doF2VeCm;R!hOsZSJ)oFG#C1GOL~_JAG%01mW!2RliUHMPh5ZxVgwF>m_r z)t@%c&>qb13`$LJnK-IH(kPA2ExfbOOdtR^9|Fv{Od;yAif6hI{zO4CreRRg1Y3ZK zQjv=Q0M^`Z)@P-BW^k80jmo_bE~Tvmo8EOeJN-7id?~Ncx6sF3{(@zQUwwxP2*-dj zs{G@~!fkICiD0UGjMB^?1~Z64RT0dJ=oo8G%0F;iI=DVx{!sw=rajBS% zt?mh_Y7p>>poyo+S$J10g=bUBu6~>M^<|*wN|xMVEl$1a+4Lzqt-Y?4ShS ztqf77C;-9Kq1roclh>cPRbVk(ck@w!JJrtCQbU`&NwT411+U3yg-b~OkE+$t6A>$c zAhDNr0LYm#%~ZkN;no+-^O1IrL2a)KYxX)+9}Lgh!3ow}6ew`J2DgRiB8e;g((ZaK}xU321DBLh5(*Ag{70_e>2?E5TASWP-LdCL~ zVwKbkxe(fef#qNG@sJGc`j+|ptZFOk*lpk5R-52=*DWz;X&DAw5JFOHU_(voJH=+- zNORLMrO)iymafV~ilFm>jxGvrIs`j}I&B};gp0s$tBtT@>iCd7(0~2G;rD{;ORm@P z$6NP;l8X3bQfiTQ@XJ!OmcDNp#5Tq?u4Y>12mWf-2ND5m1O&7zMSYkp07>(9=xL16 zdI%?|dyUhv++@49Oy722CopGL-Ie&&oq&a-A9Aw!*?CNZOq5>V*Uhw>8|%3Z*QN7h z)@Q$rnL%LV0BG3=5WB#g6%sRKspF!}1&L8+&aW(vq2z7N@s~_G`?z0|S=Q?p-5S(y zGVIY&PfD7$BjPUi^^S3@1SXT&WXEK-_1GQgs3U;q1f*be0%|us%VOLzXs1k>iSQs( zjgmCySwfqhj-U6krDR!4sTjY-7{@aO#tJ}CgdPmk8(udA@{Feo0D{|DB?2{LD$nGevuMTE0#TeG5^0a+y{oDPm|&y`KsH3qdnQm& zeNg!p@a9v0LwC*G_rtridxzT&6TCj^X}M#g-!?{UZnmiJH~CH_;)SYtF^>AuGt#)n zs*5)!KaJs);0yo)PDf|b*BB>(B89@^Ko~nXIbM~K3=?kDw4i}|<$vTWc@ERylh4_@ zjnhwi&D=7->bKGRm?yXI(BkFW%{f|KoP*DZU4WaJ?&q&x#vYj{L+H_zio_4HOdj@t z(FLU~Bs9u*(`#8n2MgtrCN)UBG&<5d0f2hoRbCn;d9K{tyqK~qx33ACU z(#)n(!ekq`v&%OmC0&(ns9Bn;%M{c`3L>ZHRkF%wEZ^BZtC7B&m4;3r2L-6Y6ovj9 zW&os~V}cz(-6b9UZ8d?*T(}QgZGxXH?Qv$nxPv40(rJG=lw*(%q!}$NUS-v zHIz@~q>gtBb0t)m`k>GV0Q6yNE=6l1CR&74!{e08IuD`bbo94!w7@9p?99z|#^_30 z(A&(IFC$Bn1)0{IL*?DleI_*OwwA+k!uh9?@pTEHm2qkRIj8EdMcOeDtTj7Ett7K` zuNKrO#i}ycOhy`zd5kvKwXV+8-5a}{O5K$0Gcz>ZQr6|d)L^J{SmkYFKWP13`fzty z5(l&9>I1z31VAuAU?LEU5~11HT0Fg%jjjhv4+FGs=!@Ymd+hwT46km_xcje<#!QZU zo;CcuY#1kNX(4*Gn);U=nb&BucvbmhgCol%Ru?faZ0J)`Vfqs@Lg6JlAefUODH{UB zt86Zt<|2n+1Rk#>yAqSp0_Un@7g>{OhX#x5)f0hkzfl+utrl7gRA6BQ*ANsAO#R&f-xUP>#~``pWQG>Qx((U*_5eTc+m4; zga|V|GmLE1-ds0Nn|K|V*Ozr6b1dY(e2$|DhuU)T_4?sjD1f5}b&7^rLm_{HUKM}_ z5YJ}m5=UnTUmq6Be9%Fv9pPv499^u~=d78Mn$Pky)X~uySl$KnE2S+s`$v` z{pOR@P_C;03ML3BLjTwtK{z3 zCK{P3yi;wc>tLY#1RCWvyt!q@VlwYAb8UUJZ0V`+lVQk|B2dc&iUvvpl|b3TX-Qzj zI<=0u%Hm?kUbo?g21M`}iNp~B)!e=&B$XnS>!=mKmC=oBeI_HsCODJ84E=3IZOued zbb>7;28=vVppa=0h}fHs%nqG7VH>8r$7(sDG-0WYc!%;EwNF%Qa-kCNX)R6%ajA`1 zUp2G&;+rtdtkevW1Zz=9n&=;lAms!ih$FNP)oItfh33ca6x=FKi4&9k>~6dzr=9zx z{uosZ48n&&TMkb%LyhxCg_UU`ob>xdLBF#`gXggz8(`#`(PVA%=4HQH0m+maikLY+ zXQs(XMuK=UM^W=l2M&l5Qw38%vYUj&D1Az6PC^GbEd+rE07^wD;_&dXoUQ}7!Eq+M zspm;ecm)Rf2uQ-4VgUA#h6loHi$U3em_Y!|0URU>;*cV7T7b7@`Di$3Zty-Z_XJy_ zz?>2SMJXfIV!}lgz=T!+4kl3$B4Q{MN(2yrJwcCz(h$LEIf)UXXgWnMB{6*xdTw!L zYcR!yfr&4K1p4#H=r`aoyxYEO${6I?9$Hif`6vk7=H`mmVDPhA(_5j z%PYPibUXkZv9NK zFv;d0Do+wzSXo?A;YR)S zD_%c%IwH6$egnhRm)6^%U+RCpU|hDdxA@p&U{RplYOtYcdu|#3nwE!EtgAeZ4bP@g zv4(xI9kV%_CYKHJI2b}z?l++(C+A=BLS4(iZ*Kj`$}9Mr%Kjb$o>IYYPxWT?F2BsL zT(XSsR`VCAD|J?LwlqC(NVE^so}4-<7iO!L*WXZac-MVXOz>Avcl+^X_ zKaJtwgSfQ8#18vkYT5gpo5SJ^>gBJJr`TdYq^AvMGnF4G+h309rkeU{Nx}5Ies~Y9 
z@YLelb(zv@jUQgngo|IfCP~)6RJW+0YdYxo$TPvC&y4*7_j>8~DfQA#T$bBp9PQpc z%&)JH)!-X_90_k;%e+gbc&Vy&l&|x^41IJ~f?T_HI5nK~rFVOtnL)O3U4U6d+0?k` z__FGESxUkUGEEqmaqIW%UNA?V`Gi*pMs|bRlSO_aw5%j=z8&Y z^C2!`(z@sRlUiemtO3=j_1xdYCvzR2k{$3X^n`jjpDHdnt# zq`XEhY?2&gOt&+adgAD5XIc-_6CR0l-l{f5_+Hjf;AE?o6*u7;e`ON+%~bWYRg~k0 zpl0?DrQbmMfcCS|LB~Pv54iVncn*o=EQcL5xp!>x)4?QTZw)~o*Y{suaYb4A1_yL| zX6k3#xp=R$Ew-s~Kjgg*a-QxbqRq@`SY#2-QuMiz*<^B5A#_K1?OdGUk+X=d*TCcAaC>Zsa z2AYHj2rgMox@Bqn5WXj-5#))}bui1Q>xC!Su5wG>Ol5O3;k^9xW_4rJsO(1natJJS zGFI_Od9(;KDQ;`FobgGs(^T$954fE>1|QE&Dl5Z)--Ak{J7zL?gBQmL6aC ztHY;ev-S<|_fH#`8q{VO(9u8cT=?AnTnJgE{i-tQCFiS_8won;4e5OQ8LS3wVpyE? zX!^>2Pb|BhhO~hMidVJs)2_DZ*Qa`q81>%Rmb_xC91$8tnKN7Npl*8mEN1cb-^lgq zdUx`$N;B%Q)8M^mKE3aj>8DbT&)u3scpcrTg<3Yc9bb-8OV^yh%>C*O&<{?s-)GC) zFRJAza3jyJCZ9KRr9=hN0o3RC`?hW;5%Y{hZ?%ET_k+PV!<%n-Q1H<4nE&pUg#-j}(TC8Rq6vqVyw1 zn=Vi5m#Fhl%-V7(XohhfaXe$6epDLsj-uD$dNE)66T>7~y~+#^HIYPWiOJ8(;_BrvVLEMhSlEPB>*|J*Gs)G@jpQkIN85SMj_QX6>jtu~tR*B^@Cv#; zw|`xYya#U_;TURMKAy0<|8Q-me;J(Ff6sP`8_5;Ze~`Xu z$}<#`s&;8^e7*XjIksM)-_lf|SH!G|CfB{|7ZRJ8oT)}eW@w&hwxG_2$}nhL&<^W4 z_QY%->FlhZ7#-!T30#*V-tFk;`Vm+HcOC9=DpobX#Bqf-KDmL5NvJ;aW#Q@ME2oLR zR@x*z`}X;Vw8jbLOq#4+;)owl<;B;y;~I;^N+Kd@4bnJ~lT(6E1T{XD_nYFS3t1<( zcj}hsOV^@PPwkk>*3`|Ohh)_=9wg(<7Ld1}z*^Wc?K8JiGGFdBek}TM^va*(>{;Kh z;K!79Xkl~hFSh4S7LA_e3|jYYq&w`trb&{^`SIYFQi$s$ylVkOi6DE z`;(*BJZ^^eF|QX?lPPr4DsRQp%G1&F`?R*V{X7*66q8>r7Y}HrDx5Mcb`cVMZMc~j zn^WpxLMv6}T8AFt^73YQ`q4mxCm_GFzQ1%r&a%nGmZAPwXQi4U9$WfL;y~>tE{U(? zXeqM#+x&g?voB6ZUV_TUI-VtQsBDt5(t#)2DzvDY(^;pr2gNGkq0IBR_};O_Y8LkG zM_lc!zIvduFilwG>Zb4zzBjuQ$c*8weK)o@5F-v)->Lb(9D~yBV)@ z1ag_QK{41O;pU;qWzalm$ZZuFX6EwzTp}E0Ua^WPuPr82Z3QL(^HowfnAyY6QJg z`*<89#pO2K^Q&k~>3tIEbBNXifk z^i=HP)$qi!5`9cAv3FQhs8+_hlKr&)=7KQhdUyj~eah+ed{b#B58JqgCPkth>{`i* zez9_1>_dI(oi{?#-HM0G7>aX&eb>hi6@yBQ6dTWzEG$!P*4Z?4&QSU-g~Uz;m6XhS z(MKEXS9*vj4uYQz#O4tm^V3kAjAki+@_BXNrdNtanea^QUCs5&oiwC!aAK|Zo4!aT z3(X=H6`l1>nuEyS0NahETq>eosqkJ|x|$1-Y>*gs8)?aa59MmcIADt3I|5RZ9Mx+0sQ&D}i;$M~iqfY3*)Y&|wfKoIB%%GI;0Ve&7)#&y34@QxPn@6 zy0%!PMxKh*mCsHwEY~^^7^Kxz(_$Z8T_w*|j?<;;Di<&4eyLosV!(fM$!TQiJ>H8X!vDwRdU5g494}~y9b{3bC?!~Gqlivt1*MIxmD?^w0bud@x zK+E_C3&ogH>f=8BMq-wHA^Iz=yHq4poDv3e>RhhvK^V87hoM0aO_UNHI%%_Xx!aQDxb3+lT{shL%gk zSPJy|kIx8WByP=!hI}pgq5j?Y{Z+fik`$Ni{NE-OcR8=M-q2_dlhoq-vaZ20FvxM4 zoF28D_WI1;m3f( z`h+yKLZzeSTN{=A;wovRVrA9+1H5=##nN%_qBSj^KEKJk_gIvzBYwsx=cwJh^Cauk zr|)qX+MAf)e8r`_dKArF?TSqaNxOu^nQ^`4A^Oab z!+y!coGTTY6QdM`J zERcJ}_1Tpa*=Ze*C!&GR63gHE%Z%i)EZlh%@nn|V8gdY!pM3%6CKOkE>q^9%XW=S7 zgR3p>s*wF>0apfFTAW;t@|W56&L=Kp1Sg7)oGCXpN2Vv+nm!J*5$FAYs*kU!jwjZ+ zvUYWRWS}+i$v}_)DQ~j*dNBF?w9bv*NjepG`+A2KKkvh**U3&^we$ZBc0duYXm&pcek_0K zT1x_lS~@G&O|$ETMP*B7tE!zx{8b4zA-4xJf9x6-@m((n4$l!?`wjfr5#V4F3+A%_ zHJzn_bE1_H2BJ2ep{d1+b)6Hd`~Q*4Dsb{L_R2dyP^Vid7wI_Ljl1Icj6tac=MX=% z8GG{LT!NQx;EW^Pb~#;uSA^F8WsZ|cdCzZv>r9-n3m(Bq7Rqw?A=NgXj#{*K<+h=2 zk8G8)ZOYI6^dhNJtN5b&@McfvG+et$)Kugd{v>)&Nl_l3+gV-q!G+(y|nkS(Zl zbM`=UVro%Vl}+|p0{wDPaXHm>B;(iYt}gwEu8_Orn3BLf7kkDxoMJ~_48fI0>b)vW zSIy;!r+Vmy{PCCDi(f}Mp^3gSb_OxcklA)EVfy;s!`tHbZ0<*ndQ-tNRzBzrNbS@- zDA;$>?w{j`(8s%)UQEsZ4b$p@qo)8?NF3> zA+nm#I(zO+XV@|1HRgdOBR0gPKnGC zbojLe`4!wvWNeO|T{43}5J#l2iF%O3>R6;OvxA^FU9-DZ#1Ja4qve9~Jr<+clD3V< zUM+HFv0kpVRk^~!u|HLb+j10Aw%wI2+9J=dnd9SB7+4t7R^ut{TuW_nhi37_SJW#r zu3WdH-7}5#DnnDaypFVhQXAJ*U|=N^5T2S5)t*=npG)`m?krpOKB5RckkE~0xm{%c z<;0SOY?)y=Yx;TuSB9~jt+6Ja^C?DRJ3dFV51u1OCZ38YWYN*WIet+!kYA8c=@{aX z=8&bP70b1KHedN&ih^_`lV&DQG02y*VTqp7GtJ#h7#m83mMiQn8a(h3M`^dY^O!6 zKIe)sh3)f-L|)NQ)(_C*wUW*iXh-Q$Wo&K9X6)W!_4pZawfO0!&c}TEgA0lqx)zrx zR(!RJ$t~5h&6v)!e_=M_bWN2fa~%2>n18hL=Zk%3h@dau_c!Z|zt{zTu$-+RS^J_w 
zx41(^2g9`r;|-JGo14s75l^^~O4e6Zju|%e>?p_$X6vREEDCojDfr=(PVf5CZl$aT zN5+1cDheLPN;j;e(bLMni%2Egg1JA`Ey+~kK80_KJ5QD9tfZmwd!iQ^+T;x&ogMu=Q_9YFqc_N8kpAA{wT5R@MhKUwS+_u3fe0hFI?HgQoLWM3eM4I+H zJdaJp*2r9!NpAbQSiQg$)V=X0qJ$#(Y5{=wbg_pkpZiek9mhmaexx~G@ zN6!4;TqyEZEqyat;PtdNY>gGkrKhNvU$HKH(af3A&pQxY3HckJM=@oX`IlZD~nD;>^ zxWJ70+Y*1t$~T2{ zSNA5fI}?v5f5Puc`G@OGya=sPaQbp}n$wq;wQXuOPhhW2=)?C09ch%;!oVg6AdEwo3|&@Hyq0AhTDyuSVc_r4uX4-Jw7dz2tnA(Q+ z799&sH_TSRY8;#I6GG~N*M9$CrYC+Ea=(hxt{`st(raSRb8U`~$fGIg%S&4IcigLr zPt3h~@VUQYXq}LB`C{bU$2!KdX0O!cMP<61X+ z8@{8+kQ3st=GV5n9rf(`6k|^T&GY7~-+EeNl56u^d@ya1ug14nZp3Ot#{BF314WILgM6Nr87athqimRL_R|=!CPIzV0gNQ+*vY6!~o?{mA zB~{E+#$X6BMv86J@4;SD9hy$9$LbIyvHNg)g; z{E9qNO+kVOBl=@MDA6t5?o^KL!t;tdvBHV+y6`QPoEtwceaw)+sE2F#)jEYP-7U>f za&^@_x6w}!mhX@F#hnUGEKnd!>HT215~4x&sY|wgL;CuVc;ictYduo-9H=~69+fXTXwc8%Ut9M?ZK^Vwxs>O{`rUI zEL)NgZuw*y%H}qSW+7&{eUn@KD3?u_U<;&`ylLe5wwH&T?PZwSQN)bkg9nek9dMTr zhe))YGHN2N&imQo--V3Xn;h}2=%%ESap5r6>L_Z%XStnX&MxPD$m&m6$HpkHb)ikW zkY+5m=Zfr!bsh5<7m(rEEhZdyg`k4 zJmYfI_JgVf92rT>)e_5>_Aab3r?kI;P1zFnQ`9RzkBiqn7jU1dHzJXuZ!g-StGv5W zUgQ1adXz6Gym>jr<>k>Lj?9y=tKr=xy-pHHWmYoz0d(T4xj`Sz+l92QewFT?KUJN{ z(D=Tj+7)jTE@`F|J0ngPwN~p7>&CCTP_3^JSw;za5qwbtF7OB;Dmy}SaUJQZ9F?jI z?mW&3HPPX&UVKO~Rvb)qq4+|btjhaq99OOJF#{76Y%H`QOjg|MJaoOow*A*Wl&IK- z4oOQ3VvamK5+}%LiWw|&(E<~FDsk%0PtOu6!_+@B5(=(_R?5;|oQuxn(6Ke3=u}|5 zPcDc$&`}`eiPVkyJjEM_R8U^o4!XQr7Zv9gGdWwgAYxo~vCEOSE|yc6wDXcOYofk` z2C{D@kO{~3hIW5 z2M*cZw`IM@km}QUdHXB#t*Z7$6V)(tq1ROQGMUtt2^}x_LjwUR+_In5N z!lO)Vabhdx4*NG%gbtlBM<^qkJh{2~v_-ZgeYu1|69yxf%zDL0>mpS`UODOTZ40Lj zGG%X-REgeG&H4K5ha=|jcgYW@uGzUZ^A*xh$iP$3o_R4MPeF(E@MRJuS}Sh7WPcY_}}=F^m4q` zSaZ8@G=$uIWJ#II&ZaiqH*BwzQC`r=j>k-XzL4s(Ge3j5^&TP&-iW|v(R514v3^!A z`s^#RV)a;}f^EcNIG;P<35e;5edHz#YL|wRU6=lRTJF0Ed%6CatoIae7 zO==H@irPqubJBv?M0Ne?X~%m@wl`K{q6#DG3XZYq26b0V^IYeD(GV+BQK7k5F+y$X zI)VyP1yS*B77SskjkQBG(krTO+j<g}Rc6t%mJ)UG)5N$r`sA%1e$UxCf#%uBnZ;*l<>I&KDO8>X_y@rLVPf81tH| z>nPid&v3Pbw41xbVp+&6h zwf;C^=jGi??+TEH57od5bPrN81+-TpIAKa9wl%UL`)HW5jW(Gw7iWx)oeU?K4gUS{ z?%(<+{BsI24NDA#A!wDH>$MPoCV~J#rfKU!Vf0_VPtq#^a6=#qLQG4Bgku5lo~B9F zw1BM;r{!jG<{fvINYlkYi0c)J;cyZ$Cd!IDY||qCMfp4WDQr_(DsL)T6AVU+Mb$4R z0s}f_=~%e7vsnusldhvSg{>!Q4EqFxAD~@(=0gT zqK?EnsaG7>JGtHwFQp$0lQpI5N-XH;PXbr(=<-hJwos5E)7x^W`A*b+d~dylnZko2 zE6coFR%9V@dozg{)6Z4WG;17shzA4Wp_9@Zf%f|c(X3m)0*2XF4jm7($6J%mm z45hJ#i~Xxeay3bOcGobLycJhgfXAFVSD5O`ck~!VbL{skwtq>(;$M2q=OWUvjbW;A zLnmmiFUu2|Qt1D%QiL}RMZ!^w-6_BmFNKF+u%6W2=A^p%SA z9pnO*UP*`+#&a4={~IuF^tqp>&69_<6Q4bG7Eg1oU35x=S<~gV-U+H+{tXnjD!d<- z{l=1{9+r0n#K2KH(n7peTUnvjTx&GGK`CNB1m*?T6 z5GIDQiD|MYAD{&f2%7bNWul+6b1^LpB6DnrSv8?V{*W&ebc#bLQkTWN;}Gl7c!|}k zY0hF0p^Q94>g4B6L8}T5zwe25O-rlAHycZgZIBZ>gdUp|2tfQ4jaDPOZx>Jb^^L{- zq81xU!v$>9Y%qpE9Hqqh=4?GAiQ#`Pjk^Xl>C?o0 z>5lOFlv;bAyVwV6QpT!;j6tT6X>6W64E!9iAYEL$Ut@mUeRHxkqaL4?jwfgWv{TMo z=Mv<%2)ZXrdP-!c_7=DGgdC=vfIKBYA4xE?^C7byY39wiHpU#DHjhqf8(ju z-%7%t8Uk1w0`GtSQ~_3pz_UQ`XDy2Qf6tl!voHi^oGWanrr-KcRPufTRIaS-qk3-d znil&FPP=Uvsh)*wy$RtN#>ae9aQ<1u>(9>WckhxhDeYd6-i*GNX#YWAfud-kW|7TF z^U-Z;RT@_j`yYw+msMno&_Y%4l;1ny-Px}4F~(RvvJ>3&7?*dJsh)W%LgvNGufc0l zST5a$VcBu*b`>d}x3A=h9Kjj{;zgKnyi-HN96W6?gYw9!ZX1Z{AXbLdx840K@!=d$6AS z`{><4hLjuSDcc2~Hto&Oljj?izk$?bV-q&E2AWU1MXDGk3%wI#zr8$Cy>52OH|>R` z<1S9 zS;|4mFu}cZFAd6{^G8LGK^b*-ZrZ~&!sSWQ(ciwbYuiUG%O24sF@;A3K`$~U>`C2+ z$vGCDn)5Vtij8rS?Bi<}*YoqUR|?aIxgUG03?TD( z;YesmETW0-StTw%n?N@1)Q`Gb9*V5?5qlLQuqemKx)xCuKPWEi@2=GfXj!!+C%&H5bN@7Gi_7(^WvSyR87O{ zDULF72Z;Kyfn*Z*S3xS)4V^*qG~GWSD|-r>iu;Z!b6&HTr9&|Ot+*`DefJmmCX11> z!xYD;b5!u;qRqO3cn~y5vLIftVH;n+OwBR){%+^qq7CzpLgrKMtaHmnrtFDk^6s}y 
z!#{^Q?g!ud4M5l(H}8Xv&q?!d$NyIm@gK)WgC!;<0~?ROr~iKnh@jiQyZd3eJX!5G z;9^-cGM30lVP@GDE}SHuw0>NhgZnu4EGPZq-6k%bQ@xqvcx05x(HQ|}WbCnbejd*e z!<7W-{~n}{H0`YNzo>ok>PWpZ2}@hJK4SACK#WQD!iBZ1Nfz35Ocxo=PphMbP7TsA zM<&5ntWxaBb6*!fNKG^YoLiZGSyw=gP6(1Xu(qfs&=N`DJP3v1_p~fcG5HxT#8xx zyPd)!-o->9KkJK*KDH0lmDx+!4to_F8sN%={Lo*g#D)d z+L4c@!QUemDP->!WCz{lIZuDOu=DnFQc-FSzGWTfI=tJu6?NF`p0roJvRd`6J)QmA z)U!l0S=m&v-Q9NHlNxQamyY-ss@*Iy(VWYX89x%vfwQjtR?SBh+AF0mrFU0x_%<2J z{$}6rMf$Jy+><wK)P%H=Jjg1_p$fWN5_a1LUAI< zgVrM7{5ts%fn}{bdP#wX{UEnZ|vc- zz4lFT+1)>fWof^1V199?p2^^!afE139|W6?4htTrdgja0$hQ+=G^M&%o6eAV0q!I_ zEA{*N`IMNwgOE!K4XZ`WX#+Vi8Uh0E(y)%9?kSEdZDtMKbb@BC{b7y-4OS6Ip9wNQ z-<{{dnQ$Rh5fPhsW}=i&OqL~7a8Fxy{dJ>(S*jG= z*|Q=~h2M*P&+$Kh_B?uEOeSSFbj`nF4QI7T1=E zJ~eryPk$-oTjyRK^^V-*-W&<%O9@eSeJZ$jZCEhPfDO%|M%sPgov=zu!E~tl?Cvk) z@b9(9EZ+~>+mr9-fN;VG2jhpoMMVh7KX?Md30P8u8~m*(BK}kq_x`h_2#4_+ie80e zLWv(6ma9e>Wp@gjw=-_0a>9Ebk|`5oJ@(T!WtkXEJN6A*VeG2qKtz%?0yQjSa1i*oN3iErt&6 zD|g-ev$*{bw;RjZr)IeFvS-Z+cWzFt8ZtzPuJJEz@YK(t;|%f60&Jp)yC*^Yc7~gb&6)HkKx|d#$eC@ipJ%nDF`yJo&IBr*O=E@Ukp)tBLx| zC@xc=b1)+YW4Rk?P{CqL*H+5p%EF+LFE0d|}w%%^U}ZXA+%~8BkhJQ|@;e+emR&XiMY^ zTg6RmhuHQ$ZMJ-prqA=P&u-%MCjl0-mwF**Pr1INH)<$>L>(@vGOPFfvb$~*Fd-MwbLFw#?sjV1+wW61pTZYgh{A~X;#LjMQPAg^~ejXQhr|y2P zIk%9NS0jnmK3+j=hV?t8@xo~aZ2LDLLQz>)>=kdgJLsGUdlk2lDVQyoH{av7!4zM4 zJw}1HuzZQZ8QmjSobL71167$G^M2(wpr#gCUwla^p5hQx&_j9hemh#+R7iZ%ZL6}! zPA-m*Du3yDTGBr{e?u3wsTRI!AmUL67b0wwsyw~S`+Pf)lVo-Y-+li9#pdfMt!8Bx z^V-9Vym$A%i4F86*gBQ6uaPk{u z7{-K^s2$O?_P$)yuu?kx7nt;k`h^Ytn>=2b{cMeGhPDMqB*^Ib*_Z^DFj z!*pdQ_gBG_VVm$_TC`ZfC3B5{$Qu(&5zPU33i`)+RC7>Bk>cX>a2KE ziWZxjx{g9dM#seT&tCdQuZD?9czb=|t-ScPgIppbNcH*fnJ&>uzS5?+N^;V=!Df*~ z2c+ZXonp=EE<_(~jckt>fy|p{y15=kt(}R&brN*A_uS+$x zbjdySqh~GJSE$q|WHC2)q5S^~F&ob0g8&}SQ3ScZh7Ak6QPjUdM-F2FZrocCmY4Gb zj-1r^kjM=~k$xgZkZvVJjgq2M!lG0j2$F&%Y601K0{ zxv2|0L18W@Vi1VDjw5Q1h_cuj5+rdsjBP1!gTm36UI)}r53wp5tNP|&bkpiydiO7l zezF{r&hi@J!{_*eW22M)Jxao>yqgxvaW>6c`jo&L@_#a`3LgfnrBzohmU8i7%&i`w zH8-{57f`R!%(#lbaRCXaj=%D`@PjI%c}|qth{EOM!WxdtNNPkI8I}R4Zr3UC*T=csP;X~Ypx=pfx7kX2O#I_E+q*XDtcY> z9wn8=)S?#Br4%L0io;_BtA@c0 zF|xz}vLu0-8bO3GtV>}Op;!QlbqHm0f}l!-=fzH-6C#x{WeHzU1ZnYp0qSw!iF2O9 zj38gqm#egUl)PSQTGJQGQb0HL+)nY-tVOr=HLqqv*X}Sr6|e%DD~RK89s`+=oELF2tct>QSZ&cl|s9?EH)H3$IZhC4LPV5QZ^SO9k~jY*AxL>ah*UXXSB4Rl5TQLyWKTw%vzcq;s3i$c9S{$NkI%$8 z7=fqcsb#Q1cK-n87A@z96q&_K5a`Hihf0`*umgw|tbtYXZfKy|y84Fxo@y@hHhtxpCr1U|`ecYhuh=KkS zXxCEUP+uFciN2!?P>x}Wi@Uknmbf)DaFxR^h(Y4S8C4O)0VqL$cA_|n+0O;)C4IM7mpL-Bk1hx_$bT@ORFma(D8@I~-#A zVD*?0jTGiF!Rb)~oJ3sub|hXjP+Bc=X)fYpS`i4Un8B>nff+NQ$Bz6lYw!n$!M#3qGL0YhQI53C}moEg7^6>f>s1 z%lpI^k~y=3{xJ@%BiMo!R`(KHZ>N%R6eHg)O2-z~UkP>}=ZIgS{{RSqI$aQq{`qBl z6`%WlFWy;M%%`ZM)Kazy({UBVEMcViH2_d-VVe0R`=i`Cg!Kuv9Gx&_MB#JkEM;MB zfH5f|IZGHrz!9i;yYkB>5QL~?zZX+hf=*yc@>hgdv_Ufe0BKU%R_mq*60gx3Hdmp@ zo&_9SLC9OgVrxp`E;+zD^))J~SMMULo(D5-8pdbD-w@gwmBtmQIrRIMY>!Ewk5Zdo z`z88Tdc*mM;SZKtm49wU7W~3j)D2@U;@5KZSy5kg4SJ3=n<~iNaXw>X3SvJJ(fIg^ z%a7dj7RMU^dtiFIgr|Qn08}?maRM3}EfnK0FuBLm93w9h<(R0SXn3lNlImG#fVG%l zVj8)L0fcCm1}PHCfy2erJX~6u;t>Lf$ghZDYW!@VSNxpvwbo6IRpcfbE1y!edwj=a zQ|Of9I#>797`rIipckO_^#N{9!2Vd7$h2@uunIIglqKT*#r8jZOZWU{fc^0ra#RW@ zgQNcAnpvXHk^Z2g>Qk|l=~%a$Rw zy1uVtwfio{w0x4m*R7W3`_=ydXaETMnsx~iHy%_9SrNHXu2f8x88?(3wO(MKa_OnU z47pb-qr@7Gt<8aC8ka7A5+Q*sk5R!lE&1d1GOnJ;j(*IGOr!XTTK=O$GU&TY(fEQ3 zC_0HPhqfFVABLll=u$7mOH@(Ii~E-0f0&8Aanx$Gn+WZtHC00wWy6%0?8pMIT$C1m zg0lhWEsU}81nOVPOTUS;P;UmqE)xD=6J=cQ z+_;T3ihw$y1%UP;cn^tHRebXd1vkU}K~ULP zkUc`y$B60*p_1|uRVctB9%U*mwJUsmOGX0h&m^NQyokKQ<8!Ipc?1B=gC$El>MgWN z;{N~_0-=9NV;lTRIT)kD2bzNf@ELtQN)VF*3Uejd%j#C^ly8i$_s5I;=vVEtt`GY>CJxiuL 
zI4@Nxe0{-?U&1I0y61SCfqyv=+k^o_Z@F=FFJwFtRNyYiBve-xv3R+TCf&{=-?f29;vQiY0=*GKKz>#0_Zbf*I_&rq{{K?m+#x{F+o zOgF8igOPBA@5CY!T~F`fRpm|lJsGU^V; z`zIeO1X8op)b2l0vJd4J)ht6#X+nI14&&4+Q4_}8sa4}3RLO)=9I>eUN?~57ajg^c z1WcDMI}3?$0$c+aDkTbitV-oIsPP#E4sD7YyB%0;#an+72A*DG-YhDhuGx>Q3+0Jw z;MlsnGS=RMn9j)LL=q)E=(kUB^)d1*5^5#-CrDB6sBB!m$hhAMTE^n15NYmm{`aKn z6{{&S=6AE7KZx8VwFL`0h2@s?%gSma|_9ffNe$5J}u+Ttwr>Kt-^GQl665xmIML zl&>`(FY0I%TPn!-8qPPlZCjGNm6yaTh?aaEK_>fzyNa@%!uItnxHBc!uep9;T{`tr zw?MW_e&fP#;B^&pdZIR$eI&M=`gw#+=hPT?X2`SkFdBL&7462I)FG}vm&!7eMY&j;v!xVUkEErtfvlogfwZ&Kua53{6yFB z`$W|-qGDE5uZZ}lYbv2GWV)2Nbf#U)jrBc}m>%VWC&~;orV`r%NEC8643~l5Q2zkD zL0rnX+4z*x%|;D^W#K#6J18j>YeUK)43(TbZ|sU=Z43G2eV@rI4!|b;A;fSA9%cTY z>L5XX5gJeB3a|K-3v}L*Y~*_)YS-~_R-o?+tsWUuT^y~47m@xTnBrKfkCK+fL&)O$ ziGfe3_OQumOsZfUIhP7@MuPNYWUoD3L#wl?jAy!n(GdBCJJxUG?ri()gGQ&+pl2MO zrU<7f%aoUXE?)R%aPU_N%`#-Vu~qdpc)t}anH1^-je3D{N<1Kptu;?kwOZN;!j7hm zfZBWbm+M3TQ5_&#Y;Cau(4A<$;;-A1e;@nQN9>@jc!|Aga(y14wGO-cgskE$;Tr;; zm}zQ0ZJ7 zg_Va)ynV7#RYnTw67+mi5a;o8M%5m!3U;%o%3Md|q=h4}3mfq((1bwlU#2Eh4fyI< zbrLST7N$jsr%LiWI(U5N#6!~f@C7eOBGb#q}g{zol;Bza} z`Smx!#!D+$cSi^FDkb$0rS~Ws5PhI64sinN@w{p(=RR%xGjTTg$JbCPkSX|KLsj~g z*~jx7viyI@g{S7BPAD1eJIoGRc|RrxTu~F6t4btA*Eb&8E&ph@$T9mnN#w&_suqPy&dBaVy2dzU5?YY)2&$ii<=W z@lyArNs_`+q2MyO)fesyd(ZZWHY~5FAeZT#y}`ae=wLS|ajjD^D zY;VWcu zbMNxuHV!o-JY36F#Cm=$zzc;PG!^r_GA8` zfQZZjQ`spOefxkFhi!-mcPSsD3_fCTsOw!%(I_vdi{eo3qy;3$;TAOn^%+5dQIFiM z6AyCXyYm)YpiPWiiXgV)y-TUfb1GOxcySvme8vrKF2(SoVdYmU0Vri71s;5Gb42=K zX$TUxW~Cg6x{d>a5FZSxA+W+A^v6gQ#f(7qLpRh|wQ`HaMNz+8!BxJEMGmI-Q4Ixp zgGvCWbrv>i^z34?!r4p=c=(zxPJPEw*D%Xh7b!t84xnmpn92l79Js3KEU&-sC~ql^ zg{w)Wk9F=?Tm)nwCC;Q}PJAtM0-{&BSuThqQeSOFE5b8~GVVDB+=llkJT%8K&hsqK zghnhnK1!nQCCXSaiUB5?B_^`QiD3JcueK3a^oE2R6aWLmlOz?=VlYtcK&&bImRrwx zJ(KB$IFHmuncLOFS@jv6*Y`_u78h|zOIYHqc2<|wLV!!&AxgfY2ED}(xEv*CBi0f6 zL79v=KIH=bm?`N@R?*iIz1?*Jzd^{T@&Kizf91d5?P%h20{~rGW=0G0x!; zdxf1u#)t=r$C-7znQ?_#QxI|pu?mLxi&3NI3VR`jsf1x_ujVNL?g?{^Ic06}#7;9A zfp_VcF`wM7>(u5sl=f?5XsSX@xw&<OprPtg9nBBLc#t(7vsiT+ zJjARNg@KHQ<&Q}B?lGqfR%qO*QjcsLl=V0P3z>lgcJFq#*_dX^TI0Ar~yvd%HG zITbfeT2R{a97OKphsYo=!V;3YuqrTUdxjXqSI>i)h=y4TOH$#a4bWu+Wgg|1E?H0# zmn?;{F3XHrWe=G4pUgmmX4`Jyc4Vl_f<5_H5D{$c7Zj)pPGRZ_Uyl&WM+%gkLt5}P zHVj3F?mK3c{6kqXB^VO+qJ3N<Yf0_rZE%e`spEi$Q04RaZImj)s{__slbaaA+vg6>xNwF^^yT**Ty zkD%}zaX&KpU>t!^7-vyFpsOyropTiu)?rMo)y`ju#&BnVwj>nDZgS1+j8$A^JIW$* zcL+aG9gY>}#OzoBKZ%f-mqgk)!R+&J6I_9Qb_bCIM+lBMgKN!4Du+Z284=r;aP3Qh zbuV@(qFStq4U3`+Q>jFwT#nSnoQ#W=Ec=3z-vuI~5kyp%@NFSNddP9&yd}FU=3y8$ z1n?#MggUA#(fN$Jd5kXKaa@f{_Zz%qv9%})%ah~FVG#)cH~Wb=rW%m6>I^C+c!OL` zek0nK2;^>7iA1~b>`tKz(T;r=%LimxTKV{c*upE50_k3kqO}bR*g_O=Ugm9P7zYY9 z1;EIGX*5c-CV}UsC8q1B=s2Z;bKH$J2W8F?q^J)H9%d1zs0ae|Q~dt`h`B@>xntbD z^g|5sXHbDwpiTOecM~!r+`Gne9dmK@mX1m#$|{SEkC?vE`<$T2sX&KJWfKk}H1#gh z$Za?wKA3yZlE)yC%g&*$BXNs1ZG==KxQfOFj(ZVpOt?L*fKauh>I~^}t`;^m#0~H> z+@qP8N8RF>%A1)BqF5J`0575$svZI|E>W*=Eg;~i{{UE~t7ZFvvm*$>No$5YA8{z` zaVhXEC^>*8$Em4qfnpS)hr9vQF5gj&lEkv)7|8P)I+WKDA(?Y6M(}GQg4KLN#~B#x z8xc~x%9U+G<9h@TQBhzztbIpSbCPMU=gk{8p9V35;hv^V1X~gb%nn?@eIi=0yqI8s z8yEd)BkZc0jwSAz*@?zxCB0CRbczfs|| zcq6Z<%ALW8rZKcfu#^@BaVh-Ad2vrN90O%Khs-^e>8LFX)X@_d7vg7$dg2rEgElMD zWCot0cQHKG!A(a@yOmOu^(q!aN%kL5fj*|xs8Q@-Y?wOF%5mYo-~v;rWZ^{}NLcjB z_MZX}tj1lN1*!3HI++(>#@J%4H)gOmkm9Dq%piM*$>412Qb(AFsY56}7MQI;q+zvs z0LHWAJVMgtzU84hpD9tuaj`v2Bk>qpxlj)m z;IV|fix*?5AcZzdN(6(-19Sjim<1Bw55)PvLF9}M4`h@#wW}JM z55%Q!6CMZ@khPFHVb?1ua2*!m+E@S?$y_;@Mv6MXLE>6t)2J8@T%sUW|>K04xv(#SaIz zCdD+`dkX&W^p^XSsb5p#fH=rEkAb5B72uSZ1ZZWPLI_|>&(MJd-FlQcQFN7kLbRz# z0#(PAqZ^Z 
zQ3>i(QlLZTci~*Dmvu5#6mb&S31$c7E73Q@71ZF3`h-jF4Yns?)7<-{flY)Pmd9XqD`gVu(UYG5Dlznd^&RC+9^!@mCpl28v9kD& UvI>n0NQq=$GOS15I_01L*~)=08~^|S diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/test1.png b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/test1.png deleted file mode 100644 index 087e25efb3e8c11545aa09e6bb711a3f33c1e3ed..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 166268 zcmY(p19T-_&?tIh+qQL*6HGX9GO_JUaAKR2Ow5UG+qOBet%=Qv-hBVP@80*iSFc^W zs&>_`UTb%k!j%-HP!I_b0RRBXH)(Md002VhzXu-jA0|WR%;Mhw;jAJh3aFkYKKVCM zGSm8IE-w#Y{0GAWAR#aTQ2(*~y8)qK0QCRB008a35deV7h4{bJT*&{I3L%sW^?%|2 z7_qL6^Zp6Z2CHegXvxd*ncCa38h^JpF=O?xb@-0~AmG9G541INF(&u0wXt*N^AM!` z9}d2M@PBAFO7j0s7qfRVBj;uXvH~fE5Xs5O1)RQ{^QnkS{_o}gW`dNK zE-nsyY;5lC?yT-0R(mH4Hg;ZKUN#^H8wUr=KModWPdgW54;DLTs{bYA|CJ+d=4|Q& zc5ngP+mZh#*Vx3~)kTn!@;^oY=lWm!bOD?HUrlz-|J$s82D1G}VPj_nvi%>~|CS2; zhvidr0-OEQ{GWUwc7gxl{Qu$pw;lnu|CIlKHuJwM{U7WbxrQ($tjSIJU3W%JS#;fI(KO6j zzp2rMAT3|F`cs<8t{E;cv`bwv%Yxo-0B6`@JpL&YjY8Iva+(Os;auW1W#^mZpOyGz0A+gW5VJ7Xqbr! zJOK;@LcsxKD{E>BoL(O;7;bcV& zn#up!jSRMlLUEfVAX-mQB!>8E3^v-N~)v(_rY#oh(v5S}LPTzh1v zq5e)t*k4IaPfyRx+#(`7zldQYzg5&95-kgjdSJz7V3qEQrtXAlWBE;aKy=~C&!OOxL6kuvSI^D z)lBb>ru}xXQo;N|ku2VrLd%$87mGiz+x_`^7mH3oiA}=T+t*tEWM%95_BOAmsOX=e zS$>C6jEyK48=d|ZY3hyaf`NahxetTk+9hBVVo~Iv5#sQJGM|BytYnf%-sCMjEp}U- zWt6EwaLM0Xu6rTG1w~8Ms>b>}tkmNIf(!eHhb8HR5j~Qj{-nuju52wCL5o}A2`ZN9xJOIs+iD5MB>^8vzCx}~X> zC^T^&>_e=KQnx>JEGp&LcnWy;Li7iT2sn6I=)_B{N$@UkHdYBCV^YF@ZQco4ak%Ztpd3X>o7iKHM>KGSS(3I9tX;C0$w9 zCz7j68k@4sJddwEOp#9VjZEyF8;ZnZ*sJcige>r$TglM%#vee|Zj>E55ou+$6Ke%F zlJ6G&pj2@IC`jjZ&KeTE@c1x^9kfm2a(Zno5dIp`Ch-aLUy% z9)F3gN2CWsw{wIZ9uCiOEMj8Mf(QuWda-|avSp6iPaK~HELsK6VMeQ#7bB`uDE%SSZOt}Bi)QTwNxBn2H{2;jYIj2lD(1qXGowF zAJBAGQoQ;|X$_|X+xv%$GHHT=zBUySMsGp|R(Ze)ISPU_0z`lhZ-6eHd^-lD(gN?v zb-VHn;|!#!QfD16an|FvU?Q#;lBhWuz1S7m|2Ewj3P%yjf||;XUOgOQ(ui-v0(RgJG^x}+JmyXE-Xm~fSdvNIDRxrZ2JQgAtSISx76u0 zmNQ99wveh5@FB(k(YtUy7}4&wrs_Zq%^(?;H{oh&7X0rrVImRAgUG34B62QH0$s7e zD~*a0)lBkoYQqm5udtiq%Jz=r1jY&B=m2KpUMj!-#ihTOq_b{QNR}8xqQ9rf<5_)C zrcB*Tegr@wXQ9C=#F7*7=D&xrgzL4TbK%(!w6S(*x1oX58{sWL6ohNJqMeKgMiL#&X9#j=qtbaonsh7~ z%@ldh1!*JX&O2vWqy*Il5Ff@DjtllC?~A5o4=&HfYJxvq5_-lS+u-^92tHOEvrEMKARpJZLeCx_0BH#` zO|e=yEhnRB;qemJ$>!>&sG~hc*2JUv(LfgdXVf)d+HQ?D+9UKt{MFl^y zwN5gt~vp&byG+J||5(A?( zbSwq-$jfJnN<9dRq~a2wgOo)QnD-}q0t8$(LueY}G#h!Maf(Q1NMSrASIoG5ocbeX z$-;r52t}Bib4q3hoRQJj46B6I_64X$qAgwG)Y}^ZK9So86`zKUAME44o(hp_ug*iLV`;!Q9;{zKX=0j=6$;kReXDl7i@pRQUc3wOACKOy{Tc^{} z0V0+J=!J+to!~j>7J{TPMVLXdvrY=^MwS%u0aD~hm?o3u>uJZ_XqbeSg_GuE1;IB< zxoZ-tSZOmc^2MO0*!<)XR$GDIbedakiNj}%gNgXq_5cDE_`h;sZh$x>Y*8)PbF1XR zNVwFpsKpjPZ4p{Ig*dfN4LD$cLm_RHkwG*~E=la>w|~M36o~=k&G2GQF;1dK@Eca{ z6~@GoCNQ!vCF5eBwb3lFOM49F;PV0Hi^-4#jBb;>67gYRVdctZnh1OIM+@YvNQ@s_ zxsow#?6kLu}wSo_`+aHyFAo=z`2ePv^9T{xDu8D@cnUx#g@n;PWganj&LSPk1> z$y28?!K*H^gj!s^GaLd=`T~vOBo$luja`&YI{K69=^SCq(pai9vocq*It3d6dFL$h zWC3y59sESP{KM5*(Pb#4Bd*3vPo{IC_~AjJsOZ7s9tPX?L)8^(wAsocvZ6vxS@W?} zLO3uUl&?Rc&BG*Fv?<*HWl; z4i)9$2r!Zm3{S1|bcoC*)yRwh&>`7yca(#R--fqi_o}5Of#i033mal$nm zW2h4h(x?ZG!icF7VgMDLnsPh_hrd`&4DL!;L&_Gp zGDPp7NAuIl@3882Lj4IxyK{W;$r0mx<&}HJ*7|rd-C2FRO1rB{GG3?e#&*e}_@*r_ z^4~Ztj6qIn!EGxOl$qH>kgyRI&o}^E&2=MhStQx}0ruKWViT=iv{UkLba||ceGMHv zP(dm$JG&*$t`|VMN^v;}!1wa4xS!N=T+PE*%8;o6&~K@Fh8?sdGw;G*ttbkyJGFFh z5bAJOwpNL4&evK%+CzC|@iBs_4NURE$oPb@;%8F&4PBD_$=c0D@6$_elkFwr3)SOO z(OmUn>w4iti)*JB=90_-tMhH=3MiCbiBy~v$N>0?*Q^+y)G`c~DMsl?scU_jOig$4 zJUb8olTqXdt2=SxI0U|6jbp|p%1A`>aH>6TQ`tYf|EZI`WXsyiKsPv0(2*b;>U 
z`Ii!7IDmmMe|A==VIWT>hSD)qxlBBgicC0iBQ7Fcr{NC9f#$bQNZ=CA?s(^!H!Be2$a&p)#~gWC0Aea zx}m8{2SCs3adnPZHZ?Rj?daY69O|%9vk^xOBjqQZdQF|_P{i2^ej@n2>%T(Y8O$*NYyLQ$s5YgwXkBPVhW62~JQ-+c=`8&-IkN7LeH4;&*p zpalSKh`wHtZHlg$ipe2F7t9^B%WJM7f5&)HuANgY@Y1{QWs5 z_NBeaIQ!A%-=l~K$rTYo_uqVKJzTuT-0_}>v);l3yU#0^Rk|VCXJ>WH`tyyrUo$m^ zkcC3XO<@WC933go)>no(U@MNT4b_g^4J8H`^EC>I>j&gmnxu(n=1GW2ceJQm($ojU zm%%au0%x1=H{OtwD8-8)@pXnS`phQ353y!DWvfPSj%%JhULec(RwFrcc78>GLC_ z=gZvw;MoCD1s_=RVy*n*!I?;r>)y1`lGrj4GiUY~zdj34vd?mjmlH1t*&Geq^;Jvo z!U*gXmu9SZ$}UAqaX4j!B%i&2jD(hcQBsECPPb4a?55y1)DiMPcyq=9GDb83CrR49 zIgXSiOoMnmV8LJ3{jeulJdU5qOYS%2q@{@**$3utLNq{?8V3xZIK{M075VBk%?Ri$ z>dGQCb*rAB>3yYrEWUCLlG=8rL~Supi2p*9J>Y7k@)tpvifo*{R8GK6Kp=+q;a%Mw zKS=p1VkI6>*$C4|lszCBy>5al>Lyr4i~ZZ%GdYn29b#BwK=a!58k=wS#5YbLaTgIpy#rJ}U|ri)`?yf_s<)ju;KZKL9pvj@Lgk5d^u4Cz{HRtl&(-{>89Y8WNU= z=Wa;1ecXvRHP^M{PFkgVDmIk63w}2s4^a}34#u*r3~iR)TFGTyfnThoq$a1HoIJ|M zEGWcECii!ybB0b+i#9|R5OftQGXTI!(%2qiZH>&e2}6>yMH`A1i@FxElXayW@f+xF z6R+3VXLn;V%I~qE@Zs~I4n_PMG+l645V zG=ST^qEc3S9`{~jHaZlgB7u*dK#$a(9?1ABoW`jCREEzBLIMy#L>fR|BK9?ch$ZCE zy*QZB$>=!K-0#=0V+ip4Akn&B-iFDWxK!ylB%@cJYw+n6fB>vIR9-VBmob;x%fdA^ zf!;ulPEClTYV3A96@^@|!hsTNNg1ZI<5_Y?UgT3^cGJ&fn!|^jzMz(Md1Yq?-fN8} z>?qq*OfJ&nG47m*1hFIKo%9RS>CMiv=j#;3@2LJbb#Q+?*WL_{rlGx!w)fdTo-L^9 zydB>ifSXmpD-^*ca!^Q>UGf+%TOqSc(?*>hUl2HzRvIjynW9SGa&*#0SfTEXT7^F5!iX4ILVS z8l9S+W}9SdCoMoYKuR6UV(sOxiwM6V0FMqq2H=KP??hqB5DikOQNjsE0@#CGybkv1DhK-gfZM?T!1izkNJb6{|qz;#4fb# z_t-SSzT;|eEwiXKTgg}F+Yjzlo6Xdb7DW!vMMNSec}y3J1t?iCdEN9AIL(NcwT7u{kGEcdH-4D)p2<) zI-M4u8`s}+AQ~>KpjoZAxiXoI+2|3ZvF?aE0`iJ$*00#M!zDEOT8XV3!3f3_sojM-XEV&^Yf!V!9gKZn#^M61y65@uTQBb4KJ19tbcdYxQ zDMdOr&>lMSftn_z9*HrApr)`u6-GCm@%BbbCVd)a9`vl~#tEMM-vp?EYG6uS-8Jr! z0{eC51p(lkE=PJqWAaIr_D~zS>c$VIe$TJtd7iOiUc4jIM|zRPYVAQfMuj*CS?ICX z^d!4i8dzcqEhu00Vgs=n#ShKB-8eLn=F-M^V1i~%TZz#K^3DcOH5)40N4T-Wi%Z~Z zT{OXKCV7syhoj^a%@d`i>warotyU0?)O;~Za;G*uauzv9O?Q7#!dVoFDo$-i$r17# z0SY37A-C%ek({~~inYpklU!008VPfgxlS(>`&CsKsK`+GBKfZUa?MEWgHWH(u;1BvEw#z_KURR2sJ*(hIQ zUjK}yxsovKAA0mJT&U<1gU0#U{d(dcjS8sz5h!vjZn4)4GL}ra#@sZJhXh2RdsX6? zz)JFpLeclzZi5cnL3Uor-;{R7PcJZQd5S=f9KWiW9>YS1zxJQh6E;Uh0r3ZO_d1^0a?)Qn61#%WaANI2Ar*}`pPms*~#P=&RZDw4JlHlWbEky*i1 zGLQLeA2BCdr-W7NgvYzn5usJ3JOrtDLZ4U7X;aV|d4S$o7NF?uvK`*+k{4DAGHv6c z36!jH?Fe*tZcLbo1*nzj@P-El`eDQsPy*MvSLpBH#W-f!mU6A!m56U0rX82Y^gk-g9#wqWklH^{x!B z_Blu=bd&RW=ilyff_Vi8*n-Y5)9=adG8BkIFl`+YVA1JWPUOsJFK;0BcPm?5Y_o($ zK!mQ*sL+oIAQNHvb;|GIclIv`4l2HZGsne&prS&NEmd%`pi>1q` zG6!h=$#O&cwjR|ENvfvkyquOpY%)EmU>zSFZTUsM^{V|VH6&xe#M~Z!syN2F!!OH3 zsGds|1>#m#aP!o$U2miKzn@|o0CQH>(`a6z=vyhxj$sS}suO|56u`C(<d3vOCo<0Yn6UM&9h03l}DtJA( z4V3Pc26*bXIce)tSg$=Gk8)R6tyXLLaWu?KEg%idn1m+allxx>k)i+D4JGsYeBJ&m zQ}jKpviHo8FYCDEGnsuY`1-eT&YGv%7;NlVcWFOO#^>T;wn%DoG!`9AYW24SDw0g$ zw|EgYxxAiFx+)L#oW$4eWt%uuS4Y0gQxY#FlMTJ)C?l}nGanrgVk9ddZdwc%^Qet9(W6m&1VV<~-xJC=cT1S@5zki^Sj^`? zBqA&j4%+&Y-$biWi>i06U?$;Pjqh>0T*5oKxs|xLz#2p!m_|l|A{*eCX#zV!&Y>N> zY}O)HD})*IL@;GB&iFvsu2qJ(N6z=<1eSw@4t{oyR=1BUTk=S_nsz7HZZ-EP%_Aet zuxIC#D=U{PQ|k&+TpvpZMrJ^6u5M7P?>p>tjX_&nu~|1k`mGg)`I5c<@-Suv52{|r zGb8Zr-sp=^GVSv6w8MLN+2+_u1~l#4@F@+8>mA|NDW;1?3E^a9BPJeO=cHZCW)t)n zQ%xp2+tFun(3W)?eume@#?Wfni|(@KXUgZx)5l(qkK1nlPkW!&^`5^KJ*Q?%IUj9? 
zeuswd6*o6Si|gKOUDu4D{>gd8ukVXBey-l86GYC{rTE|Ag?RXzyLIek zrQ_TD{%sylKu0hko+7J&+^k1CI;LR96dpO2fIe*O{cKw@83cX8j`x!h@&6IbUgx24=aU`Xsy)cZXO!_ z=v7kx%}I=fs495{n|mFlTJ`vzZ4e(J&l%mE5NQ;G$sSRgRA*aV=`}E%TQw+AFbQz| z%S9k-k0NK|CrbsHiQ8+wJSov|V-aqr57k3;Mq8fT#VB#QN4cP_)9*mm1h88=leyEu zTbasesiG+R?q&6u?R~k+_#C`DnFdl6eDHH_t5~jayhZcbfx_=Cyx(-+w!J4UO9_gf z@EA4S&CDLFVv|4#_YIW|ZzNllhk7Dc+fjtIUz5I9m?>(TxC*zBA$DmyTe%9{v;4!IEfhNq>>I_ynWIY&+7b3zAv*MbJ^BkOS`H?P=8uCw}lOZ1j8* zwaH#K5T1Hro(Xxf%0+qM(*{sM5BOX*KGiNT*Wtz8qm#N2b{TDvU8>;$si1eer2x1p zRX;czMZZE;-0o6pOGs1JY_{=>QYAXlA*2<&^@(%SNa!bbwXvpVV!#+i0Xx5(U| zFr$-K8c@mzGby60#JXpleTN|W%c7s`x-@yj-0?k_%b80+68teDT$*yo16vn2hnR&g zXpUfm&NWZrf6_y1p(mJuk>+25LVc~qp_d%{48N(S~6V)@jHVjzl+^S^h z0q0l`>8AXMYIi0MTrQoLhs|G|2l}suY4n~?NF@R4oGJ*tGyV9C(E^oARbI_Z+op7r zFQ;FI6BV6xslIf~DGXFtZ5qtzTt2R?_>hUZvs9`1+6OPU0g!h7x;~xQHBjHMee6k{ z>@I2=Ciydq-G~rPUwJKeU{kE^S{uF+8n@*H>{{gb;(FVAudi=lc-_xR<$P{AYfN-X zfVDpclO|Hg4TW}EhiFB&ngEKAFKISdQTJ4!2~4B}yYjW~dy`f}A({A1 zM2jPwPyYxV8xjPOvqwA2uYwfmIWOB2o?jNidDAz!$dbaKuT>SCS8hPxLK1InV^Xw+ z?FuLyMn9AZ;u47@}da*I1x9+`g~W$>kP;lMo%>v zHWg((?iBKJ<$Fn54Oj8+Yigq^%dHcx(%Q%c5bB)kd%x3W{h6HL9a(~vnzI&nhpn=G zBJIlG)$e4lIZh|xB@GfEa^b*5nNF*;^V#V~?tUDvEdS1U`-hsv_vpX@i4eq?pRGia zs1J)`#1N`mMEv`^mX7CnONiqEwFwOuU2q=EY*oO*y=VBK?Ba2JOSu28*m?ze6TNj-)|)@7W{CL6w!b1aX$0g_;C<+g{#1$xrCx}}&nYZA+&ht25PH}8`;&rQbG z+WE_PYn2#Bv_=%$k&M{Cz?jBezR;NOoIlbOTyj?#oH59mGw4kKI(1TZ>K4uiV1(0Y0^6t?SD`o7S&TD!DeSPI`zulB>$6eh}#} z=h18y0c2QZ7DNdFqI6vwY<`35MSqgT~GQ06%$gR`uSE_AGA(NxyWF3ZR9QCb+gOj%^`6&p*C+{n(a{&Y`sQ zm!F^iM2ojcTEur1WgcQ$2430LdiqhC;Ib25^I%|Y2>W8flFAooCCOhX#t`}?*CsDU z*nP2Njn(=cGFpMD*L|_=N<=WTT2MTqKa}`WG3m^aF?IT&R|;}npvfU}@A#|x)q;1Y zXVed*8`~vMxHpgM|7Izd1qU>~(H9=kx^Ow}ZK-BSsO6na^hF@bFBZJ9&V4?b{wpJW zj3RJte)E9NRm(p7=b);`W(b} z^;`es17326m!A8b;kLwEq!WWttYz&B4p*ZFG0GRCsOrFlz!#N*jLEIT-?DgVSoN6f zAJGZiX+EhjTQMa3AA6B(vxKT1d*O6`w{-ru=c^#3$UCoEDKg*tg)(n4|F^n-)lraW zMU@V!@oHD*J#&d-PDu0@Gs)#_-^W!_wW=dj<|*r^AXhA`TOYi7J9sE91EDWGJw|q> zsMfJZ|N6Sxy}?0ye*U#IBs-2c0FD&tnneW7#FFOvlJHHB#EAjFlC2JdHLEqL)lSx_{( z$ON6SaLIL6O@<|-?4>q?%LIp#n=n6eA#1`=UiLeH>8q=dMlIyZ8Qcvq31VpsN&LFW zge}V?l!1}9uBupo5@(YkK0_u&U39{Vo|ApK-jTuWx2G?quJjJV|X*m*67`h!vF zekV}-#gQ|XVgCFA7ot<)JoI5T&_$^$KJ3t?-7zMPNA4Bxl;pb`fL6G}kYl4qdK*DB zEq!xV25OG?VmbOq3L-L7<~URNPodYVt)?cPFcMV89PM^}I7HZC5CH`ljbz~yRvQfx z+QcQX&Un*nv}-0O_d-(sUY*xc9v4=56GKiH5k$IJ9eD!*(D*S70v>|{zQJbqU@QUE zU4P1Vgr5`=9Psm?D8&HMH0ph2MCkGD?zf@G#jh^<{nI*AcMLh}`7#COXZI2F;x0BJ z;{(o)5VHBNcc8BW&I*}dM0`k4idRsEnjil8x@5K5X2(MAm&CyR1`0fIh?&mEifODP z_qsk8ra8|RarwWmmt$O9xJ)O}DW(##e??4w`|C~}OM#U7JvMMho4L2ZOQS-FUY)yS zG)ke}77!J6*nuii>Uz9^jY#*`?v$Y&KFHi{RbtD&L!KGefvw#eJQDhpdf39Hgc9`q&4CD z1#ORG0}9MgSHge6UkzwmS-=`=`!$r{W)N;WC#E@bu8W&6iScI3Egrg^!z1o*1>noO z?-GFR6YY!M6L9B4!^uGaRKZ5%X_C7wYn`y_VfpXJFIK(J~_L%VRHg zQ-tMn@Xf;1L_NV4OK2R=Bjh!`Ewhd0AG8vMO(_=lP1h$Rwoc1I(0G^E7EwRk{X+~x z3^DK95l>Ir69>$x$&*|7_CL>6@ZsZW)BUro2r9C*sg<4YztpLfMVPK?e44p zr|Q*fTF%F)qW?k8hX3ipZV+7bAEXdWO+J)kto;U1Lc=`BN@Swq^IzI99c9z^H-c{E z5?V#KD~SEu!7Vq%FlkG_|9 zr=0?%C9lH0P3w%a>F6wjBDlT^JT(V%-rDxP%FFRZra3@VT~`d}<{8AQ+O*8C`BN>| zcsF6+V^jubY7jUyOIqgI1*e-7SywzaDjvz`i`r>uTQ znTJm@S&vdf>*~saxfrkNbo{~hT)dtc5G*bciwRGUBStSdif&=awHQ}08oUIUFpeP> zV*Hn||9~UqO5(Y^>UuJ`ydR~C^?6}3To`UHh^q6y!SlblonVu-_SmnY4c#Jx5=9kt zQ6B*)%y0-*M6!;WN5QMhk`7(En~aa^W&8aFiyL~pz`+_Awd1JXKXv{Qq`R8-!dFhF zHY6m)Y%1JxNTwRaphnI>ciR?c6)RP|?t}HLaoPoYj0cPRX(r~dHn{m3DN$E>OJM^b z2;>~IyAa-jue^A%Q+>?K^`YZP`B{$s09{Zs=kDi$yRE>9bM!aXscY>nHYj7wp9BNH zy06Vr$~>uZjfk$+n)DX>pmfTyvL~b2f@>FFslx{pi*=s{QIrn^cJ!1dPsBlA+YZ z*a34Dz67zhy#@OHORMkK^qa9Jx0-x*N)!^QST{3sM>D1r111sQ(x#q%%<|7>J9FVl 
zfV`cs#=xRlKMKvUwBK}GMK8-^__a2ObSTa}m071M-RGYEC^9Yk$q!0|kywIkTE3Wl z6I;Fru>q3M2WV;cUnBimQa}@Yrj_x0mXCEfN-^;2OCI@mwC{J({GaE3E(<;Pp&52Q z6JkomZe;O~wb;GC{r-FtLleB;yz+Y@x$@}Oo6dM_l2+g`4xt-{mF@A|PjA#_b>Z228;$e7!K&P7uMqN#2@u->27@t{?`%xvgWFGgom4z=l5>kbA#r0&_#FE^}B}d zZA!D}W5fBkwtHH)=LtZdA3a8X!A5i|ULUXSj*g_-i5~?BE~C_j15F8P_yi666E#i+2k^J3>xJZ+S5C5ikuMS z{Y)oaGL};or&JpZOSM-KzxYEw={*M+fupUjMKGz~l#u-~Xg5|9(S=A_Q zk}E0|XCIm^NT-~|+kxC!8qpWMd^<8Rx$s}#=RBzfzh+KP4=N#)H)4-YnPo7fpukA{ zC7HrOSr`VXeZ^jYL9p0|DB~L9z%drqu>_{^)>oJT#o`e%u`y|fJD20Etj>#Nau&hx zSclU3{2b-`SQ-Kz7@~5}eLZt=VY&z9t>y5VoaDW^(yk&ViRRUeD#8@XK`nEge$X7x z+8p&CB^WETbZ+xSguXuhYxXPjRQz2J2b~WA&@`zMc^#sah!Ql)?3v(5odqW83e05~ zUX^4Q5Fkry2EJqYXZ@1K`z+$Ix#t!b^H|EmX%fCpI{jLY~|eMg2=ElVDuK$=du+g zRsOWY3iqk6bQg%ZMZJy@kH7T73AsFsyRJFvSi7{lUHqLh@%f#Izh?MxXy|q&TAFV? z?eTUi>5^!`Ksr<&rO1bkpPERafMl%GOOXXg!ardl9Dgm6E4yHb+su^2JbG@b`Mmi0 zZt=OHooFz8p`~Dw=WX;d^NuEPuW9(kA<&)_{h+R{Cx(>~W3JowI)$uwR>60B8_E$= zxzq?#zYtCqS2_?>2MgMJkxbSMF*{P7y^Ok=K5vB)ys{Q@glfvH)Hw)X-(qDW1Um!s zI=GT@<@l$&vFeOdK=!IBO@%+X>edLjAAqc&kUwjUrqBkOCRoFFz=Zu2(zY# zfQ{moQyz)J`8brDeWorZKf-{;1kvBB)9i#YKBW0hUUS3&Q8`Lt1GfJ=Gn zc&4l_k_n4H`#KdCCb=Fr@J?_Mst^ffS=3#Ov9qVUh`BFXV!5N?6i6UCve0>0jpi1o zkk>ozIwIO_;Y_|L|AJo}+ogwY{h6{YA@k{X9D_PG!%}PuDO1u?MyjtrS`_%b1zfa7 zBz{wdjWZbwEQoZjBQEmkU2$EW?Ra77nImpJY$^7*@;~jGQ~XSL>b#$`mg{@Td-6^- zGo5)j<7Wvl_ITb6WixDp#~0nl&$9Fe-)zX)(YoWf=<~fhiMgKaSA? z%!_0gEendU91h@EijYo*A4Mqla1zM0sJ5ERK(#Et=KalrS0wsI&*i$3tzb9(Nb&c% zg6`PmvrTu(zp!E>CcK8Bfr^XussmZAfN4}M;niaAW*ATqKrGmDCbI)=I`D#XBoKfe z(43hSr*MHdXRF`K_}*Tfmk@rLb2oxaAa*a~?aoZS95U5=^pyX!$W=21=Q z)&95rmwQ41o0dvwuY{a#l$8C@F%bjgnM_Vp{HS&oStbYpU1eB-Jl{jd3H@?1Z$ z=q?Wb~RcNviTxv7M_LoRsxKMJ^d*3eKlZ|b(+uP$}*9gxv~oOP57BLWuwu&lQ#vS zKFqmQ8)}MO?2MNJ&ZVB^Qe(+Fkie8$9R@xR0;^)M;mqkC0N!CfW; z5u!OhVGhd(_Yr5uJd|xSSm$K^NBp8W+SfZM2=*f62Erbp?c;ag&+ea4wU4C}svHDO zK8$3$LO0lmCj4BU1l6uL5j}g%GM2@%=c5;OL5+5@e0x`Lu}O;UJT6-(rk7OW>Wd^l zf(?~N)hv;)o~SlNv^IBVwrvXj2&$|f$$?4*WT3Fvi1lZ#RNSq5Jnd`%(BNy@WFO&4 z8f0w+LzlPoMHg#7A^W14+cB)CvJZFvCtGSgUca}x7N7UCZSUt@)A9zix)Up;se1h( zu|~Lc+;ZQ5TFmR6XaWT20hdU*q&_9c;Xs;k1KlB8Uz7$Q`&HpUF=>+=${x3+OwE$T zl*9ACc8ZM8*U!hEU&lQ!QeQt_dp=)TIY-4Mgaa3TaH8IXGnFgpV+`Lz%Iqd;=+yGA z6oxKo9ZWb7cH0=@O#F(;r7^wyUg%(K!aNEi?ZlzESvn$|a*L4!_>D=8&BcwdSXY7= zTt0E7!i+#_Ha&HO#~0*4XV`km7=DIn**tLWouNjQr}RG*RN`6r3$FvL1kKKMR|yww0o+rSRDqTIpZ1pjD>X34GF?9 z=5#8ntUftf>dS_gS{xQA`RYb3EqUIfgS}3Sy6NoeX=&?c+8xEtj{WMIU9MCz??$0! 
z)xJyV@kt&^E|V#;zP-N4$>^L+g_`&lCZKA(XQMFeWCphuEyv{3xDLT z{o-I(t){Y&$4+PQ>B@#lwC{gai>{JJ3`nS*hFId(P~7m2eYkD{Ed$9(#BM??-IQN#Vy&K_v88J# z`*Z8m$ifMHc|F+&r571s!k3w~qW^&DC_ioCfhg7GYwhJX#w>|s`Wu$3>6K=VNwcOd zJ#%MKHgXl6+n&Da;`xW!CD(mL);?0Ca}=x}ZE8YamdGX9|!?IjYuQZ|_V{gaktArR}8uY7a~Dzg?2q zf~S9yL*v&ihh%89pGnRRZW})Bm~x8AFg0`Y_7KV{JE^v_bM&+7qg|`i3@TYF<{okP zP^w0EupdrG6V}a^E5aLX_j0md!&{_vz9Zh;FqWL9Ka&&_zh)2j8JXvI+27J2Q~doC z-IuTH(K^^$b>5muuhtKpr(@&aY_^ zVX{-s{K?D+QGGXu=&!uod_=38$!_M9d)M>vm#hO@`RobkVdvI-YxoX=4BAyA1}_`q zqkxnnwMNJCQCeVzUG~9D4g2cJnf=?aeJz5d#+~k2+C4KgOY+0zmqB&D{CGGNm;oPJ zXPG9$H~IBH?Yq?|>zSVkiy=rxOSx)aeLmhiV^@I9Lpe%Z|Uo{p-yH_yviRK~1 zj21zyVb8C94|1CX%dW}?M^k67t0zAg;FiG!Z+ps+$a}>K>(|H~8H5|H)lz9|DA|-* zLm-w1r48h9IH+)8gEZ?PJ%9MNRFkDg*kBdesKBnEP`x3mn%g1{(ieD&^mZHpQKWD= zg7Pr_65c=U0WgJPlVhTv(5K}5cQ+2|ZlJ0F2%>T*)(!hGLW_}c8otf&)IK7pMygFG zypl<24{uqyEg>ofe4GZrd5s(WNyMvVs47LN0~hmLmWEiIM2yepj*kidyfH6+nSt_z z1#L|=vAno2(>JUud>igA=fkmpy`D^DE8)7S9SXz{BK_X74Z~}Wd!7EF9ap~>n!HU9 z`z2O8$uye7d2ikqWs$J%7G5z|-d9<8Tj^E5L|UAYE-h?Lx640wQFqSDaY#}gQ-y}t zEESn1nk$>$H}~M?!*jLRGhj$rp#SbqyseuU_fb4=Q1ZGdP897|xARLCT&A{pCh1>4 zaV0iqo!FmRE79OB7K$8ku@bnvXQ;Hk*D5EFW3x*u(h8aCw(%a*P{=Cz5&3EmHKG>O zS)qla$~-05Kl4OmXvwQzXqH1w>I{$kA@kUQxZU!7(qGcWBzr%W`aF*PV7WlJ0NhkG zI^74u`82oDRhXB|exHis6X{}vJW4eJw9Ja7u8O~Nm`+eWt`zN!-ME@kRm_djr1wec zoA2mphWN2qbCy6EsaPL(wuUYKK$i^1a7uf|2q?pGM(o3`*}d!_MD0_{UPtW8hVFUfjvsvzrEyM==85=izr~Ka+Cg zuhcnj2Xy-Q|8S@img26zgWr zz^kJ3$@d-Ohuqri@}LNooSBedPx(nPqZf9yJ^Ws9hgAh z4g&^8I`G(H(p$qIB*!D_z_b9~ggj2<^~T!xOC#Z`1(cAl7I@XeWNu=Ovom-)G?L*% zZ4d%((jA9ZVUQ4-F>xWZGny9|T2x8hh>dr*GBKH}69ZDvRj;5Z8l(|ygkMQa}-l9!~xilqSk{V#?-Exj%hQZ$=FbJCf~sal;$B{~67dINa_L<LADWt+;=0gs0UL3eV|_2X7Zwfu zH`I7*4R%xsFVPJKmvo1HhMS29tUKqgn#u-7ATRxL18UoxG=R_f^aM}(?40fWEoUfVbU$ydNkwZ zP0WA7_Ra3L>2gK@O`tB)X`_9o!kn~Nkp?m*2h1g*nOg3buoFJl;SPO%-y|!yq;x+HVkR*s6%f9A=Y z9Ud1#70D4t;96SeF=J+^2-reGLO#|>B(2XBZ|AkhoC0iYW!KGp*Wj#A(otwEL-kQz zwdL>-H9?ctqu^1Dt4^cRiOx60>KvIuH$R0M@mkk>FSj}EL2sm0tl2EI5ygP*Y;SS- zO`53O>rZH4!kCYAw#!4?_YigQxWrh--25tECgHkUL!8y@!w*0F_P4+NBOm$5J@?#$ z)~QpcY&r5Dev3nMH0`cuG)st|k_Bbae&7rtlnMW^6}ydxuW&>U4iEjz?^<2ozxLW| z=gpQaYrplaZ|OCDC1fXC!*@NMl2^+P za>2M9gN3!(sfY)eeG$rnCj+q^a7-Aw%5n$=GmT3K60L!jv>(MpPEP5qHpyYil0m-N z8#mN$x%N}98PIdL+DKh6P@s@LEFnhQnukHk>{jfHej{d5o}RkdNh}?W>Adp})9_Hb z;k9Tu88bvp@s?tC)X1TQACN6__LHmQ;GX8dg`xPVDPHbzr#YMhWU+x^k%r&e@?GL3ozuP_QO*ygvO-_6s2cfJESYu zl`5_=S`BhtKRLic%1Em5!*eWF73gcQZQR{SVIzMq(6GT)MIVQ#-pUoc%`<0SdHLm6 zyyyq>3opE&o7Xiw{`ljVC+tuC)KC51@BQB5Tz-0kOUuDs8>Q+0T3$kYgP*qGZ_FDq z;~!QS;VOV6Y^%#bD)#v5*|TTe_c(Rp?B+9huQ#JTHr8l*llgH_<#(J6ZxB5E3Nh5=YRW2vD*F4l$d3 z?~O@`ot?Bo3QN2ZLXZ8H1SD*r%G3HyLVRZ^(c`*8_jgI?1ghCFz?yxiNEO(M(@?CX zZo2!bH;m-(3(`#x>6$u*Rc`~7Gh~2^hg5je<8a;X1Z@#!E>Y7Nww}|<0;5e{O`>!$ zaG2A@z;v=hyGm}pTbP}lMvXH>iu6eQB+?w^K3cz($xF7Ub`CD*CCjBvCW$U(~}1l=iD?f)A3cB;cZy* zXKc0YlvRO??{359CbKm6i8+;vit(n6@?DxK-^$2ZLN|7BZua=`yHWh?XaDlW7hmK* z&ph)XPUTD$hGL&B4!P8~Z0*(#WUX1Oyt{yWw{Pd$i{x?@w#MGl`6lpLk-FfOS$eS( z8yPiKyF`d8BTls1Q-U;nzG_5Flh3Z%tm;lsKXT;IGtWG8<jEWY$USt;eS#P)$R(as%kZJ%odvA=v87eTT&1E!L8c*@UF&hnUb+CnL}Jf3u3h39F*r3TeQka>xlWqFxVL+1i-Rm*1vbzqGqWw*++i+zj_llam%=^!rAI z9~5Ekp366{QWP_ej|l*LWZtACcAPJGZup5Zr6l1fP+WW$Hp({9Ot#D-AlrtHmpC;9?pThZ%V^i z4wTj6L(nC7BzCDd+KZ6oy0t>k7!G4|@HSf6A2A2Gt{vJo3B0`@8qu zcQ1d_RBh;gKhB6#A){_Jr^eP58cDoRb?M3_IyC{thIu$|ymsS?Wi7UinO`azWpDC(qd#6NEhq5BXm9n1nG_g`Np|GgEfHK67*>z<5t1B zbLXyHy86X0ev#n6^;^I7fB!#!LX2D=G%|27ZkgGcEy?u4mfii!X<3j?ScB&Fs9aNn+6_0u zvSO-h$@#v$+QvrfWyJ7n{$M8TKqr4xs~WCqU&_DQ1&mF`?19${0(oIK=fe3T8W5VN zfpOfW7308`8m3?rYUjhN$?0YCHHpE&Lr0!?@?BmX`0z6yeEZwpZsB9q=ZhJ1XT~^k 
z(O`H(7_UvgG%8(dV?kT<)hm~;S)<;2;=~{Pn}5q)EuOug%Ypl5g|Uj>wW3?ed|PLM zrRC*(wC#Zh9?-7UtvO~~h_1^BaMF}LhM!u!$3!XsE66n42rM~i^8aecTRZMB?(CW~ zfANcdWs33juYdia`|oqvX*FNT8Q#K+?%efqX?CZ7Y2M(OF}4gb2jvXR9}&uO%A|(B zofspQkArp_iH1>KzD#B$NLNmpRi+T;(*?kg+FZy#X+O?y9q2n4=1TcOSb|b<*8+y9 zAg5UhMC-6Vsgr_gDgWjqrUq4z*ibiyX+O&~rLBBMKNoMd+BSu? zDoh+GYbM{Ep`o#Scf(CNqTAA7TskOmcB%9E*4!X>ej-f^K zyH)qyutrNe5{*zoARHu}7H4at`fb_HDQyD*&a`N3;;nCa?CP~!-}&zMA-6M4%coDD zb6jdc%WpNdePb@#<0lg_QRmg(vo|qroN2%6-mxJx8(dL9+B$llbRKafXtFMC4oBQ7 z8s(1?L83|Bhn<;x>s#Ln9i)HvcYpWe&;COljf)pPNn;z_QZ&bn&QNzJYbM*c6Jv>D zBXT9AAs29~G>OcyqORgCV5wFC41i`VbxNpqo(3SDPS6rmNX4w~cAXZ$hnl~5PATY4 zgq?=1d5E*!5z6Z`9x7$>Vz)iGF=G?(kEn#kN>P5EqJD9z?o@?)24lKS%<7MDi~rmm zs&9jUZ`U0O3o_%4wX2?ny6QAYtG3i=^QtjJLwJd@#>T97q7&3xgf?TCwQV@8p9N=i zas)2Tt2qj6pdQ*=z@U35HHPWX&&p+np?>l+jP+#q~uy-NcBR1d9P8#AYORTN(CWntOdD9I(h^?u}pJAVi$5u^&Z8#x; zA(7rsLavI|Ji^<@Kxw8Kqi%z_>o#- zP$wND9y+0-N|^7zyyd;V;mQ)?W?oTm7Nd1hY2PszMzFm>&rybUfiuI*a7u#XoPAi_ zrr6{*-yw9rT7BbfZCM$T>UL0V`35Jp-K;YkAdWzAUcZ)FW2-&w;y{@-j|G@OQCBt~xrXVjDa>Hf=>oQmmic|Te7!5fWi&!*IxH1- zd>v@YD@=<%I_fuVmZlB)7zm#{ax-?kxvp=WF!stPPo~{zaU+DdzSxL<1%hc}j$w~d zBl(e>v{~(1(|g0DW?4EWw1G^79K=k|?i@OJ*WrVC(frVnquTVw&D9(0Hw=1A#K4GW z!V55rg6i7xbhU&sk-Be*F z=1n7gup#%6N8Y68Hu|ww$gwQ+h=Gj`BcjcWOh}|90$mOabLS`;Rx9%=oT>Li2|7lF z8dW5<6}aw3=ix_AoM1a;-Py=lRP>*P$@uPcTvvcxjHrs@<}FO@4;LdsBQ`oQ;<*8k z?z&<{u%%5aGznu>7FQ2WBDzK7bfQ&1>NI$i|d5AsD1(iVJ;=M^9rtE9NMig!AAcT~)dO z{sv+8=io5u+@GPQ;R;D22y1=-vi&XE<`+l{)YTM;DS?8z5Mr-`Z%D`vIl*aY=|@Ds zr0>|2o?JfQEva#9(|-FfGimsIpuh)&x#gCZNp|ck<+bz$?~2aNFV5)Uym`j)Ew{6R z@g@q}_x2b5?T^tcqfd9Et@b#FHrN+QGY=;cet1Hoa(nXY?r|^RR+kcqG7`{_dg&l? zC2VXkzjW|Tk39O`r`~U{T;JTjbonA>&Msxt66iw9wIb?TwV@iO`0XeFbFs{@X%xMB z^_p`X*3XUC*H&J9@#W*kPuN-c%9sDzM=*ZnSN^-Vzukv2HopDMue|9^Z!-6>z05p} zDMybUw_sUZn%9tWxQR7JebQN9>r-4KVZ%MrZ6Xrgz>%?>ws}kIF|{&8vEWf?7ztpb zxDz87eTPp~=vmyUGGSgne$ThRvv>Km{C-LXR47m{CBzi`ZxFz=W~%{22N_M5iKExU zY<1cNdPjsV^)^e3t<-~%zen)$Y6)x`G17MHd8T$-+9+-9+xAv{$;7$(|>UuK7Jv%e9ViNStH>zycS zSX!Ptc<`{_YH*5k;o{|&Upiw3>xcG^-Fp-gMoc6FuUOv(d3!G;as(<6B=Ty;U~vBY ztG0P^B)c%LUnm6|<8m}edkS>b-NTukKQQTambpt%ik*Ns$2%y16v4lWrU@Xz-%GjoR zg_HNr%^kUU)dzV_oIm}F9)#Tu>erG03tbnaG@hxVbEW-L~FO30# z&bfpqj1~NF@ywvgyE2y6pp`=M>ggd&V^nC~VDluuA3HJPtDO1szn7F{z)qgn7&Xo^isCM4=HX&gk{zeGZYq}Z8%>+ZU??3< zJo~Q47pKRU7p4x(O)hx%f5KaZe!G=5I{dd?WFOjY?e3Iqx@?Hs{LKy@k`%={^e<)r zSg>-mU*!{W9yhjj*F50G=Zn1joG%zH`tb}smcL#h_y}iCc+cD3_T(47@D=h z_v(3HGSIcp&d=PueZw~7^_$n$*KS$*96EfMfqHpkVPO%|>@>QMx%y6tV_S>BCg!v) zQ?1%FG~Gz121=ZdS8+KFfJM#Yf?@`Q?C`}6(l2J)H~U8E;jL*+q`Xn3gLjsjuT;}B z@qCfm#^=CnXfJpCa=BMG1=WOY=Sx>_PR=bIK6dZYk>i_#xz(MitLMId{GMY>*Ty&m zI!xbQwpY)1&_!7Sb1oyX&2X!4765}4y|b)XLN)wZwF^KVKuhBHl+n8r+j*6 zq56;Mi8PYw1=|y+Z_PEIxJ_2L6DFd5b7RZ$g5l(4=83(T$??V6Y0EG_&NffZiHSK& zQTFO)lcNL!OkP)-q6YrpWo_Td9~;c>?&-d#Oj0+uyp$VD_oo%M#=NsT?jnV$qL0Q= zJY>~}+b*^}uBUK|-zJg$pWJJxm-#a=Xl;(l(a7i5BFv+*w6xdYL%Cx+6CeJOcQTrK z_t4H&_Jv5t=@^?>gQYXjRaS45q|cYjt$7tIhdNJ8tbFeCjRJVs(2w zZ&bZBE$hj!qfSpJYV%9;GYLIjnsfQk!MDBji6@?X^8WiDeC~VSvm$->yWjn>kNu)W zqV1Rq7hf~X;v#0KAu~Cvew~Qb$>Gf`LTP<2eC6N$yH87h_`@H5^P3;Pbm?+kV+hCR z&YkVU|udn3erZBgF#e5bgR4*3fLma}z$;Cx9Lx+5uJ^iQ_R7G7$Zh-X;jh*KgUwrYYr=B7U z%^aQ^zjXQXp+kp>aPIthcCxgzc;osl)l;oTFzi)2rv^E}uK;rm4^1SaVuj(MPDa$4 zF=p2!-DNf%s~3pcW%$)|vl!VfrQEoAQ*&cL&Y^?(wFIv;>zgs`inHWPjL*UmTw5i(VBE=6GVrCBI=C92vBK!=6fcJ{NF0lkA_17d-NGWA1w0t?4?Z=H%jd6sSn-pPI z4Te)u)x@r)rq?;p(3%PWM?G^td^MVJFZt2tm_iJV| zSnv)){_#Kl$OHM5>BUP|E@z%UZ)?{8pOsI7ajcZ0k+=zyRttly#LtWT`Jey!T}SWr z68ux|`w=$hrkBqnQJV4$vrQbmhKSfyca=rGyfF;K>$Y*1GSif-&iWinir5lB0prO&kA zu)FL%%;8k9o2GdDYKmWq 
z^OyZz1F7YhBHgahu3l*Mdy8W>PL3SC`}&PrC0CXJJ~8dheX6!5T3qyI<)W+Ax?~?I zW*s#$XUvCFcSdz|Mmr%bSj7ukLS_ePBvcn#FB0g6sW-YVF|l;u;PSC!_n6S?0ax5coS5*7wbr`7 z^h>{V<;pd$>GYsQM2ZQY2qaC&0Jh zsLK=sQ&;5%01QTv=hn;!Pg*Qa6@(o|vIIdct_^$QI~hQtuZz=(CQ#AI7P;mZ2Z!&v zhY7g#Wnt(;ir@Oq^LHINWGa-|*FOK86jA>(i6kl{5&gx0@H^sD0%GT_)I`*p?^h?d)(Wa^jjz< ziQ2&asGdE8CrQjw!yY~5#%R3>mi31!@>ynSsoJIXzx97|;8a_A+`ih&SZtgV=>}SG zV4f4_jE7ONR#th7L%`mnq1Rp7=)}c%NKcbv1;!uE4V7=)AN?CJPl*kk+tp1YU2vvFVGrLMmoT zkawvStE2(g$1L0sl<8|oU_t9jZM}EWP35uK0!cIgw^#BurvEebNKdkBk?E#{Zv}8# z``n-T%xB*8@WUptj%NgxfkbUfXE_5KR>YDp_`#n4@E`sRqK|+4e|Xzl9zS#D()DZC zzVq#GKl#KHU-|M^KJ>v4KKjU`e%jVF$x-iGexPQJXr@dDmJYmj{%qzF*UVQ#Lw{EW-Ho6_Z=NsSnrtO1eJ>sE5E4OdK>9`U9 zK7K?9Ob{>IEoMzv$T*9(r$}w*K#nzxX3kW3&u{#>1(@N&qo&SpXU@!EbIn&8YYKMH zJtv*^pFjVaPv`oEr?&LNK7LdcMIS1#`RCBWSLMugGO50CQ(r?9Y=(4l6h$HjB=$je zcI~6ScHsi;*@5fEB|c?kNm0uRdlQ;3MKv*Loufr6fnH|AnZwOws*m{34~ZoBY)D>*XUd{f@-myyugKqUJ5b$3sFZ^MF!!rv`NCZA{3EZ zRin1Jswf!-k5Q5k-yD|ecRCN|KWy-xvYxGk^7~Uo}IszJ>FF2Oc1dTId4vF7DL0mvS%IeK!kjZs}XU)A(^-Qop_q zvBHpvA2At~YEEe_3MWg*c{uU2Sj$c&0Hv)WRl#({Ruml-{h0oNM{;a zb$s1i$?u*Qq@og(5n{{;nLCC~0g%4;-Uoc_ga?A*QraIUrt!}`_xulj@B`nzX6NeK z;eXDzXQ*BSGyM8ci93FbEj2fn_ z6gS&8Hx7cKdPDol47t38bh8K~t-KY)l`?eJ-DjSBt6*5@;@&6Pb4boI;$OkcJlV#W z-1pjO*6&csxln$Fc16XdHy{dL${e+7YU#LTV=Jr-YXlr*mdEUyw`D9c3t=An1dD}I zglzm=odMGxW?y^rof+cDihJf;nKk~U7hbS=m|f>$G1By{y=@i4L~lI<37-uQR-N5Shf1Vv-MTdL6&nRA?2egUGH z`7m?~DUQuzhA*>>1P$haw?`2*XaG`-bhJhWI?r2-EvBV+^%g;rI4TmSPK*wv6_GB^ z7xm4<#-^q-gTsAi2DV*)!@k$wu(xJva@>jrVZ<$-ZFo{bc#`<*fs85T(2Ksp@yDbs zCPPert~th4E>Xe@Q(vxiw7<(~cQ?{6o-_xh>2=Dofw~(GK~=*30`7i{OV$TZsjr^zm-CYWlNnr zct?)13qVLw_8a!7Q5u2;A=TtFL^M20ugjh!&4^2_;?oK-*I?U#U57C+@E4k?#hwUE zGs0$uw1OH7*XH^YKFDYy_?aOHSAvKjgHX&wXr~pLOSUb=t&v430F5p-0*rOv>=R%( zK3`Vxkbx5H%9cJ4UnIuRkyKcfS>+bWWb-Uq44JI5Nl)t=B3gptp@L@5^9tYD5k*vi zvT2($gPc*kP&QRpiXdkq7iAXuNMs{ynbopR78~ds5i}5b;{8vaIdjTI)p4|aa6k{d zVZ-{$%GvuC^|yd14!&ms3`Sgz&eMF!hd%V-!2tuRvBL)r^CxxG2F$*%+11s>(lZA( zX+X~_eJs0>Got!r-?cQ4B|r5>0Pt;@)L0#Lsjetnp|7TBjyb)=!BfLMd9aKEiMRL@ z6A9)@c%IJ6y6H;(R2%sUvq=og1ApJT4V5W9cF0{Z+c1p7DkZ4PPua7epKOhS65)pU znm|vE_Zs3Z=;+!=;>RYY-q`=;wAvOMkX;|%8crh3#T%xOGcyF32 z`WbJjfgZ-+B7;>=lr~fCEy32iy4@?- zzOYrAQUNMdYvH*|?f{*YdDZ7zPl*|qMS`jJB*`i*R-$&8VkZombJ!q_#A*-GDO^Vp_BbZH172`izim!| zcdZK31=&o5ii*l8D=U(&$$VyOFm4(^cGdYHry)aI`x<1QoS5od-|4HmQ&W@3Zsg|) zf076#TU|kL)4V>=Ji@it-Jk~?8Xh4pdP|Ev^5L}Z`qb&u+poReWy|6-W(Q`@*VNUm z@7lmRad&%;9~(Y9+Om4J&wy^)xEaDz=VmO<+aXV%jd%^*eC?KitZZ!1fAS-$Yw9MZ zCQqFnSYC-N3-#4~eW%5(nXjyCGFon4)xwNfNvo=?LdBVL)9$^4I10sJvQSAEZA3jb zK1Ig#nueSjCFq&8o!G5#@dKi`mj~UV(~r-8M*ka|Y2wQR_z(kKw$a;iU1pJ7YvCxZ zyEbgpLC+3higwI8p%5T)nL-ZJ;3`ujJdjjl=u#PAvwlE@!*1aq9R4t$80kH<*$p-l zk457zJmPSPgX^J*F*XSAUdQmnL-$0L#oLblnjWMrDYLbqqRh$Mf`qqrV-_sK@TglT z@-w@5D=Au8JOOLPNan7oRM(OorLu*k;H=(?Lrl;?@t}%Nb&W6XjgF1)*s;Txr!Bft zbRF_4Cv|G8O}fYfzJ~B_3c%=TAtsa=p9RhdOiWDn_4VrwTHD&A-UV(~AOHJr{w=3h zzbdEo;qLD4BS(&SOUd<(W8_4->rNg!URP6VXg5AK=JC!uFK$h@NWLP-q9sXcS%-SM*d$<<_PHg4#8Vb9)?k+Y9K{@%fX0o$$s z4C5Ppg-ctOr5!foI5~4+d}4BZeA3gg%8rgs2cyjFE?-eeFjVis&a|&tJ2e$w>mD2& zrmox;mqXbn!%R#}PB^MEI5;G3PElV>31hu*Va3QFVaAffinY4=(=?s)NE*Q zh>`UfF*cfn4h;>5@4TY2g7_?5Zrdqs8y}zG`?{pwm#`~L{uj+iR&j+JU4INeCw^ZW7yht9ma4i$29DYX6V zF_vy=d2#RF=bw8{C%mS;{Y!uLC+_t4>R0~y?C|iqwd=^1Q+~J`tqCT?1Z<2F{l_?l zilI|{j4^qGATG~Ug;TXJ?B4zS^Uv?N^;V|ACaTSwHci>v#iSTcAwIg9`BaR}x3%-! zc}7uJE}pE4rp1o@a1J$jo@jm|Bhx@)WJYc7J>w#eS1$N4=$#m!;E}+tuC``iaB$M6 zUVK^Ho=V3weH}ty7i+c#ke6qCB;Lg_>`Cl%SiHJ*O>@iYrsft5F4zUzbjxejc7RT| zc)wJaR=|R`FEgOMaGpvlZ_CsQ`#)-GG-!Hy#&8uA!#i2D)NR}y! 
zF&_>XlM zPJ4TMcX#)9zx!R0=bn4c#g3+?)~{dx=%bHLPfzaJwd?7ppMK<#M|SSqS+pWnFH{X@ zp&$^A&5-9~13fe}G`%K{E6c+u96fYc6aLHB|K{KS+h6^SU;n)8X|%3oZ5!f%hMuxf zCk}KHO1iR1XDK8?ag+vGT`~ui=jbd&PNdVlh)7SGvw)|<^{Cn)tnc7%*q26`eO}CG z60MZFlfFfq#Olhbme%%$=2e#$xTMxsS}w7k z%PMQ?V~>Mnxf)cBE^4WvK#G;i!q$_rB4rza$MFG`wzqAbU&s}1!)mC)K%GFU#tV0N(dtu zTT+UZL|!3z;R#o5pJ$DFVXk?G=Gv_|P;Rfomxx5o(N{75(a5MT{3d}dE$N|n!SrGX zB7LrZj{F5k#lv^q<_8s3afrSGB_H1_Q#`ZRO^BW$Owps_S3F|oqRf%7Le^cQVJ`bB zXNL5~I!JlIKmvFuu2ljc>Jp^q-b0?Mg&&-N&nG_d>3whP9~~RhPPR|xAN}~p-0AuT z&R|{n%(0@;Jahpwi5ShieATFljpxg#q-feQJ;x&&hhY&KwF<#KQ+tC>Lg{eAc#A`iGI`=Z)qU?{kHhEVAN}Z`{n?-KTDj}1nwxW- zv4EO=U;}#UG|(Z|$@R&xG8onht#Sb`Dk@eo%lMfj4R&a@`LGg@#jv)frn<4QVP&0_ z(zp=RXzuXgL$}>=M_p~K72yQEtE{f!@z$?wFm^S9Wmb{Bvf9|BndeklRc*JDap}yt z^CoL85^JWWk{eUZkh0|7vdZ8Vi|mF9Kl$L1!|S^?TI}^zt>_UJ#THDeZm2qZVuQ@q6;oPe!6f`g{~w*`^plQ%Rco0@QB%I#wNy|LgDW0K!#Ze zTgtlf;1Fz<__gCVfkW!FK9{=YsxZ*uKZw(}hx)uu_?}W%20ZOnBDHQxBDmXIxpikL=lZ}OAb*kgsL0FvvAlfN_poMY@nrBW$PiW;9vszo*pM3Jkr+@O( z7!6KL&{S?HbMVl6-gEE0_u9dS4eBbGMYj{^6@MEj^`Z#XwQapJuCrKLTT@S5_PzP$ z-aRiGS#8_4t-dC1ocsH~|NHB%yKd8_O+WnM4{feAd#t+d?rw{>pndX_pS=0z8@WYD zY@-2$Q7G`9-M<~JOC?DQt%#5f406xSZAvsh!ZT;RoIpBq=sIYzu^9vC8L4{Wk}CnA57pY*I!MN_ zcKLEL1wEsiAZ4ktxw(ZroV#F(Ip&kOzcgTSYRZ-v!@C%rUcK7f)Vz7~)|Tc~dhals z*DRU27&jb+j!jMRUckZ+Pn$91k1#4N1(l-0dO@9jGzMaz^b#0V9m6k#QD&{bHR zFz25c0ZItf^;-u95UqrYpyE{BQal{&dCZEmq`!=q(H+PckeD(7mk@=`EWSRLSfIrc z*v~^2QK7u~%<3%v*!y3$wdSd(zK4+9Fi!$OyvPKh0mK+aL83{}V<;0hXmUcL z0UB(!!^S5azlp#yQ&wS4)qDx;|M-9VTBezhMAC38eC!P79hvX8iV~B? zaFUZ-f|{0v+=bJ$a^-<{53H&mih8ml6 zrxhlp3_vaa8f2QOiisOjL9q)Z#!ig3X+m8wso?ZEodYX;rdi~%v8hqq%);7r>o`+! zm1gPcp>4{rp`p=Nqs@;HF(b0dN(UjvM3@^pHw-29()cCC%0hKjHTk{}-@mgBGEQ_d zRGbK^OU;_#+T7CC+D1amW1*rhgdgZ|65uATw{pn8$2lc-3#_O4YZ+zn$ zM~)m}N-C?CfeyG3mZ?}Acxqs2b+O8R1WgJHd?hoc=!%s!f-OC!tj6C)j1N8hF!HbO z=%BsVZriTgA2>T4{Jwn22n_;6O>vOJx0?$H3I}{C^@hWuvx15DNm#gvC*!h^cxI!w zO0hv8r^35?&s%n13U9Nrs)f;dLN4WT4J7(RMY`K3!lF!kCT z8X>+B8BpK zYghJun2(7j7gE5)<}fZ|76(n{fN2318_Ku2O+m4Z;z)sd-YV`1CMl6KgSrI8BU}s{ zT3mhmtHR#nqHM9gdiARLJSL<6R9s%G7BHP1g^37OHaGHYuR_(XGFvHS&?@a!S13|3 z*3K12EydY`hJfk3@{|Nc)CjoMX5ApnD~%wQFV7hbkPDuHp#cjs(c0SH+uN%bRpZH% zC#5g1j{OD10~x_dl?K62m4Yu3wZVZH_-j@-A3l6|_wMK0+uNrn$32f7>qezZ=HI5K zj5-xzh{&X2YfvV@m|hMQHN4gEP-{l|LRdy4ov5pPAwi26n(;DwhUUt8s@K>7{4z@& znkmpHDTtt!m6f3>q$N--17=&w5yP~bj7A^e8yrPjib7tUiEqs&g^3h=Y($GT+DC@S zMTEixnR1d3id5HAJ{3?fK*VL0V?M|r*s>+XD;Y8}3!!A56Q1G*6gw)Lkf5S?40@wi zQOeU#Z>1-?8q9{k0T1rF>#jTR+R2MJc;qOONkkMYdZ7u0WC@ccOxkg+WT0ydur{e> z_wL8RPoG_Y{_1Nl8{(?2T+0?%QCb=3i6yFB4@Ovbd>;db5eVgn80Ex5tmsuGNB5hAg5+Z$)+WF6NnEkA0g}m098^TyO#xcUsxngY$XrM@JDWhmrH2E|h!kg` zf1rDzLW4OVA>0{G-1unBw$NUzbYS+xiDM33KtVsM6x`r^ zwVb^~nur%@@b*7|LA^w-a3n}qf>z6*_RJH0XHYWaBpMhNFm9q&T#7|GZ6x60F*z2# zZieWJ)sa6Ere|7QkcIh*N=yBkdtEp_tGcn$QtZ|xpMnUiV&V-@nPK3Sr*??M&FLj% z%eBR?e9EGErG_Xah~sa_P40=ELrl6^3HIX3Ox(@S`D2hou%~%d8^~rj^o_fA?Xr9E z+{O3+7skhO=z_~wf))kESQ(;|E9`Y$w8+|Nq7h3ASFiDfk^cT(PVa|5{NYol`i6&H ztkKU;9v(U+@sUR!dG^_7H*MZvD-PfsW}PBkB1lM|T_0$gW84KA$@H_ZGFHHeiD^C! 
zL=e#s;TVh{S0PksyZlm1G#+vaQarsPUK&CK;Sc$Is#@E zLeyMfCKvK&R+n&)hL;;1LGca3K$Ia5A4{e38&l*r=FPAw$iKMcsMXAj;{kpBr)%rX zAjUm)X0LcRF)hryTo6tcA&H8@5@5RN53u9Jytqz+%o8ZA=L$)5iBRF%RldgN&6{> zUvtfMSJl+TrE8bN1*oZJhz;6(H^T%zg+Y!5CV$VJj{_S*kdcWkBn1}HqG82K=9J;0 zSZH@WM|c@owaoG$uqT2*iIfT;p#>PSU7kLt;jv~Ql!KFrxv~NcFoov=$3VWQ+bq}P zcW4U;@<)+ad!|Jp5120=J%fs+L^VRm1&Dz|lrCa{*CM1(&cgT9$yuwT;B3o_Q7?r; zWF>kPG|K;q;6@E`;fqn@tYPyq>Mc}ImLx$5cd2ntPmd|$&Fj0c_Hg%6;_k+gu)%IN zz{NsniJ)l{k|8z5@7c3=>z3`tv8Ikc@{y02Lw@kV`_-~z$Bwt(e*1}EdGy=g{`M=c zyz)E0^EcWD}VW@|)W86Z-{k zDTSYUR6Z7fSp`+6QT!Cz%h;KN=stS1qoZTS#={~cvZ+-5kVuCd=587&0B6UL+RhJ0 z1Q`Hf6VoawNJZ&w0Q5i$zap=()J#woBk=Cnu@mfOP^{QZdhYXRB*9PBNr$;sBN~{1 z`zakV;z_!c2hVfUGX~)(24QWnQ$X$7rHE6V)-&=I#2_O`x5GT?NesRz002M$NklNaX0eeTS|G$Z6PSgT!6$83mk-}7J~hdsb1$|r)f72<=BGdV>%acW#*G_adFACt zAAN)l(-XH?9X)zvL)Us9pd$qP_wO^PW(eN%o(F+4>VbY~vh{gg3EWNnAySgD{4Q)O zG-r6im=$}5IuVc6Z#!LM=W^mI2R@6a&}WB$px*Qy)@9G9^ixPZ?y6 z1=Uh8fp`KUMG()3XHZFiQ(?0g@iY3ZSoVX4i5a9PTBO)aav-_LZk|icEiI=9;>6EM z9bip-02QgJHzF%cctt{%Nn)`k@ukc{5!tX+&|zvjWOzwzqNza{G)LfstJ>Vr$f&Eo zE`|fK#8Tg-v!f%9@{*ea2M)Ehw>LC3xs+#o(jKr07i-~>f{3XK0R-AkpA)xvBa*a* zTYYW)+O_NSnsjAsbTr1rOP9$^FfA^YFCl5^6|RNlB=Z;~r;sA%m*{SYJByHX)=);O zwA5@jrzl2l#W{kB78QIVzo_&?GMN(O*BhUk3^x{B)A15w9;D`$^X6IWGSgf?C0hytiS*&stvrAvfl z>M-1xOiy(wMFXuhQ)xxav8zHSY;`KT(|!B)nO;_lpK(&UI3@rBWfDt8)KwycRgtYu zE{G=@hyZ3VU_lveGNd*OLHhEg%9`3F9c{Bd)x5MhsYUL}=bB?$7@Q;{Y?)@#iG6U6 zK6h?-0&{WCthCe{T@+2!KtT$v@fN`RNpDSJz&srG;@u?Uz?l^Y<$o!_fg}j1<{5Pt z$@DD4lUXhU6640W=9PZOM3&)d*EKO61z|g!*&TDRssjYs-5EJ$ zQ_j?kuExwTJy#s^;5)>?x8iI|afHZil^|R*LXezivU=nlGK3S>A`dE)AtZ@&#Ofd1Q!*0p4aASC>mkK$9HZ66O*$ zV3W&P*Q^hv*3x;*&Sz%z95=f-l{MNex#L+p2nrWKV_R4W<;QGXLW0gn#$^xz)>Ft* zF^GtB#n)KZl0sN{4Pnbm(EIiE4~kxQ!%gk&YyY4B@_#$+`Pln@g`KWlSr1fJ3dTG{ zZ8JN_(gVH=o%N+jWv1ogAfF92tz5z=;^Ft>cT6haBT=C5JD=g z2-26Fv&d%?K+Ynb0Lq3GLjDX;#YK^g^h+dCS&%SfS*3!zMRerxRyIF{`G~k|^j3;I z#wE}!i%1J}YmAHKsAZSkI2;Q?e$ox=YAQ5MXvC|-OxF}LbR)>(!qW0J0~7nOm9Acp zW%HIMXGbkXy;-R|&?vP>{~()!S`*L;suD$1r;7IWcA#QkO&KjKE3*tLWkdvV;-Mnx zszw1RijWX__-nlWm1jx zpt-zbn8VHDu_rDsJRcmFS&|lst5i_CisoA(E=bJGvnHSr2R$K)r83LFzGoyMcxK4r z^FUM)0{2VP+OkC*bD`q#`|kiKHYk-@AJuRGp=cOTvW7(FaH&YR1C23tCcq}iDlQKy zLptMGM}Q61#)60OKY(naCUG$}LIi|ZPe!b`!eEhI09>UL$DOyXc2wo9x8A;W#}1#i zwXCm`G|Pu!GHVP2sUnR6e@Vp>oO|uH*KfS>CKodEPj~Nrf!}rf_z4PZ@~N$@b@vOq z>gt(TC)5W1Ci$6OMe)X-k1G!6XxEMS9eS`id?3^{h##rFBQp}1Uk z7*hk`F8J2?c^js?dyd(emaW%br#)}I`wpAhu&iMg4@kuNGk4bT&sDwbXB+|b7G#38 ze*JpTYyouIP9hs@H_u=F)n7T`z=^V=3w}~Cn^UYzY;3^2OABWg0Kmt~LXEKYjE=xi zA)mfLX3Iv*s0w|pYHB+ArNvbtwMZeZ`67_vO^ByvimMmF0P(hPA||3i3vQL*ry#0` zAe*;-q60%L60M)>ki8Wb6v?=!P$0z#vV2?k964hvODuPV0?QUnE*q@~93-vKD2Rq; z3QP`rW&DD7JtvN@Zf?;uhSCS9%8ehT_##Lg z5?ZnZ1pPcNm%&TidTnB&v$M0ex6gho(-Rba$%^GznPB+{s0ug3Zv^y~NJueZ0TlV! zzkmO=Tery^^NSVN82(i)s^Vfp%uJYT3S+w@$utypPdWiuv0`Q2763VDrt1vjV`|cc zatjw5v=MZfGiab1#rb1u6r^NeCzINKYO+^;&XuD~zZ{PeGRT%il01bKZL!~ z*UM}PU)6D&eDHnNRgoU5>p%4MS_IhxzknWfB+wY<2Iq^_vF)H3bwz~ClNdy0MXE~X zXex%CyAZydt@ZJ4&U*yG=2#>foA6z(NjZUXZp-42Eh{FIw6MLs4G(VKam$M@zRdb= zzy3OKR0pQfg0``(vax4Gfxwj>W9r>^-!*u%Ggl&Km@pqd(R1_7H#y(;g)jUzPl=>T z@9OHz>62)6oZrxP#0skmH;$!rwX4hN0lhJKH9)$i9G5aw!w-|6uGz+ya<<1k@m0$! 
zW8;T~hb)*7vS~>%Z#5Inv^PnlIf6LaC`FjEjhP5l`Qf0k;$DH^rec}J1M{3I2+D?% z2ni}9CE*v{Z%HI%G02SbCuNNYB%EiIw2t)C()P6*YwB0pJa+Wh@dxj_-+Yy-n;II8 zYizJ{=+Btb8tdrz1pmkk0)-?6tS1lJd@)Q12Zv+oYoXmN7pazQFozBuVRVXZz1rBa zGHgMy7dmT;2we5yk+Uw!=dqgs9vB+*ja2j7E_`>i__Lq;w|r@VhHED3*=PkmL(G2$ zmR+#^pJUqCfN9k^j!uPb3RfcT`tgZ|OJ_%hz*md$4ImN;jFkRkM-zW$$F6N>bVIrq zm{gxYOb2;VeS=TM!@pcMgyv6hlE|LP0_jT4Cq~2|%aO*4IQPgb@G}z*E6bw9#mfH7 zrsy%VV6nd-X8=}1$c6~W=>UU(`!2Y-gK)&2eG?!d4aHT6FLsBlXhxLDQl&eNtEY?V z0-e$$k{HiIpFr?;u z)UgvM*xk)rx6nzumsQ86f?=}Kb9!Rhf|mUQ*uYOda=6>H3V@W^YS5Se`mcOz^T?6I zuDRH?m(E|-)i4C&IxtdhOMvU! zX(1=tPj1f0*hEcrJub0$moLO54a5}jE6&Es(#zwtm zw!e_bEZXc0hqBUT$l|oxPhlPcAj-yJR6*equufb850J$Wk)O*-oq%G_R;_M;5erBe z4jnwGkS?+RRKFD#y&O(cmDF=5F{p~#HBcS_3&1gTjAGadB|A)Yo+*6U@XG4=v=zFt z6tG1WBU>G&E|+pTEvVylV5zsam$?LRU1x{sd}=xZw;y=R987#kFajv&I=JMk^GrBImDFR-Vdgx?v?C+LB9$$H-Z{j`yYgop)H~OUecp%G?7qG^q%^DUd0mDFZ@7{B&XYvGTms=yrFLI zi*K%5yXu7(UNTB+S+#1t``->6Xl-qC#77@#4#I)kfztz4M~y$F&~~>Pa5w|zm8;qf z4O`dS+uJe1F=;4iCQbIXy?xEl*)uflRR3v4jd|4&Ks=RVQmHW#Ii9IYq?!CL*Z21I zpK5Aq?&#=z`|WqIz*4`-h5l1MSab2KzV){1M|T3ZJO&Y)KiO^DTTQCdaHXD1yYj41+u z!O)^|5(OzKQSsW#D2vfY9fQRJz2c1!eU$~W#XFr2v44piqrWd#zWtW7Z`NX)rw zKCLcO?KtG>fwcPM+i>NPM;_r6IU9hr?#sV<$8Ej6y^hE+%voLbCoy^8^G$IDROmAa zn40u~+|x<{``mNS8qjXou;EKz`qJ0F_Sero`z#7N7?jflP=XE;Dk%6#>_MaOK@^|= zjT5Bt0YT=R)o+TEb1SSNn9VjMU9zC!&6i(c-v)=zF05EKIx@L*$?$Ie73O@6@C;!pm3>tH=B2fBt8$uoeDU7ifCLll@2Gp^F9zVbv&b!qfRefRiUrQUjE? z7fMgtd-_aAM@L^@AJ<(iot?38pg6w(WEmhVgHkA5h%W8wxMME7r176g0(`bo&X1+E`4r~109pW;MQPLr%MRZ!`!-Qi?OKvPkdks zEdPryzG!}e=%^IGRcEpt!amF9r;o`I8to)E@;JoT+S*RY2*mr|_dcTYr+@mVzxR8; z_u0>W_Q8kl9~l|;0XLWc1<9nZ)^IH)Vq~VIv`tKoo$NhMAY_Icum8-zzz=@#^h>Y2 zViCWwxz%Cx(ede1{lm**%CgRx_4DVhENuy7G>4+99U3~5;swNbl7MQp4De4X1zAeK zJR#auCMTrLt1Br~1bPyQkEfrqfsiyo2i1vta zWuP8P*+<0<9v>akZSoo@u>CFI_-Rg&mP%0<<>X|l4#D{s5<$~m_@f@5nFpl3s0Vp( z%_PYdD&hMK)6=%BFRNVPrpTd@vygKc?DDEgOa1kARlWWF>(_Sy1rsSsQA-eLvg%;c z)6+vF@*KZf(3T8SX_cyLVrs@tx9Xb8#WUyP-q)*FZFjYkgL@GUW!YtM;o>>8wNo-M zTo|+WA{1@OqH9!S();-FW7EEIQRoQ*GNV!ojb6E=1osN1gN)+1&@X=uqO z8~Hc5i!IASY$=bSpMF$-YrVSE7GI)LHsOdUs-UNMgt!1;&d}ex<2rEk=j2ucteeY- z0l868bx}>ZE4Vn=ZmtR2BZH9Gdj^DJ+cGglKXAQr#bS59!bMjoAUy$+&YnZa;tIkU zN-D^Ll5+C|&@hAVdckAIkLgPvek68Kr~`GBRTrbCY*D#O)hp^)cOa;)MGE{|wrn}> z-gn=yn6NYnHZ2$~=l%i<^eWfMez4Om!H1T5OA zwPmRw71$6-MWA_-6mWt+ix=0_HXS{B^wwK$E@QujqepvOucr#2D27`Kbk0D=L5|^J z)TUP$2otSH@l`vWSA?CS8*lL(g`C@S67#hkD-83kKz;vVlgM@jQ-?=>X0l%G$|C*VeDtrk_Ccjcub&!5a%=Gdq=xi(sk$5r$TmMp%;!nkRwm|FAY zC0jjWU5mQF-aUzwnvg|LVx8CG7g@5X$aJ^}|Jd>4E}MMf$tOSZv5(k^Wnj|Nb98*% z$%LzpT;fIF$-eRNsm7H}S1w$2W6Qob_H}l2?%A{3aP66Ao^EW=W4Adbym#;O81XCb zd+&of+HySA7ODS$zuCJ+eOC!ID!*#m1Z~K<5noMp+M@P(l z{vZG6mwfKGvuo4Z^&2W{>ds%dWY^BYLkH|v9v?kxi{LUJD7?Dj{LF&Uk(rlYeEqgt zcfPo5@5s>bL-*dd`cha3<%a$)4K09PMOj_Ccsp-l3 zmF}XlCS*0-c1$yhv{nu{a)_d|qd)sOP<0}oX zy!txjr)De*W1-)s=WKzSOsM^9ufF=`{{1A#2hF9(=3cnE+Un*;BEW5q!-Es!lfy&9 zLxX3TS2E^{%H!j4yRL116^nQhOG&)BAlBdI!DHi-Xyii~t`iv>8Rxq!xn^k`biINv zc7Zu}Wp3{20JLjR^VDFt**D&IO~{(<;J-#gL2N+ z+>d+w+#}>Doi*U|9HID#v5st+m^Es4rx=%_Ckz1xBuo+eU5vB(BGBt%Kh&WJ8{PC(}l!`i@#TY~$LA(Hp2Sc9FGK0(#84F+mkr^~00%o-l|$0uVs*q9VR|xQ;U9; zREWS0{lUc9UAuMx`@jPaaLP1<^Q9po-fEgb6Hu97Z_f>e6(nVVPX;hC&J&)%TH50e zTs&u9bDXmgIzc!ltQ@|T0Rk@aXYAobECAfL zc^jSyX9mn`9-TNh%}lPSsxvJ~I8}-^`UiS~WUes2cwxHd$l<&0-ue8`pZU%I_BR|+ z+Pn7!`fg_12a?`?_nr0YJ4Po)^?240*$CS@ENBzZn2;(m2Mr9?G|1cGQv@*#rHCGK zSzNs8(5PG zkhe&!h~gJ!VxI;(0N7mDT;?IFwWk4pXjRntm7( z8*do`5FjZ66O^IqGE0GpNe<#bzqHYi2@Wij;XrJpDl?aGFrM|#%7Z5o&4>J{pcJ@e z06`GX1FG_rL9!HfRe7K3(hGBP<3HIruC@q@Kt{s_MbHqozy^m(n6E`*5I5@P)Xo`? 
zM8!`*i)5m~5)~6fGI$F)#S4=0K-R3rQ_iF{2~epN`4#jO(Lyke*XjR_*V<|6Zbo zdH6~S<xwB4vSWt2*PhMvA_4^?p@Cz)h##P@b#~K<+|;gE0=F_fydy`nTd%p!x@UYY10Pg zlJqz=v0!QR7~ENlidt7l8B45zZ4o9*Y%rmfnn~u6%1_yhYE_N58p7K~3XwpjoQ1I% zH`~D2Gpq#R32Kv~tVO)o^x&tsBkx4{eE5M2EVW}Fg(1ETb&Uq<OKrz5XzPUO zp2l)2Wz#Dnnx6{lfrb0-yAR8Zd%-cO0cUTa1P*{OQE9PmVF%@8{lN!B;2I=8C^j+> zrxB=%xQn`cH{C^iK5f$7b42mJ3-QfI2l31YSbpG{#N0sXIm`dQ=t6_Mb1JRX-a>{y@OWX(nzrs^$6e=b4v(c~ zoq!7C7CkwA;9;Gg7}G~J^!J_o^rt@gkKg@HZC&N6)lFNsY%s&gp0=)DWf*B%iyXUo ztD|%6@X#6OC7_o)^NC+5VJqrsX_5}ADx$iCk30nfLbL22j=mT3#EZ8@S3(ZFSEM*G zJ}!4fU09@1K0E-948SJK!g{*cyAVfp5(BOanU*e<$V29ru5DZAdHQtBzX z&^YB!F|**=LCTLN0tyIz!q!jp0oD@@MbH6oKP&FO`C3m(U22NT5(ySSN>oY9CF!7< zaW2QwcqSB?g~&G7r$+`eyk%J}^XMDl0fPV$^;#K&Vl1yITJ~%w3{x+QklvANOSoXN zo$nqvux;D6O`A8jx3|j#j(M$l>hQvWt*>2a?bNu7(ym|MiEa3*mOXo3AZi;nbn)?k zf((WMiw=QF8O70N(58)4&%_2v{C0N`dDXr2^z;}kgQF7;hdvIn)HnkEv!DIUy1MCc z){FWCf%x6=y;j@{r)zA~^*UGV-O)kbc*FKh<~&D-H*DxE)4;KrS>ITfx@v?L4f|(vTqhG;1_7zM{#Inh|E$Gkiq9h=QklYi;TkE(8<xluF`MK*1~)Y&p7&s1F4F!$153iNUbdZEaN7nIU(%3zE0fLK#N8Sb zH{5XD@nhW^HmtXKb{W_ji2jXsRfbm8&>+aGPcnVz!M z(WJb=a+n}=uyv_=)hdgMda2>z{y+Hr|NP?a=c#UAUoQv4j!(4`jFl@}_I!VL|7l&> z>HF@#$D|U(P!H$eg_1y#Kaud}i<%L4WJ3XoUh!IxPN~A~C}k2xUWg#N z^`3}p*;I`nh18|{&CP(puiPRO9V4X%d@3%W3yUDgPXT`VT`kyTG)t*eS+p@61XsY4aOZ@F zheu?vs_)hWqtwCSQF_Q0=osN+++=H=X5y^He`G-f4V1x?GIO5s9m4R>QmXJOZ9#z? z-&lo}bj5WQChd__uWO~!;(}KKQY^m4$KpmzCR6w;)xM%QU|X-oqNH|_?F{1iGb5g< zFR3XQD7#^8j7dGCXoMJqvIQ>HiGFSNTmayvow6X{vU!3gWF(pB>1VjUC>TKJ>>}Ou zE(=H$*R|wN&&XG-h`6#@9E7y8BH3&acnHw)RFjZUOJa=K$db+UEIm_z(0FbW#sB=j z{2ntY;4V-GGi__yk&!VDPpPF@jq|vPkWu3<8Q5J1GClw4pZ*CufsH;qHnLXvnU)^r ziIRCK2ym4xg<~PHKma|eOPKY#EExne91d45p0^R!)$L{$hR=_D7=TFT8TeHI%+BwTQP&)Yh$Szv0^L`(A(Ti6`Fo-~5MP-*NNJJYt`D=pXs+M1AjFl8VrNT%t6`T8}eUN7k%{8T7)J9R}Y|wpa}ZP9Lsuz zFNO)emnFV>b*xB(CO`Wed`xbZY`}UYQL;EPmM+a1f-v*ANxl^%{j$mM=%bI~#oG2Y zs7*ilt29|B#Zamp^2V#0gH0fc(@X^^yq^GimB_4LzaAO@f~--3xa#E8{VL7@R-7S_ zg5!6t^bOuo#o?m}J|eHPWmyZDSrKkROewAwvjT`XMP*#TkxiGbjRH_IOG(67IuHm6 zQ3;RisufCOn(?>x@6*9MiE&|O8a=tqp?vlMl@Ew(+I-NQj0YFybSERK$hV$?pMuQd z^F=gYDDNXN^FS|CJQHl@_f|^gsj%P}A`m-WsX@$c$?6@(idavUH8mGFp|Wf-OQDfX zB-ukfGNkGzw-6esum{+{uPo^@Be5Uq@?|?+p;2ZdE-;ps|BP2n_I>C>A3|KAyQfFl z2OoM6G;XjwTo)-26Sy+qPfbie`|Pt2LwXD&ZB~i+P9vjnufoQS8#voAR0wbf8<(I_m? 
zS0b-Hhs+kKV2MtjIj!v+W~%OF7Q@um)=FgK>vd&&AYP*eqH&>$7bOZH@JoF76>S}D z)!}H6UiaR6?lbwvBAAr=+E>47Mu0$249o(*y0+RPygif7(Au}{(hOhUt*xuGb%RY| z02~97=u4kQ8J^P^gVOSt2`# zZJ&1zS+L;jSfZ1t%`K}OS0F0Wld%`wZgi}@82eblrLC(+ z%2TJqFY;|6b5wq6{Oxz#amy{Y44)l-;e{7;fYE=JV@AMghCNuS_~#d2X8t+YOssnB zTjEolIB|l|Gr-qgyPc5XsQjvyAN7c%lr#OOFdnxkn74cP?y(_VI(Ax_R|Q@Ymx2CM zdNt!9_L`FTYVWqK+icW-?bX+P77ZnM!nT*-B-EI1+u}d{sZTug!2PS5Th^^pRcz#fqdyZv+Vp8CL(%kd zNT(ISiP+>gD>rC`w(=Y6>PA>#O)C!dQeHjP-Mo2|QY$K>b3^{%>8#-owv@ok$NyKH zeY4T6jKL@awIR*?5jdcdy=bUS(9AOtXIL4l8|J_qZcHZ$)2~vP8*aEEiz_(m*DMVD z$3{l|1Pabjf+q&iL&~Oh5O9qw;ezIftZr??3r#qB^r%Xq%cf92EBWB-mca*y{|q0H zl`_b(SsfaFdP^`A2T(S}k(ZpnlLE_d_{iaMzZ{k0#KtV2&WMhg!|(wl=&{A76~mfV zR1izup$Jrg(gdKiMLeJLk@>{feOcTbrLv$>84^gskw1%Y+%!S(w#Y-&B|u&V1xv<= zn5N2&kfFNlG%}?FHm|zMhdn6cW6HU#TmzAcI**F7gs+HGJfy1ts4#4H+?5awu$M~H zHM`865L6$bQqZ6SahclA&Q70_A*?K=;u|(@gz>>c2h-eQypAE80d))vV>mIbL!g1` z5-h}H)$ZN9w{MU2*o_;5wE~nN0MEr-$SYU+dV3ji_u{W=X+Cr2)X>o2k%Nb90NAtl zg@5qB|QKNISo`*U31GVxAMuXuWBO4TbrAko3?D) z^u+t0Aj&%1ndzCc!*PJyn88_I)@Nd3@?>1H)%@mLZ*ShbhC+HMJG=j$Az}mHb;?WZ1tkV=UpB*dq=O4H1=o` zW08{(inq44lihf*ARfa2hwuQyoYJ;)qwi!t;1Ik1 z`s=L*gKtQt<6XUa74h}NGu=oc78JP>*yJJy*h+q`9~rtRPNhD2lE z<42DfZXE0Bp(2wL)B5U%#d5n%iM6o+64{84l^Cn+tYi_q zGS87Rgg)*j3lT3Lf>JMmj*nZ{)Eqy4f@$-SXOw^Yt+(~VpZnbB%Mo4n25o;MaXkUDtaU6#VK zL~}#KzBk@jvXo5tEV^&Zjn!0D{$GFjm+yb#{ipi-N6rj)c5e9gfBlZ3c+c_U?rks{ zfAg((^#7wA{`29q^D|6qy6wq8#mSZDpZC#*f%To8yPkXY?Kj`Dv+8@_ed?wgZUpmV zAN!d0?%Vg)Pk-{vZMWU-4!Y*muKB)9;1IiXyRmK&=DDJRa3ffV%f$V>}#2tM%)% z#fu#?Kp6_4#tY}1pnQ&%#Ftb3rJFFPKK_x{?Z~ z*zqVsrOT?WpV3cJUZC`o78{CADL4zR{O6}anWD)89>x4BiZ~bmgQ%)xk`I{3UGy{4 z^C?kI@~TwAisEJ0UbX=ymIF30F!ZAz{CM#6;8XwjJri8@u6nt$ zzz5t$iUUSKj{tGn#L_zTdVJpSyMPvCuR0NS9( z7{!no==rILAg)C&z=Sy~38uyQcrL++h0YsV#kOBnMJhdID{gh*f8~4(o1q%=Jf}`5 znQZb*s%9kP`BwgncxFWL3|@@KE>r?#tEX1pY7jGxl- zVYavGf-=*nccogm2>Yav{PH-(BHPb@_MC3^=9_OOrh|h6WJ0Ch-gNVg9J~h}xIb_% z0UF>>O`Mezo8{4G@+9?;9Jz0P;~RI}?hfaFy#N0De6;$v|A*g{fy5j;beshH&N>Id zAk1k*P2mNdGLhP^|31a7Gn6w~iT#RM8yq&Ek8vOEGS-D`Og>(T$pZFRa}XTnwo7&YB9=MWUot{+=%m1!LM1F12e?0>})G`HPZX^LKaJxM7M)t zVAj>vll#WTCYJO)4?U#2d+pWND9Hyu_(64btnb7HQ_#2o4GM!h{C`A45K2qsz@ZY_ zh|4xA71mz?pKg1_K6*umDLgJq$8MO@G3l_A2io)})m452UDN|P?X{e`M9rk4ot>RF z)k%@rc_)zRWSKKMBgM51gsop3zkXHnw>J6|@T z#i~h?GGAx;%=F7y#q%dz<)4QON@DMsBu=zskWJ8WE10EhwAMm#Zh_3`J042~BbAj^ zxabZF!qM8kMjZdJ$6}0$iqY{gyaX1i++$A}SbCfwD?|iiOe$IvqBshY4>wT(ZpofB zAhELe3RRVgLx6~os%_iiHbCeR6bvTrc<nec-N1P1M$)IFio#HE| zr@(1%Z8@S>%Nkglj7*Al%&8 zpw$-~*R;9F=@ef3(xr{{D-HN)vP6QUlsv%t$wnvRGz1(0FM_g#+lZ8B9wWa8RTcbnQb2R*Bo zrc;(J<&C?4o`-D$izN7yHMSy~$hlx+64{A#^6igrdErWhI}_tCCf>*6lBs%^kSUyrSo>rU-tdJr9KDSbE13ub=#`zYaGw9VaPlxtn_wPH>W12dwPyJ z4Xfu|v!?y^*I&E!mRrn2INJUB&;RXV=Un=I$kInm;C8$Kl$Cz`Iv_W z?AnyozNY=U>o=@yY<}n9JNw?+_q`u{@8ci)1U$IPY$$yoZR`sb)j5+kswIWa03r}O z?&{1ri@OuPw`xA3p>gG!we58)>#PrBw_CC#nie|JeI)YC@Nvw5T9uistX#o7E?r8g zcm!2EENtcIm^?S5L2{*>I@NE#Dg?4yCf}4+rR*#u`OmsyYUwRx^uUH%`}e;wF*cfu z^Y-}qgcad5W#Bt?q+;0;E6jD3RrM~|HVXE4c6@C5!j-nQ>ntSduK8Bj&eLX~IE7|! 
zV<1vKm~k<_$a$&4W*OQOyLRIGRcnMHb4CCBj2UOTU4|>3{+tdrCmpAWqW`6Y4#YYK zf8H~`83c;$KkOdJ8`iaIjAm$O==+=%0UXgaSV-;1fC&Gx2bVvsQJRD-;)yiU<86Gp zCCr*lp#YJQ;}?-HX2blj|5ve9n@B% zen<~M^7O#p{N~?&{p%j%`L0lTlu*} zh)HubM2f&@D`F;j_GgefXhJ##tbt-QC?fKxjzO*P#rW^!E=UFCOu9_{>9t!&+*bvTN5aU7QZkQ%>l4 zWG5%SFKp?r;4-!L)FVcU`#o{(y_@X7Z90|P6`vCY-eX&zH6Qgb5V<;f7g@&KJD)1kA=pnMlYWqoqfVH7*Wn=g<-fFUI;#I1>Ocg?* z4@wb5r36h@%8*k*lTFjkDY`lIf;F2ERNYU>9+YHWgF9t(=ZtK`iyIsOOGs)%UM)xj zY-WhUwqMQ#zhv+#$rw;dA1+a-Sifd;6g|_zR?dN?x60OkEn1 z(IW)6;AK=yGNfC(Hc%#W{-n48RmB2;N+JZ(r?EgXya5zKB3>0QD<=JECqLdiT6JBm zPR1^rwzhWofGM+gRugr(sBFnp3LvQe&O2{)c6L~yF-E04H{X0S1Ik(hPCa^hdN9Tz z93?!>T%Y?)K#uDfDlVsw1Lh0B#omN(SY`xdZyjG#$0 zbdeFMb?&8RHmsK|tXQ#lS*UUBj<{%X`1pxqM8CGKcJ=CJqfG)aIW=ws^+#X);_v?M z?~;26q(u!d002M$Nkl4qlg zzzY1qAN;}RKKJj4@18w-AAR&u#nsr*6kezRG7f4ZiNX%kN-<8H0uy4(U3Q*YwU!N?Rf?F}C8ul{|i9ltjzv6WL<3cA$xs7nJ87{TNN=@*t0j*o_&avo4qG|5 zZ{JSgIWGVh`pV7M>uF&nMxr1SfGL62qJPyks(9ARRs}H6NU7LTND?>=p%q) zxMTXs)Kl~i6`k!NxPrnNz*6xqS0+7w2Izns{cvI_Cy^3hqo7xbmi9}GJY}ZZE9u^9 zHlkxQfb)68r7){r@c<(@GP+`UYipb9m#rMxx@>KR1Q2DJ9WVOwm%j`WqHo!_nT|<( z=9y=9?!5EkAOE3Zxz{Bd;vE%l{?%a77oj-c? zs6j6=5;RTvEw|s{UKTgOtc(vlt~ft6-Q3uORWY(DO%5;1`MnDBOBHVJ@M#Z3=ipgf z)f?LW+F8ysS$YJ}Jo;Tb@3QUmm6u<(<3}$9Rgk*6HfXfw2Vv*fEPtx56_ez&W-@1( z70Zm}lVWU;3uwUM4eE%@hN4k}yKu{*2#Kt%k6u(}Wnml~2|2LyImS-K8Dh1Hc?}6M zzHO8fFFYw*Z*MQPu~g3RF?pre?RUaXqMnv&xvhqvV{7rKeM&Vho%2LnI8YLl_Z9-s z&?5KvzzSq}5MNtxhq)vSWI#^Z$bw5Wd{u<+Kovz;3I$<^iCI>?^ix+%$MnYREY6-h z*{iM`X``o{%7OrMk@e;9HVf{jwItPh){C^A0Vqarm@WUzluKnj$RB3srz z_dfoQ975OHR+&VAmg*_1dPM~338FAQQbOs;fg!Q6KFJF1$6qGWYY{!J4L#m66kPqp zGRhWKfhmGOV3IQtRbUmW6+0K*0s(D_Jr%wmc@14+8X*iZIt`ABZ3(D#xzW;P?gU24 zzP^*Kt*gx_l8HCoc+EM4_BE}hm$}fGx@uK3PyEc8(=@87F*aS2NG!ofmD+6J0+&yG z;*+~~@7BeUQSO_!K*4-?Wa8q?d0#Z!)X}x9Vu`-j1x#EbU8@e?n&#J)Q zUtZW$yK+T!rG9v9bVMx%Dz@)-barT+T?wdq;J`Z{{5Ky2+2t$md;Gn&=!}kylJdre zMnu3cK@-)hnc8b|NZf87w93Hp1sRLO3`s(!*Ogg+!r?bgBg(u6NArMkIL^l(y1F)m z9EH`6P^fFb9u^!-Wlc}S>LX_@uDOU=04|JKC{sRKVZ9W>x+zLUT^Y3$darbVHXhB| z=($w}0JYRxd=Zx!Rp~QQ`x)c;;#?@DxF8!bzza$)XthpT@j&eARh=+B(Rs>4a#%Wq zqH=m;x+*jHx7>1z8odJ3PF0D10_zpqp>_r3mkuDJX{oZbngXw=s?s8YC#Tsq1!ZO( z5Gy!R_WuDTC@334B;68|wFC$*jis6B8P!RPkrPSFCYM;W(}e$juN#TQPliv1K}ZU~Sx{*v{^Gf*vps)g$pnf762-w1@e~)L)F93~h0Da;h2@*WSo1%Eil4VP>!}7$$%&hH*jeYU# zv(J(A$?Laydaf8;S{gq;e)XF3sBwqG<;#=b`kmjR z$;W^B%Om$4!8BX&Mn*@>Yvs1*ER+Ki1YrJ7*An*rEJ?&K`nj#@OJL=8VQ&Aln zewr(0{)jU2@EP)=9ySZc=Pc`sI*>gsHn62O_FXgq-&PGEzBgZwGQ0h^eZ7>K*nrzxt8x_tsE=!Buw zC(Em73}NXY9335XOP&C;bW36qi6T_?+^LHVp|WEpj%5WK8;ur;qD^_x_fRxS?Jmhx z_g9r5vCK?`HVZpqDk^kR)u0kdY+gY~_+^r1JH3pAwv-NJ0+)7)ypa6rd2nzb22O6Z zliky2QJgeeU6WXJOe=fJY?J`RudLtfQJSfyi!Q^NQLD z)AgKmOxSI$aCiQ07A$OypJ*^|{4`#)j%Wy?Y#r_~8Bb2lnqLxEvNzXr=%YG>k&s zy(mr(NxT)N;6Y3-gyj<0GBD8)Wr9GjYZDSkjwNx_G^~~Of|L3g4fgNtWil{)ICdJ9 z)3agn<5~b1tQYuq(Y<)lNyb>(h?~NI*T-UlLBzp>2gw}w)ks8Rv73~wzdzRPz|Ty_ zra~d*QWX%7fdZOS&cMTo+j5N#4~c6m6$B|GHA6X641wlgWa*d)1%^a%>SEeLgoabj zBu~>Q5rAr%FoOg=JdO`OI0ggVV`wNmdZ;3>bLY-6Kz_>p^wUqvZWMrTjEY2LP+-## zNWxqe0p*I*yC&zUnTT-q>?!g~m7~`+N8m29#RYC4aVjohc%|4HnrYMR#-@3YWCCgh zWtHZOdD&FmZOo8+Wf}vDP*BW^q%y2ENfAFI#WUc1h2hZ9e&|4E01OwrX$U3eTbvc5 z)M6Tefx+LTKo*7{Wpa5%Exkg0b87+ZnKlFe@cgQ+vFgv%nhg_D8d zA={O0$t#ttLYzdd;5~=#(E#r9avD-MVQ7wOtna&nhdLq4D$0H{a5tc5&!9*{h1YZsWZ~&yJ6ae11A_xyu zYJmv|Q?Aa?TM{E9wI^%<CMct=o2oeC^7Gh*KsX=sAr^p^O4nLV^ZKQ&_XllcWK%4H1R;Bz0ORY2b zz6rH6%az?|xHF-*N=XGk9-Kn7X0dUeUTJEP?EpT5SD7dhYG{V}z(7pZq-4MXW8^Qp zt~_x{9RSECT}gWVbde!oxAlh#muLn(?E-oMXhXcz9r#)gv`T`(Qyh{+r6v4?TtVBf zni`sL*`0=Y%@YBMXzG8!>_W-+uJ}DvQ#7@rz$HJEG!bSjvC;&;JKs>DgzWHQmL` 
z>&-N|R;;*F(NI%&|LCFSn);sBj=|o&r+#_-z{n^%?Z7ri)z#La`}L?I3{)diQzTd)MP29S;&joO(=mZ$1o6|K{`ADj<8}zEBd(HZ zn=aWz!m4+~meMqpR?BG((bx0vfl`erm?qPYVU83;>a`>qlK81;*_CB?CnVz9F^L3^ zw)8^jA@x&n^`aG;V0mflQ%u?VAXA4^e~?XMQNk{GQl9j3nV!Ti0unZ=4ugvO2?=OJ7OO{97FYCBBoi+|!jDdK2Pi3iP29nPjnbvyOuQ}YsR2+1L2^mV)|6-? zCio1}6QL)7Z9Zp3st*kA0|)3Xd=%qn7L#$WvZT~B@dWl$@#4UU8X_Rgmdlz02$e2M zn@?{u;#E?UW%&uMx8{B@?xmMsnzYk@a`NqW-x(Mfc<0@B9((L@l0gK4efa(d_-iGW z3&*suk=htrZ8%i!@BQBI@rNzO=sjGPz$7!+BtR>|aQoIxZsc2Uy``J-3jKCa(B!Xv z^(#OA@sI!T5C4!QzIN@p*=h2D)TYXY#oP0b9(|~#w!WpNo>_8yv!^3=cbTj+fwE_J zZ(O5MwZj;=tF?WlW|MundhH6A-pyf5@4SU9Uq^LXEEWvg9#H&|BNo^ZU>ehRU#X}G zIrSoN_`r;_DV$FHG3wU!Ied<>h!L2@;8?jS62mD2)n#bt6`%0NjY%5&jjw-`)KVBr zDUUq*Fg*C9Ub}Y2OrFD%ot@qAD64Qi_ztHQB%{XmM3&j2dnEp!{^_5x_lA(7n#WF6 z0grVc-MuaXk#r?$I~7Su@bsRZdnJft8d>)gG8cU?e%5vx3m;Ta8+j|QpD0gHgkgdK z@ktnNFbX(bw0{La{i5gM;+&(xq6PCL!K}ftHdB2$llp@TfVwnA7sg zZ2%fw6_5c~d+{EXCA+%IqOAC?`}!u=dU|T=%9Tq?^NWT&+L`FnAvh@qcEG8F^3$tr zCBifTf1vVS?F!V^Pq50(X(bUMGv*WbWK0;ZM5;#z_JjAgm610XOG@#XoY zLx&IhqOMDF9lJhw`0$6vj=8-4kuN-oiHO1FpT72*Ihc;Nj@Mp&mF)21tS+~;$FiX| zv(@Gw{^9TKu*uOBjor1o$L@&BS1w^}S4SMY*1AY?9mgO2(I0)|8{ed{l*eTYj0J{% z{P8E+FS9qU)%SMqJaX`!`pO;k74*}Q*V@)bla@PNJ~KHvHMoDUv9^JeaxZ%InkFl9{p%3tTYb1#n@VZ=wDCEu71fwm?jAT3dnG`A_ z1f(qZQ2>1!^#ems@Ybgm@)hLK+~qti!YC&VNhB-?7)WG6#?a6Z{ZCDX4Nf*!DFUWJ z0t|p=rC>OwET)Rxq1)y=QU1moZ=gXg_$6W{m8PZg>V+Xx9FZW+IYtPorl#OG!%Pyq zN?-zJS^#0@rR-{mXLzY@!l-a85Hj^vL@|r8Qyf}2l_J2V%2*d^cU2c{+uQ@-t!7Yj z(6Lm*Ax$>XsrZg_96RzXf1A~#`_13{DkOF&V9eAk7`aj+lv1;8k_b;QGHT#7nRfxB zQ6&l(=amRBiJBqmU&gxS{nl6xp5C6_=(AIW#k0l6(9Yf?--B!vi>vhOe96j8jP(Fl zy?p%f$0H-drC;rHKu0O|9)It9|BnV!1d~kKg0>yerN|X*U0K$r5f0+Ql|q#XIZgGQ z?|fHxt>Z$TS0_%M>F?Wvv-Qh6jvT&sXIn?3ZE=<6?kZhUV)nwH89H~|?C;;}Boubr z2BZ63-4WAHd@I+!w0C#fOS8DVWNd0%9}#)+#plVy@X(O2q#24o^Yqha&Yc||_I0d9 z-6jzvbBSnPv~1aBg*k%T!la1k$=`hQP5faU@DOLXlyrkd!@X=!^iW;g zLpc}~N55v}<7ig&$PSp#4<1^17w2Q!gSalyPZ1&^B2gBaLkXqG1&@eArPCiKOfA%l zk&>7_sv_`8q4X9j@QU97p349dHtAX!%=q{?D*&GAdKSJdYmt+uPoqIbYH5cy^M{!n z0luEO1DSV<4v6v#4t#l-NDVoDkDUI8ek z7Ap7+b?B!}&TKa9VZ{L$cH``JVK(xppgo0nHtQg!K7iZ3@&Ji0LRw=*}9svR_>3Q9@UEo~I8 zN^wVHa)|QyX>82ZXF@T=Fqm)Rv@`KFC1aWA{`-#(3=ICmPyUe_m(>Al1h8(I<6;(e zaF^&C4%1Ss*8ejxq!vl%&Yh!UUXeCsVI~&k=Wr0fvLMe>To5m)!@bY!^h90ArnkUk zCm4c)OooNjR>$->&6JTmNWp&<*MCwceXpP56iI{PUK$970~rztgO;^_%#-H~n_yG| z4+7Znk#Z4-j5Hrw)x;V1bl{bUnp&0g#8Ne7VP!X!C_4vR_rQE0+jv)RsK50`jrjlk zK7KW&!4n}Qd24_f+TnjG0zN=R)S<;3kg6$&=Ueo6YcYzw2(zc9yo-!7<@U z8kAy&d1PPox?ln<3oe#p+ZafP2MoHEgnc60<%tL~g4YbGgn-fKDV;UKS+9baVnXm5 zcRb+%gW|-WC3eOwo?V<|LtPc=+qc*CM^8TeRO#oOZ`BG=!4fbud=e9 zrs*Gh?6JT6%fD1E@0Xw$2a#aXz!TQ|;4OosD3e0d$Y}e&{?Gqgu7QEU*Is*#otCJY z$z_opw>O`7`=xBMD$Bg|HoD^4=o>FwI6pmmbLh~h z4@E7m+_BxpesTJ9@Zjj+z`$E?zR7psApG#Z{_yBS52~+qL2SedOwb3a9p+#W)&~D; zV%?~4I>uC9X23`#NEhQ@8J+7HK6?P z$c-%~@Tox=+1VZwR~Ljz)z{}%2i+aJpdiD`!NI{YRE!l})r8-bI-VE{8!|wO>+w=Q zgMo46FajFrPm|LpO-<4Xl7ccL#SmdBu)-5Cby8(8`2;VWwWX7aLtElkM;Z$TPVB;j zzP^66F*Lkf>5LcI$e^YGJn@-7LMl}4j8l!}nht!vas7Jsx871w)p4j(K@z2)md@|( zH-l(mCC*ugZ>VQ*oU4rc=uI{X!-gBt!O`2YrixdH3C%2%{C;Xpl@i3GiHj)#V2_6Y zSYm=f0vpUy1QF?;dfk0~1I^meQ3BwpdFP+f$fA72ZlEKj~G+;BtfHNmvRY)vb<6ec#b~w zkhX?LDc7#J7Uhc87pFRquzX69q6n2z1xuo``}Cr-Xhsf>Xj^^f<*GfYFbd_vgkvfc z)#ZnT&==K3zaYDx1tK1)1<6AN#$EhHIZLMOaW(ta0+PJR_CPqi?^laMenSK<(RsoW z6)4l2K}^_^tu8m4DzkX1p}OG|)Iw%~%4RBY0J0|qWFoygi6C)YfGvul!y6U8wm5P5 za{s=)W~yF#@x@>G+#7>khA2clRaX`VzDZS6OQZ1{&a$sR|M^E)qg8aC5b7e%o;{-q zTs4GUycnBc-OHTBNJkIpL|ME*Ott>1x)zvdAR=uD? zr;RrD0cstiUZ`}NaqvLjXzgrm?fj*S`vwQzeDCegPMt6arn*kFp-0P_&AZ=6ch@&e z`QB#>?!bPMp~ogks>4`N9wHvBi`mFxU)jIAUY6+uuVScGwq&4RY_6~RoKHtOu? z@-~f^y
B@W4@Z)NB3dM?W$Ph=;bYL_68eX$GM#RsmEN=EYPBgO64g|3!i5WQ2G~5aDM{MuHY?_6XHU1T0sR<= z|D5TfDWON(z3w&!^hifWhJYzODu7^A%t37hueL)FQvzU#Agb>s261-5wJ@*s4L&Jr1l-bCZ;85Mb)~Mlsi$IqNPd+6|?Pd@q7)s1}7 z-yV6Py}GHhp}Ash)7fh$T-w^vQ|mLi70c^Q#<#1LtFucD>osk)&6P_Pt94ZyRql6g zdF8!#etzcE`N=DnZ%*F4HS6%uS6_bl%~xLSZSS&w7e ztTc&^&q^r|Tu&Z`U_I`IBh|4Z&pDg8?B7XUE>lc$vh-HJ-h1yIyL$HR>%TcOWq;n( z)OEJyD_?nOVq(nN(D>LH-{x@lrH+Tp*j=vackgn>fMC%XCsE=a>Gidh>({Q#-JY#> z601yt?dj_~_R%p{=b422+rRx=vZxg-M>}|6tvgVX9Pw-1vwK%tORHP^2#L8@cZ;x()P}Z@LaIv)^n2>gBOt!?a9gV>sw?JIH~hq^ z3ZI4|JRS1Jo!VM|r?>bXf>dD&G$xR7I7~yi{Rk3vPQN6<$Qp)%wcMk{izwM_wi3*y zfze=Wc5nG+l!DOH+q*~q$u;uSGiX%T^BLQ)yKJ-BS9yJXTwWeBu=n44&%Ef-2ahVt zhY!%>yV&|M4QZ637H~2aQNj4kY_vVmk&Mok)>bPzTEvN;#wI7Zr&?Rv*w^2A&E1D{ewLloH}HnjgyRW{0Y9XspeqIKnU?zDlP@XlSL9$E$Ul7 z)uC_M?#IkU=`&5*;hXB@R1NcIEUin?81ljdnKH-6(cj~e!>#rXI*4hVWw zA^QWe#A+l$$l*~tg+eYaAW!LlWkfv6Nku6=dSl9keFWLK*BmO|qgMLjLL8P*ZPkpg zla!`JzuBiEb)gF}KUhX6o?1CXqy(ScP9+NE4Hw+IGCNf&jT>=^p!cj{S`4t`(j<`sgDg zLqjKi`Pt=5m&fD!sQo*4?n39pSLOL^OVi7ZBlna}7DcTa|fG9wMe(LnIV?W0Sjxz5C zWA%_`S-%ac%&p!RsnDfZJ_kl6ZW8&?Zr=Ow;nn`vNDc~W@%Ax~Qh|Zvw zua1nOQL$5s8gS!j6tX$JfYZPAO06Zrpt*~UZc|5bG^-%~gvgh%n{-NreilUOB3U}* zGNOTes%ZTQ}mYC?lJ-J{;8%V4J;3_07Wy248qU)=w zZft5G3~?#hZHrH3?Q=OkYlfO=&2n`vV0gF++79jL*JH=CO!{Vn0v2cjolWG7o<3Hy z^M*rN%1j45M1+O0n`*zDg2s!SAW9JTuSD|XsSbWdVk`?qhoJs0s!>(N_(=@{viMqQ zwfN;#G3iF+={G_&XxOa)um zStxPFw^F6pD{}huY08h1ix)0p9h>-@zxkWGTDbEU|ozf~4N!UQFv7R_Ga4`6jctt`aD zE5L~d4eBDrco|v(tA}WHRirudn70c*7bVtygXW@BJF225gHRy3(f2T8cpo754EuCV{U$K@4h~r|Lxo4DL&+h;cmU+2J>m{3I3m7_}4gX zFln95Xi7G-Y~&rlwX4@uKj;t6MP)et`j_xkIvb9xyID#3_Y)P{=MYe8+Nl-;}I(pF8N)mVEO z5lJ!FRt#4uT8dg`8V4wb>?$QtKRYt0_k131kv_sbx&3>n1pr-HH7GnwKk-LhlhbxhX9+=-!AOn7V}2Yqx;z~3OW2p_2i zXIhf?#zDWGo(7hs5Q_AAj75A1B)&BwE-_AUI$rPMp+V z`SJ+De)5walNAm#_@UvU(b3UsSFfO_t23M>ri8g={$XY;6pwV22pZB%56qL3Qz(bu zT(Pru>|sY+yVD=P^XRWTW}tqI4#&=U12>z51GBm5ls^u{`prgoLHH4a#?~4uFKgAC zH)dyU-p+R-ehSVk3wRGCV1XTsisXmL$CQz2+VEtN zT-vepx1sE&z+6&i^0ua_vB7~>OC#YnhYiXORmX0#WQ9{b)q{KYqy(g7AX<FMmUbhg>JY44cv?Wyt8 zXD^-Ka7$!e4QrqozWn8v{_1<*d*J>Dl#ZT9AAgJu5@Ay~?w;BhwefA6IB^x@l$^%H zylfqe41K-pB9QT*Y5TDcKO7i{(;f8f%;_`woF|`r618u?{WhF`^uPV@w2O7n@Nc~SQ+Py*c!e|1 zFT-crsCQBk>SsR){bW5!2z8T!Z(#5cfGVRL6_m!1yRb}wmc2c~O0!HhjR&lb3 z$WA}E<&!%KSinpK0E0wcKN&b=Ln5OZCEsLB80m7^vdP; zF~!Qh?x^OVNyr%=E+$qJa-89nAsCQ&^b6tI6bH84`jRH6G(_mFKrB&`s%tP3I>u?< zVgDA1bfYInBhC)^c9d)7`s7qqgbK2WOF@~Rij)$n89ElmPYFn2zyMg4E*19PQT;L^lIYfGz+Rfk5yNduLm zv_v>X#7{HNWZD@sbdri_c;k-J<$7+bu4YyPOvP=H;OWK&H$+@bF;F*XR|ehl%0WWB zm5v+z{d;K_H#=38&QWiN|Xuxlli|CLu>87N;B6hzjD#Qzm=Ync5`O7lo(SO^dC1A#L*oY8KHiFWd$jCmD)>|R2;|H6met# zQyiR0PYyaXR;g*!i_w!^KShqxVW!3q>zBZgIwq!!etje+Zp^_D)Rz53x#;WgZ1J$l z6cVxVHUmiAYJ6k8$zc^PGi$&psgmm&_%7Y(C58-26)gNm>(0eISWW4a@fH-DBYm55m@EkmH zt3w#Eh50!*+!AHHR$R}{o%n#)K}jJ)kEGu~5+Vr+kL&uitKt(A7r}RTb;b?gD=~+; zx4#^rF6+oqT-jGhu-cniXJ>BOt|~rrbH-FdXHTcAq`2!gOBl8Cs2wnvotd4!G4;Rx z`+whX)zF2pjO+hMx~|y3M&GVc%dlBfx!9dQ9D@QR)wER6$%@(?SEsH|+_>y3y*KA( z^`8CxeRhF$cC>|ci;rkF$2SdUrm-*N+lNeATWM5jzu>n0(70M(7hk5wv1|x+R1C@m zJi#0yA`4^3ZVGgs{1Ih6q(p3FBLGuZa;*EM6^te`N6}$mwEfR((}?m$H`Ul2f?Le2 ziG~4T|9+Rh_xJa^OT<9;%$YOB4#Y#_G7X=6@-Z5a2ZMr87`x4i>cXQUWJwo*B-No9 zdI6dMQgJ$foYY_}{8R)0bb4z*q;zUbWU-n^XmrhrOEeIG2m){bT1fB^8A@?}R=Q44 zy}b1VK$)?^(<{w{TCZfmI;_yaA`kg~viY=J|Hp4>yxzJH}+Zl59(dT}$*DXV~t;yP8ei%>zsUY=na)rKHvo!E7eLD{c zNiKXdHKXq>`zhZHi2Ry?96+a%FnTH=VY1hSTs1;>%di%_CIP7*)nNGI zDT@?s9B8*jR)GnD%AzQ=o3>MFh6`Ku8016}b8dfywz+=ehQ>pWp!QZ%ep%bYIXF0Iy*G$2A2LQ8dbBr^+**Jt8Vds#L^a)$t4<`Jm z2Rx+t_F{aMgDuDBoO(vv5CN`y)vw_xB=(D3juqkvA#S7jE2G^&obibI>SV3rHDhYlTv$B12_JADJP 
zyT?8yoh?H4<3AJ8Yke61Y-`1dz!{XS|3U7HOQm`bRMYC`Sk%AiOp<*6_m&Yrn zhS533QQT^yc47D+^M&|p$js}Lx+)JHnM;3 zUP{R95y9QPF(!u_eoQx<%cBT7YHaQ9>EaPtIQ5oMK%rZ|k}A21i@xO(I;*Z>+^ZF$ zwP^vV)pr``b+EvC(0awn(jBVnc#tcp4j(*Jb#X-}h?krS?HE2cv%VNQtU_^;qlhKK zG`!Wes)q3ED(b3l+?x61^rvgq^{O{*)7jIz`|@R*9KQO&2k+~R2)!m}U7ftqG3q|Z zifWQYP7%&iU3jPgA&90dZI4DQRId~)U|8XmCYPvQctuP}m=twyB8QN@0({C8B!f)j5@;!{T57BL|zFP9%e)J>a?naS6`?Eh& zH5t)>7;*^=8^xsvsPa}REV!akWwu|5eo`uG13xXI9;%84svE++zE1rVF)`tD#4)Kx zwXzh-0IllFE(N>Lsb0#J=v8xbtV?;N)^IAzGiqI&c`gb3$fLu+6b&;O)2MzT0}YCU zhf6P)h$rMqL9He>btyUhltvAlr`d2M8_uRPAzqrSEImmb#!Q?;f4=ItQ!buKRf9( ze9Y99?hf&AM&IjuCL3|fz8QcWjZWxQQQgZouAaMezOufWk?P;)I7I7}OP9a-&2PN( z_S?-(F^;u0f&?2lkSCu8AWV8)PiWPGUw~SZi#SGEw;RYvC|FzuGO8w$O%uT7l$^YP z;it)-=`)+&6Nd^+sxD!X7x92OU}{8*D9nL-!c#SpN0lnmf5KyS%1^Ce62Y}k-~9SF zm>ZXh;D#9*U6Z_mh*nk(YUaVeP6(N4LItP1;!>omf{fzy&5jyB1+)Rf;Y6E>;&7r(MC|~;CWW6Y&fP$#R>&e? z28KLh27l@{cM~e+8!%@s8irln+^}&)qVFLv2YO>N%uu1>&SH!v)Gk7C{Zee0OIa0K z2#cl(aTSWhQX~pxJZhh2ORL?eMz7J(#4LK8qGr`kKlTMl0u<_?K0!=XY+907T32$; zgry6_F$Wc+nljD}wFE}`3mSOCAMjSS1zQElpsp!0t((CLhss->H2E-pW8)!%Lc@KZ z)(EhsAx?y;Kh;I3x|mFcXZBVs_o=%S?LkkB-!!Tbw-pg%49sOc(11K7;bxRlNRuMn zFAUmr2B@{wn85~qU0PjZoo&)as$Bip;j*%3B|5%iP}k658O84TnOa+R?WTg?y+nHa z=z+^|i}>6-8SwRka>|mFi&WQb7;kJ;S6rF7cI(bULu*rQQ~jR)JwEJey6efOp72$K z;o+h2^J62U2Xu*i4VASx#6pwlAh{~$Cy^J;S$0!NxyxiUho^@X8bu2#0;VM7n`RFjvItC4B_lGZB^!Y%k>r&+ zDu_MeB$FsVkpaIaw@z8W#IpnUv%rT*r5RD>a@sx$+1$8(%_4t|Ez`1am}S@3Q+pc- z(3$iXmy!B}rubh$^c8+(#{Z>Uj;pr>8DmW^+lTK9k7lbB66Gj~_G&UW9O-q{Fav4? zaafS+DUOsZ7zIN3#KqF9Ge2Ads0m1F;}P4oN*5vSRoL2cIg7+#L}^-sEyNk0!tBx} zTtRSItdmk= zt9h9u(|CTWj?3R{fiab3cWGTit;&cqFI<2)f`O;2^scI@zOlQr+nQX{jq+`zIG4E; zlLWCK5#ufh4UE+h4}6(H)ya_fM)iux=;h6oa}(zofcmy3op^hfj%D`2qetDdqnjlw z!^6Y&V?$z*9aYG{0Zb!*j`%@kYf9AD9CP%#pI6=JB@nRPQnDA$L|=yqxFJiXsIqwc zEVF2S%2j-1j(3LWoy-7Bmjwo6U9^4r@h1*88M)~CE}Xx>qZuq;{W0qf8#l=`7=i_6 zgd)KewJ4pf^CaQ~G*uTolbf$_4|Ra4C~urCplqBbav}>z09sI?T3;4#-K|OI(T2j( zl+k}^EF56C@n3xz0_yE4m)QXryp;=D65=PdBvgJ<-OM796u|=wuykfZM3<}9aw#j# z3bbe|>KYt#*okE^dE~9B9($e~770zOb(P1w{dq9B_fvM{a(Q-kcOo8>AuqF0;d{ap zv!Y@4BSbqFt&whVovxdua^p&)+b8LeYYp#y{z6WDBtz`ErMj6DvxFp6l~fWp6{()l z0qqqY*|bL zcqJGBIe*8cUo*8(UuU2UUR_K_uw>R=+F5^X3?@a)Q=C^1y`SJ00-g(DlG6m4L5M>__2M=NC#G-?RMA0cc z%#BE5))3MOEnvu{o8Xa%0O?AU4wW{QLd-<;z#0^!pH$J6V@d%gfL9Ppk$J&rbYOl) z3ziuy7ukv@es7aD;*@zo;%&xdhf{y6ru0obEM_^}uN3AZf1*JVvY;P%q9APtz!?M4 z{=R-rjO7t7l~od6phES!WJXIcE;hU?5r!z&43#dV%t<*;gAcks)Iyld39>6=BZtX6 zPIBx#vx~_B2CJI8?n@(*Mf6lZ`RKz4mDw|E1&hqvs;gAYQX~Muw1on|C{p3TY`;b> zmslc;faoNY6oHW%Z!3%8DPN$)XBf0?K$clw4Q-i+r(~Z^YgZ4e53RB$Cu4u_GtWHZ z=8C@lz3`Vgnh-&x5^LikwW8AIe#3ed^;8uE=nbP36A58x!dr$=yuNw|cTG3uW*Iic zAwJFSl^ss38Kvd&;-w>gX8BaA<92N7pwnR38Z2Wbh_4nXom_4Si2l~5HP7n$jkpt| zdgJ|%K5Xvk@S%jZu67?tIQrlbGtA@w!6ta@XCUlq9UG!;a4%aF^aJG2Hx1!ukCk{= zb%VlSue#_!OAtqlr^bRI0a8^!s&9%W0K7^kHq$9N160D%6=3;1lg;hkvF z2dg{u1ON_50ib0ufsGp-9T^)NQ)Bc1Ksf-wNYY)yg9(NzIT$V-X-s`9NOlCU0?7&y z?Fdqj3co^CIaI40`UR{M83~@Qf~q51M#$w76e>l!?InoO6zI`CI6Y=|O0QKO`v2Gq zZM;fuaak5Fn&L1jP?dS2m<{Fq#ULv>T`($zQEKODrAfLy6WDH_aZD=45x{D%(WC8v z#Q=`#K6d0Ds(=Xn5Zi7oSOGjXcfo=o z)oX2Sw{FwDbGNThGafF7vJ<+c+0==p5sH6D&rA=IqE>8L{1NErO9(zGs`Ggu_Q6rg z+;C~BM<+{&cvhQh?pSVcc@2u!m+rVp!O(eWWpR09X}xxRwQ6N<{dV7SXJc!FQL%M) z^AKdv=V&d(uijbT-@m`Uwn34t?VYAZF*ywDUXz=f?|u4yZJYh$J9>8ec1CoYE^W2b zqOZTte&(1CSY1;b7OzxUSl&6eu;}aC6WT4Z9RL7807*naRF|&|4v);uFJi5IXxLY} zlUPOLL%;$(_Z#1=5NhXWR{m%4w^br$kz=HsiP7hRX=rNG0^(*WdCiv5EGxqA{A?&1 zoE8P*B940F?N?uWt#98zOa$JXJ$(4^pZv+6nDlmE6E%DB#TQLq@r;P&)vA4seP zJV>N78h}0h+_R_m^5si zXSs6qvU6rDYm3fM8)4b`r~A3RFpuT!?VUb5<~(3uUq3NkSX{6NhCb3IrxgqoEu=Up 
zXXlGQddIS2BQs&;w`iUg2-ZctD%aW!Zp-NaeV$<+0Fyv$zh{DYx(Lni7KHU1(j0@R zryqHUHfeYgPw9)mhh29BPh27$KpF>1$8i$EVzGb4?{^R>)CC3L)5c(>6i0_N zu)GwP%?-IKKZAm>G~swk1gSD2JV_Sx!;e0?JvUF<+-qYk%kU}oBCr=$VU3FlE_xDH zsy0Ts2-^-p9ZE`{$V3TN!aV^8Wb{>pKl-ciU}A6;s}be{U|=8%?y_Tsu`qZWcfZMP z%0{YOLv;Dl<=b<&)w!=M!81cPfpvD9M~V41%$TXV$q+hWv_z^p+bCR3qZ^v5n_8N% zm7J*~`7qPN?6H6qzUx^yiD|_>A(XKiL90ByDy)kdW$(cPqjS5;(lbaD0ppnf38T9b zkq%X1_|{k#zU44HB71FoqR!C3CXkvWtMxlZgnj2;1CH9EK~m^qjHqNz8!hX(3FfgJ?l3i^Xt5 z8$&kfO7~>KI2}Q7O*p_LNM7=xTz1p|%4CvhR}O20W3kzr4REfp_&<7rK}_mo5_U{%4tfuW3?tDk-R;V?PoU1EGii{heT8o~+SZWU>o3dcCuQM5fr4p@-ETOLGtZ0R!^R4rK(?9&N^ z4#N=F=SoMj^!#crGg$b@VC%WVWA3~eJ0I;AjJ@VIb1g3p4D93aj*Xw&-Mh6wK#jF*DuY@hFX~nN&u!JslwZ~QuHH%q|EmxKrI3YBITe*543n}7SmAO6s45NUQscXV`=+@sCh651?;Nat8ad(FpZib<+Q*C@fwGEB|k zWf24H4@po0N)$y!Ij-OZ@G9sh7`g-@T~Y)Omhvk__b38}y3(93)Kd?x%DxQUo7A&#D!>_!ndo3RzG^J3|dlg>_3_m7FmPCEcnC5WoQRw^~%PB9Sgp z!)?=-s>|2Rj9@ey7+r%n9-xW+;&W-Ow5DESy+&&9?6M2eKHjP6856=TTS8EwD%4&t z3Ii&{JqpqBCJz@zZ12lohlN%S9*J8#cMJfgkt{S|td3)}b zAwxrR!>-;QJ17lK=jU&qyD-+-ZSO|&dfbDs;bY{t=5C(9aDKjR)ptCr>T4G)>8;)I zX>VulvB}Xtf=G4Y#|e&|cGY`V^4QBgatg%<0%Y`oho~k!h$NsTOUga>KU7YT_%DS7 zNEjwGIinr11rJko!!(rujAAK~Xo76->e1VL`@j5`y?gf}SpQ=g{Eau>puH5seEy|N zm+0cy*x2v?{_meVckcA56I9nL)MgQ|YT4~4oXVA=AaTM4*usJ7Wei{rh>!`ekj0pb zj_>Oc7<}-PLgE>9f=yl&ii@ZP-4;Q%Kop_!6Ah*tvib_mh^OqH)BpyNBu_*sX)$Fd zbpjg%A%OetyH8CKqf#Ihq2r}g`YnVhMGB-aGs!whJ)GS~G=z`lXpKryUDa`WsdC{# zkJ1ahakLOl&*Ty?rFbhi7Lcl2=!}i7w!{F#2PKkdz@}8ywED?M?pG=^Qu<`vi4Ix` z9E0e;APcN9U_h4Kjkf0LDq^39H4NT*9P|T2L4!Tireu*4otcB_<-D$`a4-E19+uw2 zl8oj^ZFIY$=Fgh2bIA2^FsJ-X-dwwWgC#_neuh<}sj?US^(zcV6c7vw z84Y^whE(qS#T*$QO7TBHYoNDQ8@xknh9TknOP4g=YXU5N5zJ7USW3}&*p|MJ*{4Iet7 z6K?6SgSV!swI$XPYU>$9j=u?yw$}DF+8x9T(&0}XP-{PlGW8I|n1rXaSd8$#Ow4T-_HQo5YwbkdI2#UL(j zYoJ^jAe1Pimm3)Kz$#XqeG#K5jP2KID;=EFwQGzTR`zg{;~_t6LL^^Zyxoi3hq zbcT)z#Ar~SFha3eVapdXO`)GOiDlS^<7qz>lYMWIE}24w;-7E;Vnl8uL8w`=ms}2V z=(P`6n`DB)%#*GL7yzd=FpkSU>E<_=DosuMpq5Gvjf@7KvJ$4|1y!hP%xD(LRu(dZ zkbMAK&Tw%NgOCV?s_DJF=t{vsG|AMz>nq)!S25zvIH?i{lgL zZ_dx&UYeV^eRE-X;pY6Ug|(&GrCZ~d&(F-w*0naYcDJ=0?6o*Fzj8Z{xM^IGZbE+#n6Qu{f ze-;4PIVWUx@5_&{0@Cs)7{aKP>5Nw1-M825x9!d-;SvuH4xTxEh671_jeG4a7fhAY zE3;O5W?S$fml0+N#RbH8!hm=|#M^|?>#C{jY_x`^t_HN}vg&&cl4&E=RU>ACY%rtv zK>rD`CzYm4;5eOAkXr2a3YmU^PzdEs8z24Qov#G1Kvb%B7RlNqr8*8(3w-4NN$i7v6g7GkR zGxOnGEw36{b4j=r<^ZJ&1X6_U?-AkygXWIYhS7 zDTY##3{Q=21-9*gqV6C>sZW0nWD~3i1{ulu%GPJx>mdLm8Hrb4&p&_i4hZ z!oZ>y8a0MIQ6nkRr6fC~cm)G^(v@23t)B=$jDQA3Dxs0~9U1-Ui=#WM+%&x` z&=ysojL}}jvw)OT@X6JPLmxGI=8*@mSjDuHiI%K0u5k>IMs8Y}>CrdyD?+@M|+)(-x3+NZ=VHf^fsuO-4jdj?soH4m z>F6EWd#7@Ba(grOwR_ zpqeMe4%zCCKN!w$mrFX%q>yb%;pckYOi>ph*`6t%e_Bi4(2Kl&TF&SDgp7d6t zTvx7KQM9~P?`7el34R7+eV0h>HrI`Xbi8iu=K-ruTdP&8jk#MlG0jiRQM=5AU>T9; z88np!N@{&6R*RFsLV29%K%`f+mL^(>l}>d-6Q6nH2)Ele>hz)_Ja3&EJ7Gqpqn$qx ziq6@y#zoLM+%XczIhSR5#OgAi-Yr(_7l%xV(0!p{=>5 zxnW`T&g{bN6?f{kG&XcJyM)?((R&8>%`DEJzka#Bx2JQ@uKwYHsl{7uo$i0ysBxBX zX|c84WpVbFSTJ<$=|^PE&B~KCK?-aNTi5Y zsup!AzP=P88$0Dd-M#&M3iwc!_=;Zy7{HKGj3xa%4f2F_$B!S!Q{y^%B3!*bC8M=m z^5hkF@oT^SwZHg_zc9BJ(B;A#BsIt+GWQX5hv{YMUiQC1giyn-?5pm+HbTd8J zjVkJ@ML=q608AwE$OrMD(vbJ{^^K2@TU)+zHc3HA#^|NCL zMiq{M$%5pCKPglN>WYDQjuO}y0Y4E>w4Z8Os|dI;NoY~kP^vnZ4$rem<4vmM8bV#UPQFm#Hc zC*`Zu6a z!B*1XiJf329)63KsEj)qp+!|gTZ5^#O`e4Pk+nNEcbHACaFlQI)@*#Sys2?);{5dD zt>%u_UHkerYO3r>#~PbKRQAql`DS04am7Hrhs_n9Cr_N{-`j5h0>7;l3L>(>=q=YV zgqX;oQwluznZ)}^bu(njcD+J|J{O|aTU8Fzt_YkFf~wf<=7?-8FesN$x|)$#h|{t6 zJ~$@b@p_O(v%1rLd%2DKY-oG*(MSL4um0*QU-^o$kzR<(QRc}T*Qk~(q+C#hMn@-l z;KW9B(s0;RN+L1S?qKLA03x2XMg}`Y26VYJCp%#gy$G~2!VCZ&I7K|AWWDmKAT8DueVj%o(S%IP|)5^7)w 
zWFjWMu;tJt*UG0)qCwYJ;(LNDg+ifHL^cQHm>`Z$V4kW5(oi%QSb&-StfsfR(7@9i zd%3J62Txvl=3(=-#KRCa77i-S*Ad8=NH)-o>DMqkmVupb+T<41AEX4J`Wlp}|3or++(j`i%387Ku4Qt_DRIu}}rXM`M?> zL_t&uQa&$jsk-@UyW;Y0!-(hGe3lA@Mbp?)U}Ku}etC@!odoY|^8}IXg8Aji)8yDZ z^0UuAea($&SFbvyXn~jFz4FQ{E|jCs*0&X>F0z06Y1|N&t|R=~5ke_sj>Jo$GbyIB zuihJQPFrD~`` zj3>2Bf|Y_urTa;N(F}Q-B-G-SQoI7D2v3xx>DML@vK#;?2tA0&*SICdqG^pArDRVj zU0a-Uq?yWb6=_JwY>9bL98W)^2ejj&wgz0>(-6bG7?bmJm7-`NXNC2;-fpSx*Is-I zk?K<>ehExW45|_N#VDMWpOP~G8bD{vrWvYeX*CQJfyy#!B`}5=*+>9V;GnWglsT88 z_!P}3!=%W193lzty~#GC=vZLmne=q;GH=p7AOcCqe8JvhieH&LS9UQUx|Td zB}Qw0Y96JRB0`R_Rk}nl{?CY%RHT&d1dvm9KwdFGAT^p?kc6>pin}Sz73e24STt4K z;arO+?HyeguUwkAa+xbLw{&M`@2*>O3oTT)T-spNK>KvIte<|^TspZVI*3K2T1Q3V zOFVI&lURf(+*Mujq%1#$0E_xXKyc15BXT=P6!LAxJ>?R>=)8j)MRlbBvyX~6+N?%W zP4H?;=EEz6iZ8EiT)cSE_Zc*kBL~Fi|9j&N=_ohirOCeVWKX=7;=$^mj7GiH5h;`o zWI;)Qaz(U0F%TDMpbANmg|7Ok;@Vx^;m?Ir6oO2J5htwZq%(5q-a)b~>g82(KoOZm zE)n?A;Fa`D5m!61A%YS?T)lvS@9*zd5lutXr6Y#Njyz3|BmpI|zyKQpbU6}JbuH2a z5?euT-J%FYPM1cfieEvvfZ7K&@3XmOU{c#^P&+d@8A%F*8vdJXfZfLkkB0Ek7((?sXTS)-Uw5}l6~op}+xj^-*B-8an>8Efr#Y4| zGZVp%vN01T)iYsG$PIlEye$$Q<6-fw@Hwj-?r==5#s~$Tq(|dZFuH3xUJ8Fh3NM7; zV@snMBM9tywuWrSGM}TlvBkU~cgBUX+zST=+yb+EcW*;o!{+LeK2BZ}YLvl#S{^#V zH{8chv%45&-*=gPaN_|B8M|{-L5pe?{3Lw%mckL1BKG3&RKg2Y!T&*ZQM(;fHse}E z+%qDTTZ0#IZ(&1&;$mZ185Dq|D;z}wPDax--T3%8pdb9;2QEg}6(2ryXkubQMzm2O z@~{vWlg!&DX}GJNs!0hUhYG=n6qiVolWbL{7^FqHtSdaJ)sVsMb)hFDzv^jt^q*5TEH)^I2e*k;%I=! zPj5q0xVl0a-$+)pT6o2T3Ci)(uvw`7?XN!PT~<$n2U)>$%P?_J!xW)X1)%gzW=EmS zI_X0c1BOAtteC1@h+FVbB9Tg#S78GfERgyoMG!4z`Cm){$eo|x`icyA4MQ@nUz^g* z4SM6|7V}2u&W-y}t;HpdY~ZX3>m@%rQ*~*dQY6BrR^rmdv!_jY%6Js74g7OA@Lr^V zM-`J994n_}6$C`pNYZWF=ymMAe9dE~@aq-vL8z)7(^J#-GoLwg#!3m6(MWTcCia)- zZ`*?=I{|EO?=XUZ9#zH;Mk98mvw4(ALs~jArEfw4pM!w1EQwb}@1;eyHjqn~ok&Rp zxvjc|@zsAzxkWLIe6fJZdTpzyYD_5=b z6ugomqFlv^D+Pt9C2~@@9+@az9reQ$Owr&80GU(~O#lO_GaCXkPK%T8z+72rXQ&xw z;v`PTEDo^+4(x$!tchU)I@JOO)l{z9CD}^H1aSdLWWzQsILkPd7Z!q~90cgzFob~s zX(1ENz>X}?fVk)K!|Q3B3dmbeg$Xs4?zf6h&e4C~d~k#|lVv>t+pc2KDcWAK!^|xu zlb4$2AwfjLY5iF)w?rpS&-r;fJXaT&;tD^t3Sz3ZTKd|Vvy^G#($)PVL%qBEtbBPZ zAT=Wz1O~F^f3;-#uf%1iLO^K=BKynh<910NZ z^2a1WD#sNz#i7v8td4SHQ0Je>SR4sdsuc^74FImNFFbFMDgZ?yk}F>&1{nppgwXsl z7v&X}0fR?gMWp;mM_IB-u0)PymIUC(FO$pyeRhh4QRKqOl0xYeX0jW zMjb$BEY0uZPYf~4quO&*X8BFCAdgW~BjRf?9dFBZgjx4OLTE2`6+5}Lm2I2aBuPcK z`H(3IAn{)caONwJ0@@%^D8(RH*_0Tb+|&1RowPn0-hKC7mp|%(4cmYnEaAX`11Qw@ zLM8j9OP6wD7D8Dhri7`QxGs4NAu%z{EqjMVi7z z1cPj3q%Ns@%Gl@G0t|+P+~!6$y{8~MgaClaD6XcY-Pj&B#AtpJh(hUR6TxTE2_~aD z!Xvv#+5&i_Bvk<5NmfY51V7bO4{R*$tx73Ext`t%BGO^-1nHHhMCHQ2?Uf`XoL>DZ z{O&h?Ep{ZhA51wh??*XnO+fh(7R|+?F{oS8e|C1bk;1JkU50k%g!MY%jp?kMa^jRb z;J&uvDy6oL*0zooAI&wbQ$C$b)Um+XQ4W0ku|ejv(VHDL%f|i~Z5tjUc5!o6C(ees z@igY6-2ECy()V=oKw<>x+h?_M1}alFE~5-5*(4LKaSv=9%hdBm=Np9CU=V9TjzlpY z{(?$lKXV6`WW0#}7OI%bF-c@biDz$*F_UKm%SKt;HKT!{lT(v>`ulu3;mY-^OKZ!% z$LiL&Ro|zrsqEU>S>NdJQ1j{2C;Iz)8(KPUOwTqnx9#lhuWM|sF=N==GCg~%v(s0q z8XW=CTj+oqtw4%u!!$sh(wet?Vf`xyF*HWi^i=`ig99wG{jm z$WzLzxYpfHFOy#;H?@_ed=<8*hgq>dyk+d%#p$V=U0u5o;vytn$(hsRNA5jrE6|x! 
zr`lUvFJ3rLPN$|OUwiG<-rc)?^rOFXDKi((WCQ-qEzG%*Z@2RpH8t!kY32hHGDYj0 zNpm=jWmc9}P)JlEvJ6SkNZc0*wl+`$PtGg}m*SM4X)JvV?Eq$S!#)Rxa)^SbnOINt zF_N_5z)7!+hoI10r&*%cqUsohk(h&37!wl}U8>nciwVy;bLzLOd1m_J!X3KklOqE=dytH_1PDZN)`d=oF^=)) z-3~b;NtpQd&rFeB3U*|nkr*9$7(6M=H2@(~GOt&vroc>>sA|k1_fHCvk{jxU4|s)g ziL!w76IOZ2w8SW)^a*gL&5p8*SosM)%L-zO?`q{i5q^4lD=%R9%Q(8^{1+a81sU_b zc+g)NO`xt=Egd%9zLq+D`gBJ}mw~VDR3fkV%RY5yG!~DFTC)|E3Z8N z{PX6eR2E6ck01BcwbD{*X`HW3|G;hnyNSqUr4kxO> zd&-MQg^C0>%AP|oxRgbM5%)?%V2D>B!7EE#A^`DxihxX1o_>N9VDJ+R;*pIW!vrK@ z3-S&qgX2ny8dKP3IX6KuW(r;)mua{iB=qcp#q;p=FiL}9W{c2vMgIU^?MeHeL2wt z?9QHDKGmthad8A`3{%K=4>{H)6K$$1jzS^pnU&nGytmuItFodSO&h?4FZmW;iHy2* zbV@`&!pxwiB#}r>sJ>oVP5`D2FiJ}U6GuN7jLeqCtHgjXuh=y~5n2cb0&@{N@JRq3 z;)ysmY5Vl3lp+h`pKm?Oz$Mx>=r6hrATi`qx*#zFONapcqZkZ4qr>A+!HW|U{r!9S z!*9O%*2RfS8qR=?b2~CJWaXAiJT*1tgJYL3Ulx&KKZcnq3%Q0c-~H})N%`R5plXkWG9u&Nuc78@ zvyO6k<%z@6vDEyzpWX^08BZ8_MUpMsZ~2nysA+iq%nCfnLk}?URM*qA8L^irm+cmK z)fmUrjZ0&%vJg5KGqz=lbZwP8K*BT*;RJ7{Eo3bgDa=;7{A48@tF7^-dCchCrNkEr zV)t_xDJ2#MzLH;1=BJuuLYydScqPE#RW=5k0906_P|6NnRS@JN`9y1mXc8?}NAtt@@Q3cZ?|#lXua|hH zz{10uDfq&gLX?e>jpUgD=DX3YhD3&tDk_V&)bv*{q0U>Xix`lQL!zzKSG}kxhNg<@ zYP@2wRUD*hdV*2eoVmp)aR?KYSE?mNae(E8GetAT6~)m=Bsf5I<+3?cI!JM}!T><_ zXy+t8Sx~AFq?bfblw_q!nnsB^NgkpSb6<3fHP+WJT)cSr&^18{d0ASy+56Ii-xdP+pIM78t8Qkqa06DAl!K}3qD zLX)MLQC#-6mbeC(cq=-QfF!_PiAX_-pD=@m!PNpN)SNIc#18oQ0-zZUCD5&+z9B{7 zF!1&*>a;XEZDPljg+t2;n$kXY{UIb_L*qs!7c~ZvhO6}7U>C0#FRj4Tuqg)0$fLQ1 z`1rs#?Ne^`|MEY5E6GxGL?BKb0<@@TkhZi0uO?xQypmr{bdUji%Q{AM8Ldb$aU{dDLCWS-5!kANMQEWnXw3q@oKP;uEXD2$fm-5kE zR#5q^AA1^69?Q8n`#UQylg@$*)9)J1#0@x~iwXjo@^bUlfVvh>NOa40QH zq^#Fpe_f|-ONO3PC#y!$ zX0MA0s5Kp%)xo_~B#DXu3{Q%LxI|Adv5z1a3uc`yo7it<#>BB*4?50}P_tsz{_0%HZw1tJ2JtjO;9?_N zB!ZIC|CdfE#%<<{ZT2Ysa;yoL%lczNo8IZSp)ws37+7}NS2jq-?ZG&L1uBVLRHjh` zq>^4qM=|DlN>plcq||Op6~$9UwW6?9pJKNlk)q~cVAy6Q7=_{jc5g8;S#Ms(ywUn< zdwX}S4U!vf9+{KUUQTR)hauI^s+6C##UT`d#H%biF-y##=!^gmG7g5zEx!F$B;%PK zLbSOMCnH#qz-mM@1;A?LXGly5IODPntjH9^fWgwt=q=2!B?FV-%%0_DI-Ig&8et+v z0JvFz<;~h{S0O8sPZ%=YGYcvdZFT!#k|apRo>^um8rs>e!U0BW{rs#mGHu#oS1%L>Ca0ih96jgnpfdaJQR@(Gdjx=;{kOrYQa z0t-WwNZMA!7!ni`*i9k7NpPgK(xIe?CK(`v`c)DOuF&Mo(PeVq^+qup>qZBcy5su~uX-CT++;<{S@( zUqYR@MFM&fz*-fQU}RE7Q{n&_JQa{(MC=f+Zi2DYp&=X#B-IYVOkx1M9z+jN(D!)P zPj$NIxfRF~Xw}{1Lq@f<`tY(m_2wR(hJ;T?dZY*7)YET$SjBuVk{mf~b6M-`=5f-j zk~)?m>ygt<+KKh`5htTQ$SzxLrZx5x)b2b~#K!;Ar~lpHD$@m>^%~eDQu?^l%je_j zl?8zghFl(^PtjCb+b!c3FA199O(CXswt}TQd?+aeLnJ#*%a@#%EyGoSg4Phr?LL-N<6r1!t_m9Ln`_28?ozUnO3*rwgtz7L(SoLHi0 zLZtffsPRipdTEA?U&A^${4cV4{q_gEWU!IE+AYWQ#*Z zldc)!77PghmM1yUxZlZ606STZUcb;~7mX6&@)fb@GZFh#tv>aHEaBcrHAakW@8cSK zCJ!Cr*Dh5aji9Oq`lH8&Ox@+Qovd$=KJxaq!B@X2{2Ti#((SAeu*jU7C6W3P&0_Ru3MOud4+%{@D5#sw2Jbod#z4^ z6zuN^Dmut z()Bj8lM68&B};_G#%bG##y$QBXNDAvJzz+%%pg-OGB603DL;sUkL9csA`ir&8BP4# z>yi)qNifsHGg~2t5C99r1WZB-m_RJvAECud4)8hGl|6=;XPD>a>X)NkEj@Pp_@Dmi zpSnEmUXfjb-}3vDKlu}?DYbKEo@X=2C(U{9bN#EI0LHkjqHE=}o^{ADaU&BY7f4{C z!n;_@Bn2iwxzo7;gMa}gus^RC0=1#b7dFts24cw{S`!m;0Fat;5e``_Aq_qe3zsJJ zWCGwlwqS1ZgccPH7AuL(66X$_lIl!C0G7$@>^0D{0FNN8A>=U3OMdPOyPjkmdbgg< zho5=Im9NvMPI(W`#^L<=w_Hj%^X6H>zqhn#&GrMa*4ypg-`O$g3Jdq`r7gbRI`v2t zIaUN0umSE~v&Rxbsfk@GS;GzTvVJHmV~P&rFoOV`lopDnL`{s)h><$+5>WI(qR;qD zUm`PYkgrwX&?jD66)+5&(-dhXXH1UPqE{iVZu0xmD>vtE2rg0FuppRCG?*`ACsFB- zK?^x4F%>)jYb{R*G^JHdlmdu72@Y!o0X*Q*(8$Qx2~Kes_JIIE6KN^21Tw{xoTP*= zSNjo)7z2hRR(SD(6J;LWCr?^AJxAeVai(LZ zGr#@Yzb$c1)@c6V5B@+ZBlC$2wh8+ezVHR(8L#nRQ_UWyeD39DcvFhFjO#*?!!!Y6 zd5qBxh$RTf!OJ7zEz^7VgofkH2rqgB9p=%#14c~LQTL8I-GK$Wq7|%Q(S1WF!bdV> zOm%`FsmwBZ1hG?kVKI!IKDY`1c4G{lsRH7x;r#jY#`)2sM~z7=7|GFUr2$~EJCmyh 
zlk;49?=8*?8k1P)(IAflyRqAQD2tr-9%AFQlLuQx$kA(5kQryiP2md($j1N2Z~lA# zIgTd)fn%J)8n$9qGE|ahXtE9)mSl+nvY}{AGyw{+x?cgHR}$Q)8|v7EzRvN?X8o~s z@y@QtIfzxl^yBo`&8T|7WP?X3t%!?WX{4Ha6_H*=(_3J|W64m%46AOnw()Vn0SUF= zX-1X+Bv5}*r>41{>FV`Er!H1qAL8*4j6+K<^D?2KamO#2M{GPL2c}}t(TFi|oaJZF zzKKjcwaQ(s=7VylQ_3MZrfT$h6zobnm|s{yO=DQ zUi;R!Yhd^W`nk8zT)1%Y#g|_G-QWG)-~avJH;m7mIrYg;e)5Z7{Gz7se)qdRj-mIh z#&Y-5pZ>H(*ob8wgUy>%duE7UF2qMona8OBFn~Qp)(IC=NtRmZ8JQ;1WJqe1OvTQ3 z1Uabzb4|e;JOE|+xweYHEAF)hqYg0xnCDO&6kN!HtXMs+dm>t8CSzQzq9xq{qsb^W z7KGX|>n+GzH|LCJze))yO?hF=yOQk6ys5?Obh~%eOZKixSvK7c+FTz;@SQV~9(dvI z!ljF5%38qt0RaQh*{m&O^`7TE{4&L(enNB7C6Ze3xFCtuZheR-4EuS&X+!r{`=F|L zNQY^I8|YYU0#Nf=G!0!+MAnob3E=sxl&}^`f^dL?SA%gW01*+6gfC>_3Nay&xo&JP z;R#dsNMWd>$c-^qfOG;gd=t7prnIk0SPsl zn?S2Bq{|se7$Zb0$!ZoMfRwC^j5>gguIMojSFA$jkmKYl@TH>*_~y;qA}kv;5u)ZT znXW8lzJoL(`Dh4%EgrZyu4V)Jm_Z&u9i>t#DqP@`WZqbWv2Cu7*f{2{&!J}yfB7%} z%98TN8>g<%&3*KvAN`|0`XjH@PfhRq<3Il6PkriB)@t&N*nHaNv!DGe16;~-t}9vD zW?=!iVoEaeXlD$7`%%qWhY5}*1tl_g_O~yijVnb7k$|IDP;x|gJ(9sw1xBlyaCn&1 z-8iaXGPTh)w1PDYX#`@284T;oLN&$cAS?`1Yh|9zopq0Gc*W4XX0|$Zbe4H?#qirX zRcDbH`?oCogiXDIKE2a}?taS0JiA)=fyR6n*>{EJP4iy%ym;mE)$7;nWIVk3BG5p=^zeRALgD03}TUz{9Frn7%nwM5f_%Hi_M}Fw7GyXPf~d zP)&&NQ3M}2QPu=-nTKt>)0A{U=Fs>YY~yT-CJ&9-oe~(0B-ol`^es?a(ZUmW9f5xR z&=nb4h*D#Zg@GXxVKZQNd8M-H&Vamnt?mhVM(xz8)5xqzmo8s%F@}7<3HGBu_9Hsi zu(S_!zUR`$x4-@E4}IuECr+HOZI;bOuUos4IJ<5?Qbh>+*~VG`_Zb7G(P4rERx~jN z{XiVE<%Xv4BI5`GNcAvugT#x4O!fNG;wz1i;X*f$3K`c$nz-iY!mO{>23U)M(KesA5w_ftvM|9UTieA3Klb}zcp=`qWlwCQT$ zGjWXn*Z=&>LclWCGeR?e#E}m$8M;~(K{p5?MPV4y0;-UJk;C+<5vMNqdMky7KwQ|t zx@5H7YRPzE2tjXu044&Z-Gi$v29wWVp%Iw+xUJ_IIyriQ_e$;PQdvtXz2qZSgB4;^ z0~BqIZv5H;2=7`#G4^A3p=Sn+YktW#G{IA(cQuurot^5zep7753}2)|dDQmkjMCnP zc?uTA?l}DfLk@Z&qrxo&HF8`Y{1iH^4h>xZlfX$bjF`e66>5jf&|ku!3-Vj}(gDBbHQS(d6U& z8>2G^G0OJakSB*^x&Y}L97UjKZxp zya470u=!dY7>$qh>6p08Dx$y&PXmYmfN%xiS#-exqZF(1qI1;e11lOm!Yv1)-5wMb zdP1-uW;}qRq<(yXLBk!pBp>0lEB=eW_?*o1_?LzB;L}g*Uq-z8$~RcW9OqyzP>HAQ zu^9;?yx%=ATrE4AKKQ{8e(rOh``XvO_A@{8Ggcn6r&*tn+J*HehnR|saV(K-w1XL= z<^oKz0ub9Yp+U?MMJqfc0xU2N$uz&4QmbD&g(5AyFkyitN{I?rNf-}CrU;QHG=vbS zgbP0OF08$Z?#R$rVOcf)DS5AoQ`_;}5wi&yw5fW`+hdh6@wUJL4kb?L-Y`m`&|*7;m8F1^nt zS0JT%naw!+0u)0C93yq8a8*;;*C$jmgAY7+NQ4?(v02+1p%0@PGI1d=TzuxCxael_KeW3=| z7r*$WWmnz$JJ@!`XU@K1Q*CKA!y3r~{;hBMwbVMdchU9te((3355K=2=U8y;w`k6lMQvMGPRT3DLhKhz6uhhg=m00hGXyqG1z&%+)OSjW&+g+r{v{c)(LYPQQj)pIW_6j=itGobjXffzU6dgde2NtSg7x6j20W++7pocujixdzF?bI z`#Zy$Nq2A6ZQ7AD?VzWre$K-e+|lVha7EmfqjJbZ;sY!qfq}QiE@;9`;q{BdKM(HtRjYbgLU+h5KWy*21RhTCvZx)eN7dcPu5)o$uX`6;5>3mFvlpYVUYhN8L5^sVLvW@P%kUcIX0v53Kp~_>w8b zpHy^9z3r^2(N*rHWj}rU3(C25!21OUqVO=X(aX4vPU3gLxv^g z9v4a^o%wLT0^5&IaEccKT0hfX?zhsNbbYqIM!sqM%{R~a#1w#Q&OH4B*ohOz=jN^(fqq`e^(0cg=5yuJ<+ALtCpQ*1 zKCT}+s4Z9h0MlJRYCXB#2Wz%ZZ5i`q!VC*0Hmxo#9GIEh^ziB)!lH>bCb?cKR; z&(!uAF9*4eZz1LHgZT-+EVp^%;L!My7EayalPEF|WxwELQg}jHjJ+)ZNZHinC>X5^YOp5RCdtSFo5rNC z1(S}ZEdp|MgTsz_T~fm0N>y!#O-1!XTkePX;JaIJJNnV>(F4OVpw#uu4fvs@hwi&u zDa~LYN~;N?R5PiKi`afJ#4J$jNg`mskJu7~zUN9C*V-u&j$Z!^;dyIU5Gp;cB0pQA(ym@(XEA9(_KpV(yS+*F3;pnXq&?iFHB6 z1t%*lYul=Nb@8Q_UUs+ERKh$T1F*cf+(IBm5&+JD^%NU&Jx2F2(jg%T)IyFI2FrST zxWlQK2S%Wj{+8l@-s&5ns2nT@X<;==AEIzs+2f_i(wH0Z`J)2 zTSd8}FdBP2*AeK9N(`(-AfMz!C1_7am5(O=x`tG%l9{aLC~W;#>DFOG)shytj~9CME0_MHL5JWh64 zYJNei0I&xDzq9;VJo0T2jCX@6ADP6(=KDDVRS_L`V_&8Y%%Xnz6Vebm_>y8eXGBU3ag z0_7DyrrPm(IQi+P580FB)+3z7E0`|5%-^dIyTZ69BmrEx#N2A?|%hBWtW zt>r^HjwRHA0UT7o9N{SpnKqUV#ZFU*L#c^&ydl#Hqd0`GWp-W9f>T1@X4lH0Q0LG` zRMbk0;Wgi-ncxSnPnlFb>(Quszj||Sg8@iTY=(7&u&}|@N1TJ+H*oi6?bzGx+Ur zb+vJ3x5{R*QYRniO$^mw^opj6m;PXf0^lccaf7n5b*+HoJcHoGLg5k?O$ntY7zF{C 
z@bz3Ugwx80G~tEBOJkysK+!q~K|q=ZFvJmDz2v;8UmF78RnQM5VHytqkpM1~L;<3a z<*jOTDM%B;)bI+E)F|1BQ8e|0!(i0$s1fxBIZv^u0H#1$zY#}Qn!xac0OB~Yf$2&K zeH_7+fI}*=K@)rqgP~pTpmZ`O^Rkk-AZ+19Y2bMW$ki8Dtvur6%oeX12aH{DXrVS- zb(X9kUnof92p9qR%w98awB$OB4Jc_rj0OhYp%4M$MT9`6I7FsiL)7REWm(fDEJ3Q# zr;y2*41-uGjGEAxn}W|Uh|R)69(T}C=e(V#;%|BqCH|&huOT`PzVRtwE-SdquKM4P^?jCN@#|_5v26Vh0Y_>_^hiuhx=A*Q z4w_hyj--NQ25Ev<4{-$ZpT_u)KhrD39X+Qk!d!=9~9m2C;Ey@Cl-PX&Z9$o=NgI>)+5e_f0xX^@5 z%`cdiKiXYK_~47=4@?4egAMT59Y*Fr<4|HbDHG(`+1by3{`2ni7_RPcVE4T^$UpzH z|9$-UahAsm33U1LRoNNUGRXsvvYBl#3XCzVme8Cv7&t4>oa>UH(4G>Luq1$K&0%0< z0Avcs*B=h}Q06c&;sEFalL=M}uR@ejl(54~vlz?_$gKG`9cWO(5MsG9T8tTz5gP&k zha9wRA;iEFNi2r%a2z`aoiF)#r{C&3H%)9Ze2X zz|MuDgG2?s!q&IKB#Ocm$k?am5jBuhBo7diFj(B&h2f#%aSLM2Btq#vHC__>dYGF- z^_o?^{#Q3SeI1TJzW+2DawLqb?Ei>bfA|FcAsc4&tW{{&RkVsA+sw>Ng%=-}iNGct zkrAj?Q5{A#VvmdTj9YR9+SG`ED6L?T0ZSl0c=;)nqE(OP4Mgf#yHk=8t^jUw`?_Uw-bn=j^3t zXOGBEQ(_bu>bQ8y7j{z3JCY4uLP*o0lGr62$Q0p1UoQ06fJ_R(5TaFD+j~@xc7a!b z$RJZ30#XCAVQ%s#70C{8MWdR9kfiy|aa_z30T?TsCIylWx`1u^4qP{8`{xu)TP~i% zWFY7x!i5O0M?t+rWp9EQUQXV+d4nk5n-_${)8#k1DoyH!t}o_GkFn!NkJ#sm;m&R2 zW=x&SNd1+70H}!Bconcp);JqO%tATC?14Ts!Qf>>^^@aJvp!gL3g=TO8x?utBA?g_ zswGnvS$+L7qSMfIm>}VC5yekYqbuQ#D0p#9lO&T0LbJH!RjPSGRB8gNp;DP@M)cD& z^9y(SM@k{85J)T8JU9wxx#00RP$efAsh{2?92{5m=NR0zvM@3LlzqlME*ai zCM1S22`qwy0~B&Jr8UB-<{ip5tODu~LAsV12sE)D9zY0W@PixxEt(=T_#8%l;4rFb zfr(^d!D)ggVJIQf3b1t816w>_c;N*r02Q)RcUw6vvj7sD%YF2tAC;_5z0RIJ8`XGu zhhEPDx`!V~5_+vg+%y+ZVXugCeHl!O94cZMPYGF1x!z#-90CRj?;u#u8<=?tdXzB3 zAzkz`J7gINLSbT(`0Q&}ESlIi-Kkpm+YNp{iB*D(2R*pqJ&=00h% z6hIU$@Tpg@YEkWe4K44zVdh!q+y!#pU{rf$LHfZFU`f$JDf+Rsp-pXZRQx+`xjn|nm>L#_p}|-IeDI)8iR*(QB)g`L0$OmrrPa4S9F%NZf;}*QFa?dk=85#bZ3N>NW32!;to7tVbv+O%>iUQ2{7)fNRY3q7i=hUJmOa zX9N|gUP!U?A3Uf5x75)NGlv5b(5h-KhzsWPOS7U;=kNB3a)G6*JTVLZw#y%EwBRE+I&KAOZxb3mv+xrIfjz zTJc6ObHiv+44V0k?7p3VDNP0;YPWBi+_A>T+-WKxE zGD|Et78dl@ScU*(Ga+)(;A-5CIYp$EuKp zWpE^i|D>Aa%9bv2FFnG$r}jO?Uu%HzY2d*d)$l^W9tDQK_*epo7}B^kum!{=U$Mes zIk62fdY^y(d8ZO2u(tA{pI&_UsIDeigl%W;+_@#TWcF2{`pnJBV!ca1APWy2I%K{; zS&31nSw=)zB8p&_aSBykZ`?6yw8A04tI#McaZ~)k>>ov#L=;HzLupo!@(NoDjHX1^ zOR~PO<5e%;!E*n2_;VA>pW9hB?Y>H)u3KC6(Vfv$ik*-YFm;Sj0x-a=EM5$;@8_ni zgol&hMNKHsV@|OL#_M@0FDh|#1eXp1gf#{ zJsxRNA{?+D*)2a%Axl}0g~7#=*p`^{U7lp~qE=vxNwx9A&Gdq43kC}zkR%9;ZW4-P z$Bx-0IWG`Bht6{0DKZ6|Jb97mfT)-h2OzwNWdF=*l+pLVKDL_vkW=qe2ayT zMndr7(h5kc4ho<(MH4_B0Iz~3eUT>Dx5Gu0B4xd*g3PNbHf*xZ{xBBGLYJvzGa9_K zYjyVPa~%#P8Fns&SWPK8Fhi&*QesSJUbz!Y=)tkdaMi7~BCXxyg;&s*GhKpNJNlvj zV;Kq}u>2`I`UW!F$T3`vCDTxH$y#UTf@Bqd3p_Pv=@hdGMur)yhz(SWBt~I5bLI?D zmQ4?hT6MA5kGLOhe4aaZjx_8Z{l#vCV>x2Ul9;SdR?eeC!TB; z2aA)g^Ok;M#TPLYPOW5GB@w4+VhPuD3QPzh7Uh$Xjm|Mr6sR+zNO!0XP$46$-&$Z- z{<2;NZxp)j?z{~EV~I2~2~buW@d79SVi_U&F@=1vxHx|t#)FtL+S;qI$GlemdTxVP z(37LYNyY<)61>AEDVlcxAsEPbO%XDKO##S*Wi(u)H29{fN~^l8Z0%YTF1i4kSX*=rXjO7J z)bGX&O@P)vvMP8OGJ_Y)L=OhcSr*#$NHZ&*yFL=e2;_dYLMAf)W(P|zr!>Vi0u4*8 z3gGRx-xff_)v^q8@!~~;S%`#!3W^93Nd)90u*^EmHU&_DQuY$!l9?VBnS_F=j8s^= zR(ux{w$623OvxA#=NN{I{gfv22gZs(V!;Q}8Kj_gNUmmplJKd)(O0>)w%j#I^;UXr z5GZ!k^HXRTw7$@Si~I*436%2_-Ti zrIei^BMpW?#aA+Z$pKU`l+3#LgjzbakS7BYUfDE08og-VQAj6h&=8ONAz)OT!dtQ{59<5Z`c)q*`d9-l^eDmU20liJ z2`q~TiDYHQD1!4iq_* zd=?lBh&AEp(W77g+AHLE`45e2`^K$_kpbSfK;KGhV8ehD5cCT9loA-cjO85|7c&C$ zj+-zUSBOmzguuuN63B_nD8dL(b6k^pfS-)=j)GQ{3dtg%mOqrjV_}%f{InZ;B5&l- zOFO(C1ROtpoN)eHLIEs6V7#e~SQn73(iT!;O-uyxf)>Hip{GxuE;)U#NFHEECYS~( zSogYGEG4a-K8WF352NI+pUNdi&Qx6wLKthkQK~arM}_CEaZ8Iyp+3*tV>M1CR%#tl z)SV#!ksPhM!SXo+Z+#7^m4X!h4}~WR9YjZ*&SrtaVM6dmC0@?0$*BLaU=$i9R^_U_f{R#rvDU|uprt$(xlFkonnF=~ZgH6a{V zvx?2&*sm#E!(o8;tWmBfM&pot($uVFBqS{fsY6App~WeHD|7t9Wt^9+-v6U_sNgbU 
zo%s{x<{5f%i19EEXJ=<^wk^Ua1<){7#MUJP0T=2MCr(78`O)ev@vW9X^+ey`Sq>ydj<9^VZ1;?=5;TPX@W-wYtp;?Xv@8 zz9CYt7Mn4AUtsC}$lHWQjN%)I)5q)UisRaAN%4QE^??9&psEU&c-?<=-w*k^XufId zgaIALAFQ}Uxz$sdCQTeF~dwx?=o?**?@Id&k%zoRjvD20WSlsUiAWQ zeG1LVjwyS*zHi@YO8egci#7aR`wrw>eA(_7cO4%+sIld+&Mk<%B!!sJHF(b;5)X9 z%i{7qZzp@0O;o**y>NHl4zAP0O0Sc3oF4QZtTZue`tZsFUFpl1`rjmK0y78K`eLkF zZ9I+WLkKr+JbU=?-1Y0u{vl0oatVn)sbR9}IzX{XVl$!Sl&D#Ye8r-s_U1+59LZm{ zBKGUFkSVD(`DWR^y?dv3xisZBHT`hm&7((;c=pa3$l}|#Z@v5@FWsHLL#&U?T)lb) z{@Agjw{G6n^%6tJ*^G#xL=sOk8bDLVjC?R*K`zYSHH_Wc$Y8TLJ8#3q^0(eR_wq|G z?%w6sy|($Xy!UHu02Y^SyY?ery>qRrh4TK&L)U)17Vc+ey&vueI#)MrzPI%7*4>3W z3rm}}O-%Z%^X^^xK#F@~+iLF1ROY-oQJLIsOM4KpKpgYbph6%L`t}@Di9NNX4O|iMq$Db$&t}Jh7@u zja9EQuWTH9w9%{hk2lr^rO4}jl%^?)01@?`^`I!N?>Kx13xRxz*R=Rm3^f|vsiuo! zK)e`XdWsbmldDNr9Kg850T zmm}hUtQFPvC*}cS6?}xiBpI+<)2u%XRmP!}c5dbjq6ul-(tC4B(WVwxT0wUYd7{jz%%st+0 zafPu0p||BeuxG{(W$oWRLyk$u?|GwSiDL01S@$La@Ztd$0r=23OmQf?8Lyxed-y`G z#jw6i&uDEtVS(n1MroHG4W8Ytsa~+4NhdiJDK!a@VBsLwAp!v+3Npm-0JVaJ1zOXT z;KIi;EZp!c7BZhr(D5*K8TaeKsYgyGd?IR7KS711WVUzF72y~a@c*}jEHy2~wvI6d zbA+?tifrXiZPnB;WpHz?%js$r2Ts|l356Aq1ieUzND3q3xQv~JgR}^Y8Hhd121d{( zJ=vDy!Y=&i0aVz5p^0Inz5VvvCr>;_k+jo;iWu4uYNFZ&XBVqn!IF!|jvcd0j6f0) z;JSCWo;_kPd|bHyp!!;;&`5&1wTnP_ zVeE<7>iANRO7uM5F6iRgY4-VLKYG9{MssFxRm9CSw(OK~6&ro6vZmF$cWV;W1VeqI z38Pb~N1u))P&i_)R)rh|0I5}Rh?o#aHGz;?BUV$2LvNPV=X3h|F38%keq`H3(+@nM z!ONJ^)PI0X_$O_y;(tU`!fB8L!U13OHJ z;T6(KXqOyDHN-#$7)k`91dy3S9)S%oc-BS^N9bjV3s(Zd9t&pHQp87&9Qn?-zYD3A zk20Sqg|a<;ni7I{iU?0YJYplx|YcC7{@% zuZ&&Wmqrk+BBh5=%8$?wvGbG&Mo2Z8DJ0+uB{&4!fJy3dke%Zj>M>d@@NIFu7R51? zk!Hej&d8HQfsxT0$?sW5SDL^n4)G3_ID~gd+7K@W_;waNB$Mjtmo5&Kkf|A?&)gf3 zC;})TCUUW5q1K2c*++^JzJ>CQzBL&3P2P{(!8{H=*bfvdA z)vmADMZ+sr60OX$RkoIp*>sxg(ygMlwuBs8N34 zX0mju4VA(U28n{gPb_?hGjwoRjlV?oGyo$T9Ma8JekuSmQ^R9-D7W~`8F~lH3VujSU4EC0^Ms-1cKqQI1E8lUKv-UDS=7mxKYrOi_6Yxe!gD1cB#zc zx8jPxr}RY(jV+ubd%Q!34ml6f%k&axwiQqAu*4y=NlrW+!V^d~!-j%UAyWw)UNB0@ zjS^7WmzN!y0#GK5BvU~$0tJj$0YfTy$x-J}9bEu4@ItjN#1ILG=?pn^4vHS5IJdJGZ+hcfZ!155MEfQMswl91)~>%!T@CE<;;L;a|bf? z6mc9Eb*wdeQaTtsIMuFH1w#~>gzzPl0MZ!13Pcclo(BK3cy?6;HDtN9k)WuuGE^?}!*84Nl zcpQTWS76OZri3N02XF9g%Icd0jxJ)!VG@{3D&DiuxE?=nT?6uP&T63d6fpJs5T*afA z2s}E8DdNtP8bAF^LOPNleu@jlSkx&PAqh_SN?#YP1QG&IAiyC-8qI1vr#%In!Wg|y zxL_$_jkxvy(fEH0!NY;uNZ<^Tik0~T0OT8Y5E_j6&e#K@q02q!0!$Bs&S5(*5 zQa*~3=%r>@f)L{oJ0#g5AtXesx)qj52!=yQWJ<_z@iA`7ZFoyA6UZ@fne_TbMZGMJ zQvR`)84}JZ?zwa4^sIbk8>_G=%vRF^APpRYW8PqiID@4D0ssm6G%m+N=Fye3iIJkD zE7cCuI#2>26@a2uDJQ$U=ixvQAYAI$07*M;ODKQY8BrlX@XW0Qz z;|GWoyAhL~frOF|k-2-Oe)^eMWv~NR@&m?*6)&-liS~0Cr*?|zihF2Iz^NwICwC>A zbBwyAys@r1PtR1iw{D+iBpb*ad}A9Mn3v)(s!K(EG5E>C8(7VrOc?@)o>h9fRoC^~ zIP3hDo$)>UWHvx%MiiIzoLyOR7oKhGBpim>4A#41VjgI|>RJHdQVHM}16?Z? 
zQt48+k2^=T-dpei`WkiRU>?=%JI0iHAM?(gyXN?u(aqhsH9x=La;SLPGkXk!NzR6n zOw15=-J}p3?Pd9fx;`ucT43!REM{6sU;|gVD({phQSGdSK^A;^jnW4E-&@3J+{(MsD)Ro zSCHK`6$WP*VpbD%t(bO~{fmI;O5WO_=Y!sb!GKe(Z(!8v07xu+Q}Y&5>;M#JxJpf} zyiya39)ZHpGOq7JL00&CYb`+$lLk=Jh}g&pi$k60{LUjH5Bjk(o$>Ucr~CjSf$~ps zAm+SKVnI(Ddr}e|@Bq9gKDIHBNu;4+2|39dhYaRvgiwJ}Q$$sFtnGZv>k>sD-altqkUYAswOtN3VK)!Ruar+ap$G)s-vP+~+XyA3j_;x_#RYUeGAo zXU?3FqrQf5^5jW9Vr-iZ^PvlB4`;;!0)eq7UiTp-Y4?ANCPOXmCE|9(>d@6bRhV#R&z$BrGtrC5{2lL|w|vMeTi z7bT(;fWj|GH4H5zL&7BC2?Qw2PF53JKCfLO^_+=R64=AG<@@zIi*1Ra!cTYx;DSUplz5SPKn8$JamXpgFjyg02#|42E*a-MVt@M7fD|`@qLgrjWR_6j3YPg%K_mVd zCSWvdOU_flNFHH{3l1R_m~;fq9xTszk>2&U?24B)$z8x26rQnzahbNZ`-vL+@MrE} zw47iT6QctFBNr)vNk9R3Nx)_Bumf47fdL8{1wD+=@PrnJ24U|$OsgP+> zJ47taZ41!29^~V4zNYDEDsz_Kct(RgokxN~ev}4i6KW0utj~_%Q=^Pw*qNPZf;|_a z4sZRiN-u+0k|Meyl#CQ^lq}KB$k33(^7sXYM8;V)GIhw*v`0(9ov%^{BQ61ng(1TQ z(=u)HJt*n-I}9hG+#)0Q?5R`Pt5@=^mIVWnRU$)z?Ke~9kYqi5`m{$2zVel?I8~E# zn*6$7@*ayU&zR)3;^7cBTa6F^^qO5x!bSMZOuc01P{Bo4b{sC=3K45b(8Q6{dbmh` zu!9^XJ_(3JKuVGRU`aGm;{s^smr&CvXGD@2#*F zrnXxuQHqaAhY)$re)NW}^H`Q(ObmB!AB|fRqG*sn9XGLa-$uP6xj64;j#n(Fr>5@S ztsjdaS(lN6SR?@GSp}(7l-JfL`%8FP0OMigZY>&`VGya}lk8rK!dCwrJu{eqZDn?p5as5aqJg1mZ zKmOaPuY*|FipnbYf*x-4Sg4h!=II(*{l!+7RZRTSjuw4{*CL~XzVL-F{Or&E>}Nmw zS$RnaH^qvn^s*m2zdSKUMOoSBN*r{1z0~(GP{#ls#VIBQsgC=G0VI1^NDCM~G98BT z-2nD9rN*K_RsydpTJb8x3N_-USv@k#s?R)cg{4CSaXPc*q;fhi1t8>FL*VH6$7fXHr7nnzVknuyR2ao{djw=|CU_$N5|wn>)D}` zlw>W-0_Tbam&jr_$Jk*=BjY(`Wl%qd0GW{)vk1%-fKq5Ox+F0gO+0}Ki^Iq#kTk>u z3YMTmIAg3~Mw6*P8j!E&dd{j`5m2TYCyGPt4zY^}fl7DuC}R+aOG@zTYb-6fro$Q- zYy0zvB33KBq+}Sgb!U*&kx4k)O!it~tX&R;NC_EyB*O=eg|P>Qa5XLP+$T^0BM6cu z;0{PRa_#C|v()zoZHa}^tiQB)--_ysHqO4dpfHsUy2AqN(C5^dclPXA7p@F5O}2UR z6+k8f_@#HyQxq4p>MY5?5fWk&V*LnaBrcTtSDg@Pg^vVChbZG6UmPj{)JlleX;Kev zg5ifI#6)qHcJ@Rsn~gL`D$oOB2?A%NFw#TK_+SWks0pPaM8v{y(;*>x*%dyugvVm7 zwwtgHCzAySF;5j{U`LaDHfhQMhoTD4DwZ=?p@L)ufI!V2(}cbz2LYMPZeQh6Pp5FujW6oSNv_3=kdmm-P51~5vV%U$j7&R*nc*biZ2SynZyS`*tK(aUn|faGi9taWgTkT{j5p$3qQH?4rckin=2 zB$ynk6-Wlq%Qn78K8=864t$ z3W$&z0!p%)3XF^q1UUmXjrz2;(QYY5LxNVS1&!pW;02RtAy%CLvKbiylwi65aXbML zNt6s$Dm9U6U>vR5Bu)AQQY&TZA$AcI>6(ZJYb6nHTGvaXo|D{Da{!B)Sy74-Zw%sz z!X}5vIBXij#VV-~^CB4;b&PcQ#v5-i*tPp?l0^-1qKPGCz473oXa%pYd8&|a=n~6} zvZ*(IbVVt>`Xk|nQ6dC@GBYmr$l-nx1r5J~RD(reWM7*Bqi_~7HH`|!p?&a_%1B*!W3+{YMq^(#l<6s zl%kQJf}>_0GKu%hGtUSnfi5K+KYkpSz{YqgurQKUYGh&?9m})sxIFb@=pzn?k0DjpG|0tlVGS;OD;GL|xf9{%CCT%EnE8|7Y>RqlJeL z{GypxK5yTDu;}I4om1QWvgf^}`5Sj{_`CmjX?b;VZvOg)EsrO6Z{N179w3?6w#nZ{ z?{a!Y)9?IxGK&3M{a*Zrhwf7@-&lHhb9HQf^Y-N(Gfu|+Uj3BUf;Vj5GO=T)k77LL z^0vvTUAx9NP55n7vGcPLLO*}|j^7L3zh3}7AbRiW)$1zf3*SXJa zqoV(~ospm}|9kxCFufvJ!Y7o9o>RnhW$FI*&0F?N?OMJ6@aEi&ZCkePnc02&<}J~b zG?uoy#U`2rZQAtBZ+%PeT)gNDgMLTXIoOU%m##ej{EOGF&3Um*ljl$%)oz@*v!DxH zEtKJAKNs&!T((P6kQv6v?b}`r^-(L2=2Vz`ffL{?Z)RqnOdy#~mZO&|di2JPIiIP% zcK!10J97*7d~I~&*T4R^AOG0Dx%l>ZKVkfM<&n#5w{P6MeBt83{Rd{Irk^?Zw8N+N z?LYGD`yjRMo~|3Va-D13m-m(|hvratNhw5-X2u{Z@9S4(9?b08Ik|ny?OStuc28fs zddX|p_m}T^tF+!x9e1kk=OCSu>xTNJKRIYZGX40gx!-a`y7fcfaJ!pe>NH7gSMKld z^Rg~`Z176P*vdn{(0or969uoM4?eX|zu%aCF+M-R7e+u~JZuL7^Iu=B~YO_OJ()7nkn7{`z-41aSB6 z&42b||LW0-Z+DFQtoXJqyZmi5Hdf6$_KdX@8^EiNANcJ`Ur@AYj&InyadpCESAHRU|$Q!<&pcBe%4C}X|jHoJ67UE=fpgz0i=p)MZ4D-OGlYQ=DwTTi)M z=6aqELQ-$g`NgdzdyjhNkpa$$(KvZGH15v(wzap{SC*F^xX}dT_OrERSwi;>47ux9 zq!oZ{<_Kxhg9Ik^jHSh*0Z~P(LwLnuJ6@xjGy#Dr|DBN3?eDrlFNx8R3`T<&nE}if zL){>^`=UrUnoDd)Sz1CnA6w-!5 zW{?4fL!J(p0ZFnzkr{T34h30kgX9QZb{uMEYXuA|8%lmLRSd}#6EP%22WfJM(#pMyrfQ)VA@Re}cFOqske5@ZtUAb~uKG=Qk-{(Wtw_T8t)w_019XPP> z+SO}&W@hX-2{FiJjt6_&1<7BzXQNhHY8O@K!$N8%M-Xeb__{-t+CQT}AX_wn3&VMe 
zE#}q%e~|THUHxt@nwl6;>Z1quS3K?S^#vb{_ILBR6%v8%s_Y@F0kWPpjM zer4V_5H>oIu=}q*rbR7q>7-=8E-$NlFUp6uc*6AK3kKUKo%$QizL)1}UR{4y_{K5x zLMa#wbEsexZrzDM(v<040PS#{A?PhQhme$}DJBWeU=>Pa3{xO+(W|yQC7*BwyBad0 z#L&2fAqP_TOM+;WG%_9mQRFbv2m!M>!imD-FacR_Fxv=$XAcQ3K?Oq~QB+7T7=}^p zCk{*#+4^SYlFC#tiG=Lv93}$J;(TRJCVk=)pCEyD8);eSX$_;osUx$gAXY1Y8KNCt zKx`&fKoWr{LdH&UBM`AByb2%%GWa#blmC%vB3S{`s|^eZF19|I2h&9bJH>$_(Pph6 zn1Do5YuLe9%p+=|SbO;JVb|O2d(WOdD^}QzSX|~ZvEnSGyuwIAVkLSsil_09QW&`m znZ257-|Q&|*&|j;d~_p)yVM$hK1Wj%_5DSFEtmn% z*(S5Pj2q0`JU}?zE6foU1#?P9=K2&>5AT3?hxiG+M-n;;mmI>EPD&4`ti-37hcgU6b1d?iH_*c&K{ka*+T-aj0c@3SlGEE^!h-2(}gXyVg!So0boGj9HuGs zT6LQu#9FBUM6AxnB|BjdtLaK>2BfY+bMD+Z&SLcB4GF>}V{|~WpDwNBAYdDHgeWlU zmF5sW_G6JZYBaH*NxDgh0gUmS5W@)PyOBv_oFXTusRB-doC6R7>13Gk^3tkh-A145 zJ;pFch^3A$E|9OtOVSidBU~$|;CW{v5CpB7;+NJiMuNljB~8KL0;t&&SZkW%ubK*S zAayr5-HlklwVKFi$4)z5aYH7;yx<@Lupz21FeU$$zBW$aO1c62i~lC0UIFXPh7ZvnO_S~C4gE;%eA z76VHF3XKYYh14lEf$DEImEfAd{2UAK*h!5a;;>u9W-9e`7=3P4dje~|evHqZrTZ3J zKcnfsh~e$44&E>Ht2`@t-N(#vcmB4q>PuIK3=^p+BUWkv-@G*k<~&!GM2NHAx3<&_9^BDmWGgZX!Vo^G1_1-1|6AHcPg%7dKJULnJP_hB9P&1fG2y{lQie9Y> z3`$LmWP_TDAW6fXxM2Y+jNxODlExq;L@wG1%TxIj7*H0m`VjQtwRk642eJqWp=bbv1DiA2be;aY6UR1uHf(URyQ+rZ9bH(6T><9XfP~3NYbH zka!i#xb9Y4!zoqc5(IM69f+xhj7fg9#QjhSWhFCeZr{0i%LRs8bL=6<1rd|HM-4qe zC1!@gjIUZv?-W#PoXa2noKagH7w*kFX*H1&ipAZd`9`yDb#VTqBGkRCa5 zgn1Rfz5`4$0$QlUrLX}EL!fL!5Ua2!`2diF^AtTfp*Kn)HCZaDGhA5}M?~1km{!`Y z12s?gTqE0Pf9tKcc!XF6+b)VEzZ_bAnMwtjd<#8qt6x5RgLz0CoyUdWQj~5_mfWLfFQB-GR$tVjfDfShhv# zj4Oa)tkvvg*dQ z8%8PiBn@dSVLwq4<^7Y-VG&H0M6yFo0>ZqwAUO!Puo&umIe743rUL``10VRnU;p)A zCvVWi5l>pSd6h0a?J2B2MFz>W*o=}TlGR2XV|XEz<{b^)!Py{(LqJ9g6wH;-&5K}}{3M;Kl z#+}okjNvdLEVRPt00fMug79=LJN46GbIe9YDHGS!R%cVK zz`#cWA@GC~MPp!1^lD;&RM2WbO0NjSD^OrGuH`1P8F}{XSxt=3w5{eBj1(bO0mtT; z4a01$3gZc+MXrFuh?sDrmpw5XUaPbtiV(%b&tGR;&cQsIrs1goVh(*r~7?i@UkV%oIv|5FX0k%IFr=W(LbD264s(4IN>flLP{49%71@ zm_*;EfKBsD^&lblJW%E>iC&7Gp1GYn?^J$b>vpPc-oA7B($&SKWxq#s@bI%FN80vn z^<-kY?fUh(?vk3@AlLOlT{U$GpTazcYWNOR&1#xhD}qdOsMvh!0C>dqMS_rtT=X+I z^%DtBJw-{Ua^awklxIhd9u-et>gENT4Mn-jDrQJ50BM5% ze80lVJN9z~3pWYOLPVF7BVo~mIHu}81{eS^7Hz?22B1=E?JR@vaRfgpG{xcg6(<=ujadL<9_JWG)aGimWoo(L2|WvB*p??Af<(KVFU__NC-nG$%IS zo4>nn{~i;@^sb#Q5uG^kjBt4quHQ)=-3oChPj7Y@YfyMo$Vpf(9oHr0iMm1S;*O!@ zDMS|mdw8zcczZaqe?r0JxM6JXzTIvI+lV`d?km8xDoUxInCkf_#_2yb9lR0(u|U8* z!G+PJbo^^bh_dv?=zUt3*X$GBJtE&iX#I4mZEKCb$7&MWa}6TjH@L>e?Cp*8>(_3~ zUBBrfioo5xSy!j@EY(>D=#n=wRR?HF2kRuNV}@=JbY{zdn3SYxOMjpWJyhjr4{4MN zOq#%iXeAsn0;PrgFhU?<7y#4bu$CiFDLLC=z&F0}4Hmk3WJ86XKuJIdneiNk1iX;R zzYK%OGsjO!X7qczkrNw zbfpuGheQC2zBYu20;}nY*B%t$mU z71maAGHFo&S^-SuPU1tmR7~829TEXvl;$ZX^O+Zh$Ww|;C=jp^aW7Q0gv4MGINSRp z;Wd$Qir0v-piB^Ir1htQDAFC8bdbOT6pU13$-KrS13p@)riC%I_>cxU1bWin7~bu( z>va}WSz-AwzZopFI7DWC!)Uz|B}yA#@6XzR)v2@GveOjd8fNwK)WoAzOS}if>N9Tb zs*6u`e~I3CbNakTy?N!X{~Hhh06+jqL_t*avU-IPZUzHT3TS<2wFeRfE(_c1djiK+ z+E&OU;9WhfY~qr~LUEaIyewH)!IqYj5*E`v3`*vap9sUyLadnJwTdPh zfOG_PBp6iO6g|PGMTgSj1}I(|Ekf*nX|%O;a!Z0!J<<*zJm68b;$5k;q4RIoT!uyA zCffv1RArN~VtdOedvKdh*=PCOw$)AkCD)w%B}L#nUMn}|Zq45{A-D`sd+fdY_Vnq)ruoHrSAzv# zzxn|Vj+q{~I)A$^%{xakC>G{zw)r?UfZAG<(l}n zOiZ$^el)}Hm~5#tOs137Qxs3?71`;12h+!u*~|5sB^uJqMB+F#K~3X5_>WO0a!Ok4&BKH!C?|&8l}D=?NGWC>yo&Q+{Zuuan2F| zhCq40!gyO#wWzAuZ1qB>l~{5ZAXi9OSiEf~=6WxIq!|@z@TT%NH&a3q!lbRy3%lSBPY!OiFwJ4oN^pngZZ2jQf04n0)nV1!V1X15C246@N0F z+w%y_q&SjDs#!u#4g;uBsx>K*fB-Ms9Lkc2(i*W`2p1YQfB{(8uPGW)ve}FS*%@`y zMpq0?E}k~QqT`s23q70VCtivST6l+s96KRrcM(&>`7OKHKRi-Y`<7z&a$P#|l%@xC&44Cf zN_C>279!3zNv*#V9kwz{W<%Dp)qR}!$}Ki2?L#dN3VJ>iogIZA=$-KU#d~LGj~zPn z?3iB|GHzW-o^VoBGy;t_8O#U%&@1iCphgABY{-l&^n_!9_yQb!tf|1qs*p2xK+_EMjH8 
z;Gp5HJESsL=9vl^={zN#vJgGvu`^YT5xaq5uvLIML?l;G-mAo5EeuvT3#L&aIj!Kt zK@&%iQADymgBn&!p?hHGocTdmGK6~`otkG_nk~xb^n<_)1=5UKqsxPj$YygC;Azx=pCl$4@EELaD5X@5=E;* z2#IUvk>;b1fH(@-=T_fk!S6?;KOaZgAv+y#*jKDm`3#MY2O>C`s-2~wfATnqu%6P`5z(^fRskX=f zkkO0Gp;4VRhcPk(y-o+1eq*FK=vBM`UI;Zi6F|X7L$x3baSIp9&^rXHNrnNaNkdN# z)p%hXG625%CTC$izcbRIR%08$c!d!0fg}^&uwk$vY0SVNvWZWi~;S@Xj?8^ z4%Nc$3WYOR!M9-{Eh|Lqk#%U8$12ZKB*MhXHwI`$?+}5<6}9R(0(b2^Q2y4%T3gWy z4mY``mo2dtA)7h1PXjvpqewgx8IF{qDwSZ!G;yCUO={zCr$>$rbSI_ORW&1M#UVLF zfez7eU%;PXY5R)A#UnSw6rr&Wpy2=(*&|s^HrKAr-Cy>ah<>X3G@?vBIRu_Qefpc< z{H6eLOcQez0VPc>um`Iyo8JvuE#)$tjoX%Y#mLx=;$PV$~H0)(Wv!S`@7u zgXAiW4u_Ny6a=lMDosO)85os{b(bJn4nT<)tbnC^CU`Kh9MSzuVxD4|N&?yL`ktGl zq3-`NcIUr+X6K>U#eJu^@1#gkyJkG&8GAev&m`lXH$iz~Tcv{^LCq_wAz zz@M#HswnZJD3P9FfDkr#v|_wh@CpnP7DY4w*an%@fdTfaijjbp#xTh?q+3bt?;q^z z8|dxrALzGt7B}qZ%&|d0M2#2iq+08-s6-iFah?W%d0N!IF>-%IU{U;3SBwf0F*G8Q zpLEi}lnF?T-=N4zqbwM4hH1FG@=4eqK8i_QQ*$g`wzajNI5B}6>r(~;WN2)TCb`^` z(c}X{8&@?o_v>n7Vz9cRDTlFuQ*#4$`ZiHg&^IOZWiF*Tl5E=3R*WQl_m zN^)3Uv96<@mRgE7BT)oDS^eV37KA2F8 z*ax~`L_ERdNSiG5Or`8bH?H^ot*^W(4oqE27tv3eYaz|C34$>-f**V`g5_h&%!m!i zwM_NHAO4WhWbdl?R+S)?LX?@}lS<7D)h6WFNX}WC1w{y1xvcy5V^5eka+xEO5|K{u zP-O|?B+6%{&*wDk6@Vwquz^p%#S(dED_S-gdwSS(rH6M zN@QU00HE-!2vq|ztDCUky%Jvv=^(@m!Bv5>kf;%SB2DKaP*JaOklL4`NZ;7V+Ujyp zUs+Ja64i_I2Y|hDG(YDK4`WnhX?ZI|D4xRdO$3b;{m};Y6v4(*LZY!{O$tYkvkCyz z{n0nx>}ctHwEO76<^xBQo;+gxdp=PxYW3lrmaet6qq}#&FWSz{`+?TQ+1WY$vVLyy z-m>nozkm4UmtUC}KiSpUbLp3#eERW~wdJjq#mz^%#~Nx|{bH-m+0PVO)iQ!MEY&?Q z6`PQ_u`KGG{PpWMrMu3-XA+GKZH-M08|&=J$1ZEbBV)FhR@T;M=jUs*bjAsf>vaE| z%dAc#@Gq8JZgAFy|;NqDjfqVhAB`w zBk;=TRT#C3R5f(?a3>l*zZ%-u;N#J9Ufb`-`cBK)w*BTNpJMKBtgrd5%i5z)nHQIq zZ3nw%LXY6g#MNupOfJSJCY)a9f4u*fAAb0i*I$o*hv6Cp#4i~7G&f9#rQ&CCVNny( zbG&D3W3#ubdvShYu(yBZ-qP;%z_shwmzI}D=%=v>W*8RlE%}m_fL2Bg zXSJ`s6E?G|Z)~_Tb9eIO)cnH2iOI=(%RWR~y7=M?hSrc=c;TXJH4Nf$JAPu~@|7!{ zZS9K-3)9n6dIjSwKekAG-L5sy!NCC?FkyA8G{C3YTAjqU{2nW%#Ro}6GTi{D<(BEH zpT9~us5z)@sO6^SKbqxOn|XZTdry1#jQhd;ZQq?}?ENxZOS45vJH1=1UyEnN=SO{y zF(ETyyDF{iMu|oRb93{}Z7l|ST|HfXD0uh5Et)2L+b;#ygWE)ZR#}T zGxE^R8&>Oo&!hjKHYS|3T%-j{%z{_I#S~x{?GR^I=Vy1o6gow!6c@SsQ zj3O-mVo@LK*VbIKq}8{_t$i|_GbhxNraNf}4b4uRm~w*JpMF(WRZ<>O^DELgUh0h4&fu%`e<;KSkxP;~uySM2rIQ;H`> zA3_^s6w&Z&De-AyY0^uXIV8~!fWfdLNNB*3#wVO$wDA;`9HLG}Sx}|6r3*@_3lSn@ z#|sR@w2H18H2Y7pLx@GfBLKL5{kk6YpZ@8es8@F(SC5T4o#XA?xpRs`PGd1PD^kdX z0ZYXgVnZGbW|9qY5uA%uPbn0b>G`bG1uVxMT7xW-z(_DE;<(8GSBjHJQytyiqxjl( zc{yeWz*I4xx2T~Yt^{&HB_ai50;iH9Pz61+E12a9q=N(x$t|2@mz(gOQi#vMfax<+ z|6bvj1%SrRpFjV}M<07G%q^HLo){mKzPh@EP-lDVM<0Kvfts3{w9tzan@WmrZ#9~D zt4jTZ$sqN@9ChtyuZ?$ly4eXZB#!rWX$ft}nQ!{azWiudh^az3mwgrulS%;@=a6X4 zu_b?P?R5;xub?9-P?_x}UTwumpapvI2@`4Qlp>6)Arheoc+5NWY}UZ33=BrqH^m}D zzZAq!{nn6tzuJBHxYn<&TfMRaYoWtwJ;Mb*+t=CMrPa~x5i*or*0G6Uf2^%7ej!Fb zW3XVOTbp8uWaa+4l~{2cjtr05kXm0{=x9H#p(QY$QEzDru_4h4I1Wx(L{@*SGgMHv z_>)OhD_2~ubU4)t=`x4|kn1(2&fLOhJfs6iFQO}zL28psm1J=Ub-5ZQqRACk#E~r& zshR>6B#6&Plv01JM2It0%Ti0*&_w=4WbN14uOSJd#uP&)~AGA1qx3h1&9lKN5BOM;#hgO~9 znNCpAY)ZiZ7>TI_agdclQcF@&IU*VN{WWvs@*tB$X0a1fdp|BE9W8P>WOuy1t*@uc z$(#B4*y4f;Ki0?+CuRUsnpgVqq#rn`!V{lzb+%?4>3iTQM`8o-@75u;W7kASWuD7G*x|-{ z{0OdK1IPSvVk~|S1zr4Xnh?gOwZ)F+p<{!lnNbhN>P}9bf(=oMwTf)gTmSIU{=Fs4 ztYC}EY`PvEyMOTLaAr16-dVoY9y<}u^wgeGtI;h~#7np-ov)}CArIZZYh#foXV5luV6(Tc=s8feaQzj=Tt;(u?j73{jP8x4*#(JBP zMFL}(3n`*tR(42NJd=Q`N<~tW6t$JrCJsL1vN9q!gI9?PBF7uymrg&5kd6oDtGT__ zP&)M@kziO-5VwFuuweFMfgM zr>Cc}y?GFWqn_^Wt5+|#w>BHs?CswF=%Y(+xzi8dUtiH)=SBkpxktT(#kev7)i4qx$yFt{XRQeRlO4=9*gDoLw~SF+pl+ zYjuo+7?^8|Uor^~fyKAFsWn-Yz=fD#OS8t_Rq-QqR4$NfVKim9`0#jdKkZN=fzdnO 
[GIT binary patch payload (base85-encoded binary data) — not representable as text]
zNrEa79YJe(kQFbCYBTSrH};90Ft@r7s-dQ;sHTSP z0g;n#44KIejr^>nFwb+(9{RFSR;ECy^zVFGObdjYx^Jur>MNB|>*=S6(pJ1Ju)V=m z;fRG9OfWfa=KyhnPOab-q@r-g3VMjTLP9|8i8<{1M*F-q65B3S(bdz|K?0U(8G=O0 za9yyVk~8TFyb=$#mHz+%LTLa1l(k*XfLFVSgsc(#=Bmql-H=xepUS{mE56?VHd z*K$EBy2fO3#Z)Sho>@T~fCLT?W36S)2t$gN^u`_W5~PrC6|Tfs7QUFOqN4rf6+E-k zMV~gVS$`V?>OQ0FNFM`%-y>NGpT$ie%S#K$YB0>@0hmBEBD)^HxEiri$6Y;HshXZT zNE%molsk+Qy$Qey!)fD_#{lW&pt4GoG4(M({{T%FK2j6~cTDRIHZry1N4JHejv0z5iFYYD3fUwb z#N#C6@&g)5dW6B@tADH@N>!y)IoG-Udczc*8*icMc_WIXB-Hb}7t2T%Qa5_L<%m0o z?}9#bFAa^Qf>dbta{NS@s#2LC5z3IeaJHodG;cAEnw1r5=crZw7RMYoe%Q(F?frEK zT*QB@)%Sial_0iKpj0m=pHSaTO;gj<-7{3S^HFQ7idbmtCXHSRrrM?B2etyrgtxZ- zJ7~(16qQ*l``%9-Ee2XV=XrIFR|~`f=QOcOE-nzoPykgM*BEec3C1|aLEDZ8bxhR- zSeTo!7hT_;aZeRS*an1kZ;YF)d1Sj$Ls4<5T56a#NP?CyM;o_gn{EMY0mg6v{Itxq zrnVnHMP zK!6!}!6Tdzrua@VJY11A0iO*;4(S>a$e1tZ&wh~E(v~XPxz!`KNl|ZzGtQDW&46I< zjmYooF zhdIL?qyffo55AWZ!U0mrO+_^2?-^@z=4yKTyYA>?>3U9?isKZtQNtx{5=N5HPbesn zlm7rxBy8?18+(KmgM!hdT4;aA=e$DM|_sOjhX`j-6$%Sxv*v zSCV0uGP@2OfPW4TzxT$I6B9g?&1&7BT3^eYGPJS*MZjI~{c?t`t+?E(`f3SbCQF4< z!jRU*0;?;b$Zt263@`{8#~rxFmf;sP@di)Byl*l`$shb>t-Lai&EH|B7(?JA5NF=jzfVOU#mY${xs_N5U zEi^;Qjxbemj2tJh9E=i9NIyE|s>(dHs%8%l`nh_Fw)tWpX(WQT*L!i+GP6?O-q%kw{(UtaB+cX@KwvWC zF3NCo?VkMpT9rDJW$1R@dqvk}i8~GH7L9|(Sij->!dBcPimHYsF)LHzo<+i`K!Ee= z1QW?S%M;%Rlx3h2<|Rl1&mdTQ{KG2o5|&rJ?YrW=>v|i-9sa2GT}?&q zD#(_A)zmVBMI>%p3mW4JSDft|vye_SMa*2plAz!?rkLjVGzcV$ri4}V8_V7ltvx?= zyUlK->HFHEQr5J>7j`0^Wg&3HaJkw&N0E&NOq4A?B57*pr&jha7EH@5rPSTf+i{L!SIj#>e<%~`#(RtceLeKz1ct5S)8+^)hRFMG zK5+&r5kYbP0OixyEH#YO)5$7-4SfKsoRCy6vHd;ugqbF3T4#VlS&P?Ea}9KYm7#t` zVasN|v0+?#dbXA-`a6VlDF}#%k!Qqm;YZ@#gNDgCBN@kiG5-LEOhn%i4lbqP@M0K( z*PgI~l$8GfMKkwp{b3YE=G8SN`nrr;BBR?eNE$}MEL%ZCxaV-kIUEmdX-g!bC`8!dmmE%Qu^3@cAf>W$@;WX9(Tqs;Rb_7>Q}fUoi`Q7dbrO4%qB}JzWSA*-v@b+6O5~1%Rl#b?X9V==+LT>aH}6 z9Yqxu$)hbI48H8)6FFi4JOPe#l5wKTS)5rlO~*qAAScYA#QS^of)3XiDecvUhH5Gr ztgKa1)0pCvsUYnjcmDv+Pr%lsDI}$Aufzl))JNyuG}Tko8Re%*=d8Fy*r&J1jUyFo zwh{f;)K1^e<541cN19-I-<2^Ou7Z%I25wtEvAXHg*It38t%l`GO%zb1Qby}Cw)d2` z@Z%7VA?zBzP)gq{tERo=@|P zfHHD^G@SIZUQnruk`e+_(;a~m`sTXZLp8pdnyRYkhZD2AhbjuT(65Yxk=xq<9aczM z(4#a__F$5RWWMRVB5QTc4aVgi-m=AR$Mn*MLF&0T(eW~vT3I#rE{k88KDmso^ zjY307A%dT4#DQ?C`TRh$@(39?BoX>)8+MmrL6n=0Jjd10A`xn&k-PU?V65FaX{@nL za%!qtXsKf{`Cdb(k{6cP6@Uug;&4bDj{0Uo6r`~ucb!kPLZ&XzO-uFVecv&KyTS{E zG>Lnstu#wAgBwXE!l}5Njt>irfP3SQom$CSZJ=w+^T$&hnaQ;3yl(6wZ&q}}UT(3y z#_*QgWKcarRM$*u+2#q7iWO7?BZgo|2aZYiJk%~>3>chLSPsK?#|hH!H)nsjqiM<$ zueX}WDd}ixYu;Mvsn?o)j~kxx4pam~LXhChC)G&mh6Ah60 zi}#jMa@sBpABJ>`HNWfWo;cPBjZ;PCL{a!5X(_g{y2&X|y? 
zt`D0VtCz5fi>vycDhtcfJ)})6Yq~ilP#uV0PBEM)1mp4tS{yW_0d482&cCwt;S?zuz}iAiF4Fv>)!tWc%1gg zE_R=oARPO5(3Tt*j!^O_W+Ub-&pcHT2wp!uCEkK1$T>LUKOE~Y zhEppY-xQY5Ac})yQt^8*2z&%_4E{eP!iL_QpHt=1%@zBafA0dxgl=3K=Q@M zd^C&7mXtN7zWqCHk@_2b!p&`h^DONR6Aa;HV&H}ghU`x#-$(^pon)wodV^c^$DC0_ zI}rPCZ>Y$1jg5MF-G*)fnI35woe;Ae08eB7Uiu#v4FK=$D zucn%|(Mc6L2@14Ry!%=?3`$51&z8y?0fv6s0yt?Ylv0pgMaye{NUM^s4HuW~t9t7d zZ&vh_x0_to=&91C(4qosrEw9J*>f>fBoYBTKwdDQ^Q%oy_{~#hfYfgIWh+e+amkCe z5AOMxN$IGhroZ`Uq@mu6yE}>%8RUE559y~9ISVBz4$yf_NH2c*MTu#lNZDoE7VMu+ z23Oz@eN3k=+xh1W0-B5d;=-`Z(A#2;rgUm|X->w*=(eAqj@TM<(P!TO0C7+2bt?_< zPKg|q3|q_~5_aL6j32k@q1p~oF1qM|T-03v{6UrqYXs6&!X<%9NK#n}qvrr_+t_x` zxdR%a%B+*U=MYulq>yXBT%w2SD~+!EQq$IltE_XpnPgzR9D#+#20b7QVR748>ztyy zYV_Y>>luR4OAoWoy4Du1q6;;`riPTP4-Cdxq-WZcs^kS;Ng=Sxdz@$NPK=_p2La9j z-tVn@#l@u?w~tMTISqyCDMa(h1b|JGGD+%09+uC+00J|m6*&cIMUvz@jqE_gza^9Ga3f=84t-N20R;~Ww9$>ZZm@anfc!SdG{`kVg%aAGNB(Yt=M zh%j|c?t?#DDMF~^F>XH*2RYAudNe4@YcvdNlb9K>w36?23H0ln53H${ zrrQJ3NfzqUC)88!!B-7~xv`MG`PU-SLtjz7`$fX7s>t0p@fv7ly2nLbNe7it0fs{< zjJKpH=0HvX;9#CX{PCLAB}hOw_Ri~|wn9^ApUH?-&m8fsH6v6cQZfNiRWF$8G6_8T zm4hic$;L)8s--CSq@ore?kMZehT+ox)iX$s=af| z-{L3~=pf-upNI>C6~=^m+OjEyPSG~sCO}S3M}9{q<61IG$Z{%vy#!DK9W=Xl*QY0k z8ZN7b=Sw_Q)il&p#blMK>Ptcym3d*$z!@VXfHew0T7qRey-p9ltrhU@1XoQ?ub=89 z+-$?9>u4=^TFI2r&m?MAUkWSoH*ywj(fxjzLjPnBJMl-4l*Bws~F$Z1z7o%t}=`UQ=9{{Uw~Uz&Pky9_h~5gZUyN<+SHzSlt>wVE3H zbq(T*g`ilWkI#x$89})3!2{H!Y&bYM&bD}YS0vQEIUjh{;gGZdl@}T3`iheA)E5yV z)?4Okc~MnbXtFmjB!F@4mF@QDzM52&6q#xcAif=uTEX6@>V)wVr|ByyX``d+i&R9+ z&?xfjDfaGm!R1*P1}ZWz&5qdCR|mkK<0dIG%gbJ6?3{*Tyu?C6P+JXmxQrjLkbD$&#me%M`rjJ_sVQBssh>4t5a zn)*U%w^UTm`|cZgrns5cZ1Je_mA5WXx8EbSMtI=pOG+EVD(Q}S;rfe-Nl~&?oV8=PmK-8{XqRIZiU%Sg+09>_7RiLi* z>+|aeDm+Q9zj!@$4Q8XcJq1$7JtV>8aY}MaHz19q6T3dZRMq4LU%Dk5W_K|2?2dv4u=3DeVMp(-;$WwUc^ zU-gw>h)N>r&T`79{3ElQ2#5hKU(*9Jy%WM zCF%<#O9a(NaoPM-0kU}cV8h?HBi|Yn*`S3e0Ov8EE1^XoB$PY0^@l#X@d_UjR^2^K zs(Fn(0WN7rH`B={sEh_GKwblTvgLrrmEn+yg+(;&LG#WAl4ar^t!K>iSJa>f1IoD66tP?q5;0O5~bS$aV{OJpb#-S^rK z+v%uom;RQzt!M6t?okA)sWCGwo4q@XLy?xkfsw!jk)?QxEQS|7?|^;lSBgpS z&-dl|8g+`+E9^G9N^0tQ;ZFrb(|TBxnPN~^k(?+v8RMLR-&&}XF{%C z#QNc{ws7}pvr=8{GSk&FW}+wZ(U_||XcUYd#{?hC@*0sel-jxSKYg**6kCAAIM%RB zsJuh9#ME?E)JUx*C)2zVt4JyoE(3#(`*Xm_&N1<){7Ga0D?k|I9cE!JFvb48B8-79 zQ*|^*jSX!a4q=9&rws#u2HoL^+qjRnxZ@gDe2@}A7wPlu8c6W$^VPoh(j;GQv0XZv zr2RTP&=zw~6k$MP_#Cj|lsP9KVa}@}+9j1*r#F2*;C>iaX1HVVrgeqi-qBD=N@u2# zB*QDiA`Q7D@`Pt2Zrp>8bNT3~2`VM)yfw}YPyzr?++$1BEo)th7N!yknVfEr#6a|e z*nDID+HGl4LX}6lkp&`Q1=^So#!x*q3C)|Uz^wN=6^ZQ|XwqPDu482pE&kgycKO5jbxR(05K)`a%znmV1Ya6UTF>x7FEctak4@80y^1EUH+O z>r_xB+PN#n04G`pi1A2D2PYo4e&QO)V?cGd>xWSAgVWs!Uu=LqWpYVXwI%XmWs#$h zl0ZGi><~|LlaFmAW|@mAP28`|vF2cFn2^C7uWUVHJO-M%WiibgS4zxfc$|VlF~~XW zdz=sZX=5=+Sb74@k$kIg*Yg)xQmT;D?HZ)A+pSGYS5!kmBY8?zGRRJIw2yFjIM#qw zu;AAgiaAw4*B##tFxsWHT5VUtVyRlP#!6~bWOz2p=!d^Pl?Mj|0uC{yrOikx4geQx zM%Mn7hDrgKc|E=NtO2EH=z$ZE672VJ4sUVZ$U=OD*e(ehci!7kq zt#$f_UYM_z?|Mk$Vzm?QF?nn2Xz`uOSA+4-w;0z2$RkRoAdRKC*hu6YfCw6|4m@S4AP(n0oS-TwwhYk1;8^aE zk`|8C@Nk0zFWJzL$08r zsHI(#Ms5^svvB@K_6I${@1-QlC|NJ%H*<@t#1F1?r@vU6XzFUpdo+}FW@#y&HjGrR z3vM5WfPV?(agXxR5})@w*U)G;sfCIX3nV-4`{P`%MbyDW!QCVJf(2w;n+EW6F)q-+)hHs8xvs0&eJWeqB01 zZIvy_yM2vv$}P{?4mnrGs~?#DY$#@p9Pm_#SZpKo?Ti8roCd*b%oW5_{{XfG6!Z8gR<%eagjUlrMmP+290R52mX@OxTiGoiIBwR8 zdJBZLuLqY%;gvkLA&}r>^1;V)Guv4*)E?5`9ed}}6-3W3hA-{TB;9Ln7b`Hj!78nM zk}WMxRTN|S23vx!+mnF6jEs$W&@CuWOR>CPTh3i6i)}n@ zM0}TZMG3co>H}#${QbSPs*&MYDS906G#l>~Qh*^^`rc``g{wVHb)Gb7?jm?mq?ja% zT0TYy+THR19|xX6(@U6vHL314W2Pq1bdSD##AOr)?-5nE$4Z`SFpd>$C~RlabC7%e zzoFEvNFG$vdEwG75>f*yo7wn{w_guwDyeI0_Q1XQ$obLU{ 
zCdvr1DtdGMT?A=5=Ic*lg4oM7H3G(hk=axtumga@f~W7#f1GL;DM>-Lr+mii0#vKk zhB*C1T12M(^Fyz48JpH#kB)hjZV*>8eSVND3?n_WOdQ<%!Dik9(rbhLFQj z5!4l#1IHrvZhPnZX~d5+RoWk6cK5LkcH1p9*7DAaB$G;^fH?qre_nLWY=GTPF$IAH zo#gQ5TMVIMJ4AUXeTVw%f{U}b8~w!sMbNwAEs?JMF;_t**7Hif;^9;k!v@d5#(wzK z7M~F|K;OhxX(^UfhW(?PsX9`#rzoX*OiqZGdCCy!Jqn274JCm1CR$9eD*S9PT{v`b4QlRJ9Z@hy+LePj4c$f z1=>IgTh)M02e>0S`24kSq=8I!dA~T}D#Mr0pS|%eDnU~mQpr@wDT~TR&QL}{4Ud zeyOvBCiP?#vR2edUlCV{!HAhT9>9U$1KV7Qh$$pfPP=#@1k(2#)1~nCH+PT9Aw+LZpH?*heF2;2(`8!OfZ_f<^Voxs7yzt!7F}h^CsI zogtg4`i`5cdZMGKZ#2SM`<#nWEv6^SO*=|=xk30*#?is!ocn3_O_MMJYhB?LORzvy zbKlMx#`!;JD#^;&>8A3i;heNH(judhUmm$ftUx}bJNc&B}&d3XwRiE2yXq3ag6s|p88Cr8G>TKv_7F#ODj_X{{Xk-VrAuL zr0Mx4Pe`dDB{LI&>m$-L++YshI$~LTDH_ROTK0E)*_FcI>zqCmA4Q>pzOL5?Z^&;RBgyRSAEe#@ae-!~UbT zb-}yIYo&tSL2FippqTEJ$sfZYV5m{F_ax*Kj0~KQ1j+(xt~Ki(cf&G#?^9g4>rEjx z=GH6a6x3F#>upWumS#xpk(H0kWMB>#BLf-EJ{#C)R+MIj3_?ylH+&ProTd1MBx3yc z^Ah2xqUc+FWIaU%Ek#VU{#DutE4&U^)=$hRd6DjRCz6RV|HYaFfhRAxCeT(sbA(lrnY4zQzWHx zgDJU((|n=gJTzM&?s30}Qor9TX)kq`LrO%?Q1Q$`Ex4)$1RNeg1D|onIpMS+EJ-id zSYZeOASK(j`ouZ<>Ux{C9VJCYMRAGIX}sbIiIo6kk8c|?qaOTo&WIQ!9Gp_-*KGn& zNv~6hf5wrzzs(i7k-X4EW*Fykz$%agV1I-GjlHlj+e2T1umC%|VgQxeAw@Oc&nWa( z%Tgo|NYv6UDiB2FPILUFaf}oFwM3+|q}^CDrOPIRe6jn8uhq4+6a5unhN42`?v(*^ zg~m=fJPZNwG4GXK{RP8X&|hWosI)wfCm8ZcsS>sXH5Z4gF?e^2`HsS$-bF^o;uB-sNS#=oq-qo!F>f=Y7RLzjhJZF{#gRJR6# zsVTvg2?1p!COu!6bDy~P=SlGsC8la-qO~0TJ$-2laT3TD>Ipo?O}GRcbI!SP(#1rE)7~azvXz2y%-5t?oj2E&{Y`7R zR^3(Z^(|7#ifFkol?QPc001`=_8H{pGe`Sufqt0l^OOx}vVf2o;=1ZSl$tyBKb32w z7LzHeisd*ZBtUtIhH!C=^Mj|Pp>`66uDG|a>KcMj&FD5DWk zQdisG&5o)kW-5V-#?HJmWDDU zQ$$XDzTWuH{q?0Nx`w#&!XZQtCN;a={h}IL=Db{ErK6^(tuU)IQ_}`_5rxm;9Fd&+ zXWvo8T0j9I#>cnjF6p&smct=-FZzNsR=T@w$!Cs55~rtYp&pQU%ARsF@6LrN8IBFl zPu;+K$6E0e60u)x>iyuw+WBvinm4rlRKP^RBWcOcGu{EyyPjIh=SXM>lno4Y$JtF~#&Nx22@xWevSP5|poUl}s zfKz*x7u04;P;-l+K0=jL((%rk7((l#^jFy`DN$mMR9cxNj9@b#TDtsaAd&N*oj1dj z&-2{bn~hv@f`SSx7?otW!&`5Prs{sB=}8d79Do^AxF;w(55MQ_qNph#gbqc9A+}-& zAh_ShIOxusuIT%XsvAuc!0k+?S!AiFNRgQB^r^`%-mk^>_||CQRW4te^HcfPl_51I zqGE2k{{T$K+yuSU*lzVv^qoB{XQ*aD91lrI9bu52-dnQZsbv`_;Ei_GpStgs!T0$Jl>p2O5K8$4v6jUS8gDJ=%P*oXP& zd}tcD0<;=F9XE6s@dlbEp!4mDKB9rXrlOtES?XeRDhdAp^npPn51jq9B7#Adr>!*{ zvTSPzq&3$+VlaxDvY9P^Lt82&RQE(Iu&LN#bvPj8 zB#Za6oNukL*9(--L0ABq9qikWsIPu8!5QzXtzjw(NHl?}$(BNr{{S5Hja?yk`d>1N z=A$IL_y~R;gbh|QCNlhG9 zsZW-FhZ_DNk%7+xBffq$%6X|kN?EbT)4gj229{E9bNU{TPwKvw-5b=yTE=Km;Azw- zl?XXy$1DK=dv?&JPnkTFr9pu&S0Lc{R97)b8kKgxtG@a|@|o$?t0=1`1(^h)<~z}M zaJ>6u40r9RHh6_H@b_r2Bnu|5zWPPY`yUwxkJc59nPM^2JgM!w$$n2h`6v46xhM%z zNj3G}Fv=82QUJrP_?x5ZjkKR327R@!1p<|dXZFyzwX61jz11L6@zhgoEHV_C0Nwgz zodS3UT7sgg4wBhN&UpHUTirxd(9;=chJ4aoZkzuAQ5}Hq$9(G%D6p~#uV&G{a*Tq6 z3Y+g6_Km$!)HdtYwxg%$ktyhEDW!+&2J8kKOnE2ehalr1Ydt1c3if^B3O?T(NX0xCD@ zy1*W{@ap5!cVl6e260uss+M>M3ku^o_}kczlg6`BNmZ-tij4C8z~#)ei$;J^_4|Rn zHzYP%wu-V@m0CAa$q*$GaCdWo-vEGf{j|)tnx7F=8{fQM%BBZSA6w@P6@6<`w#jYO z_V+;O#7_j{daoEeKs%Ql5O~1WqUHn&rslin<|DvTjiPDEzZ%7P=T9^XNiUO1k-DUA z6K=!@`MxkY&X8D_DSlru<_~M2_CR@q9lSI2$3@9m(@#BJQpHMyNjQa-q;a_I^>ep8 zaol`rvmCs|&|j{vo|UWyV!&7phE({iU?p4=EVD|K^Ozypim#TyB>oaO2kqS8XGRjy zH8w$Gug;OkO#pWW{p0By?ABDVTw^c7cc-4~WEN<6*qePafXkoU&R}^$G z(V>nfjA4KT7UhoP?VfSR#)_9A#+@LSRpGv#e8sSVD-{Gk;>aBEesy9(L4U^1Fb1m} zAYWU?w+kIaDuuF4~QhWl_^Q4+PmosR=ZpZ8fO67hVbcs@wwKA!@u=$1t>v`;zvBJ4!nItmY9dgo1fCBC#C0CDr53tjc zCz1dZJs08t82N^Fzq@9_j= zsJP#rqHBQkAu*z|7{YcK9{3%J_{VKgQ&&6vg?3WygJaz5SgEp*5{z2s+hv>w>Kb76 z0)lyp%%N%;n4jiskl7saqp|lNI&xT0bSTq(;^;1twrsBS%UZx?rl5GBq^hk-YG)p2 zniL&}8OR@y#?jx5chzPErc#wHE=C@W^#uV*BpdldKTi0MZL|F~vYOdbb(T~@q82f! 
z4;ql$2Rs}e{9%THD~Cx6E7qNTdPFKwQ?!49)~r&7U3ae|@E$u=2jAz+IHY6_@~6S;znAXOwN z_ByyyUeJ2sTZP>qfni3W_jV#SczNMdRrM2Abq&$&R{6?N($@u3B_WyDblG+bg@{St zfE=&{;Ap5*l98SJ_IPU8N=xaweLPs5RneXi_*F|SCr?tt)>mnOFkEjkywS%SMB%3T zaH}3eVdGE;Q-bZ;bP20av?e_@V9qL9zR#QhO?!(cLEgJ)r8K~e@^vP%KG%QmGU%&2-zBdrTt?L z-y{K#uo%(i&Pq{f!(w+xII@6AF#Xk5`QhmrDPyM}Ojtu6)(rdi?q)dc>I4k)$m5MI zDM?s-1%0|jyiYxlY6HbN)s)fI)KmFv@s&~tXfv>eJbWHW=k(Lok_GW%0cW~B=}V}w zUZ~=!f-34(pVv<$S!t7=LGHeicn6%GG#T6Nm-tc?M=UZyEDkZBkacBBV`!K5 z>5LUtCKd!Yc#lmnuDBRZ)TmT4!L#{5ISO&ycN!`h%UP!Lfhjf?{c* zxWF8L-&UAPK`9%{*H|f8L)p7yIAemDDd3t&Y2#TvD-n?}tlhx_JP!W=mPf{dplv#j zF@!B>l+zwt<)j^@h}Vskam2DB66l+eoclWFUsEiy(>uhc>Sm^-h$LHg0qI`xjFEu7hiqd>@dzkWq_g^Wf4G^6 zWdhvVk4}T9Sg<;hj;5j-i6|zTI_8xlh(b1QgofV4gOwhHP&ng0+0;o*pYCcX8y)LN zABY$IYAg4v?8SM~GT&-y#FW#T7o)iB=s! zl7<-R^#yv*rnrx7AlXHU58?l1i%3I+c(W9l$&skV!tnQfIEi1hb3|H-1H8;UZfF*M5;; z^mkF!^rgZnA&#z^rQS8DwHHK<(<`_Je&FY}G6)1<=~A;b23*UAH9xJ^C_kZ0zz7=R z-yNe*hzPfP<<8+PmY$m9RK{#m$jlqis-t^%$xm!@Pa0WbZw?abra03$7@CrcEsg&G zd!#1ObroDxQP8AqJs8@utfOfd?!4gS6Y=faIo5)T5mg_T9@oJDvR@w8lPCVNJ47$8})~XlEu`vt0WWF8afMQC{?S1kjGa9 z#Yt5hoF?MODm`3fK{^!K^HVK87<%9I@k>nVYx%e2X^6%Rb$3ujY_`j3r>a|pS`>y_ z$o#d6Slj{#89P{p!!B^S2R@)Rpqc2ME5r`u(ZL+c8jUrq1t?Uo3U*(8Arb1twx}eM zwpvxDf=H1R@>78EwmB+6Ki7?C_;OOED&vn%eV+KWqLi?xI^O|0&i54+bZ{*+7Z%*> zB&O;sH{}GS5|0JSwuziy&x} z6lBMw4~&pbPshHBrOR}sbOSblR3poQ$?L32=&q=}^fy+~Rz*EIO{6%Ion_k@Y&aPo ziH1gd{l=*|GUm!C!_o_31?t)FpSDram!E> zN3jYr3D!iU@e<{-32^O?cl7NFq?C~96{cOY_=sI;CraD8T_2+FRdo^lM3Gfn>Ou30 zVS_#x7CW~S?mrRWjaouhCN5;gDn7q|5Woo`g%0n8N~%Y?S0z=6Dxt4v1r#>wsy*RT zZcf5U3U-n~IKkv#<4-8bm|3j=)X$!OF-t`yjwh!~d*caRZPQmv#1(bb6dyrLMG%KF z;S{((#6J5$JmB(A15Ku4Wwg0qIK71*eW1<-0Q+wDL=KNxD}Q&R&lMaibh% zi*no^{{R^8tJ|rgfY`U-bF6g0$_hC3yWSoCii%romWz!%F8*t)IuRtHi{LKtl3SeT zrtzh*pR6RFI$!(SXu0o~!_9X0bqCAWd>r_Nudk)+a zmF7(FSSI|A7yjElVyaS*U-Lutj0gSeC3j5St<@=!SyqhX#;#)_IAXsY{kY>qMC7Vc zmdxe(a*6@~1@84cvG;?Q%UuP=$w^Uet%gHR=9(m^jI8nvqiUZ~*a!GW_`jIx1aY#F zQi31MRe@|9@4Q#SWe`RBZ`Lc;wB34BsHtR`x1s!0;V{dF1cp*F2f@~o7bbYMBv9k0 zI46b_oBqFk*BHM)W%@f;{{S62a;h5nYOeEBPSeC`{Fu-bW5x;JjDQavzH~`Ka(I>@ zkma*9*)VxPB_t$KzPjljOYP1704|@Fw_$)Wic9w0rAqJF-uVoY#RN@LNFGUv2&?A| z=N$h4O$Ks;5L9l9U~!?3nD#1p>Z-0r-&Ig6siHEe^3Sdi4(31L#z%k5<5{y-0)T7w z_tWMUTZTytH=go>eihzmV3z2Yii)awby)I?PrfNRBo`-r%`YX3D@-*c)eFiPtz}Tt+3Blkw~#k9U)1WMZ58@IXJ*2agm(!jVm@%(={tq z-4A-x)+~=QfCPH>H-6)ldXQIje^=F2(fS!pGaXY!=lQv(UL$8gzFq+}33O+wU^ zr76VG!(a4bsX<62R2}q<_dAs}1w<4zG-#BwqcB+gP38~=^8NeZcK6X$GQ%W>0~dO> z+q?}ve=@h%>LOb%7fzD28VO{@6-7vncw$^6qq;FZ%s&YE`8v!TXNXlQUBdis(-Eu@ zNDD6Y!n+XZD2t~0J{pUJ{{T%>c8koQr;Q6drE*jS1AuTwPC4NB)~v9k%Vit>->$?Y zl=<1IjW^aRzNwm~9;tz;zvZV$!48=cuVQ!ejN_gU1G(>^Ng`=xFl?qBx_u&+-eZm&%^PK!pD_*y+;B3ajxqr4qN->`ZdLkzUssC++ZG7k zQNGXB7V`ydMa(@NK0L@pG!k!+a2Pf+atIk6^s=Pa{Zy_8uJw+vq5}$N%XmSGTU<9t z4d#+~?zGgRNZNv>+@lbT2?HY|V*`=!56*=yVvxTLbf-42+yy|Of3R}YexkU#+?LTm z4Lq<_u*wF>!8pgLXCvph?me{X5M~xu{{Rq4a=}r1y-u98hyMTy2TzJy&BEbP0Z^+? 
zAJpO8lNRQ1tbX7DgUB3!aB=?tm*Y6j6=;{jOA02X?!B;T;^sM4lR`Uv-IOOMR9~uR4LiFC{3bW`BXSL!Ok210CBBFN<{f0Q4HW+aL0Y1z8VlJ_v3$Y?qbI1{{Rbb zeRtFSQAZet<1N16sUcdi7nd@|SAOyV!3++?B(YJQ0P$q7mI$M}2j!dzWeEv41KR%p zUb@0nSA!H#T+)`7K{Y}d9-S*C1!amLOGwGPAP^6~WjbbG5io#2yqzh}Ur3K0W0G#( zPXszvnx3ZR4IJ>>Z7Uiy2{DObk`w`%RN!E#`6PZLlgZGgu3-NFdJDO^`cnw1&1_Vf zZ0|t_tTgoqEG^wwlffAn$J_PPh0ImO{n`UD0Vc`650j)3xpW^= zRaaE?C1SM%6tt4F#Mu7;n_>amvp54Jl6#(U*lOfWQV39Pl;5;UEP>%Judc)>(Dc9j zX3|rM9){-}KuBdE59U1ge~fILQktIJ0EZ}z&RqO5(1E_l4+&n`(qVJxrddXc}3ab_m35i zU`tiDo{}o%iKDh&D`1E;Qixh8<#Nr)js+Z|fIPJ<++#os0FGV!!Ql%p2h!SD`2#!9wM;hgr=%SjSag_>_Xa}}&rGgmc6 zT-20``d01thHLfymOAQWuA!x(uu?dVN~xUA5CsIn2w^9f-{K(fz>Yg!QcT47YE2Su zuQz*+bjFavB^4D+wNrKvU45WB(=2wf-5nIstHmWvMD>gsIim}e$dwOaoCE-o?sQd^ zOeR`|!H?m-;-sqA`_vCB#rL#hWbo$w;;i)3G*MV<=;{>3<-`+nMG^t#Wp{}2kcYP% z=h~#KQdLDA$-VG_ERG!bZ)x>?>!ca#u9)gdI~3K>ThbY6;sQA$VI;85#E_&fx`oNZ zU~U*B=NTM4sXij+gs`;uv4Ey%b8!!L>G*{o-0^hfEGbyf(bd#T8qH4&A1%RdTPGyr zc1h>pYf#8?7J?ia41Mp$v12eO{{UJSzfChYe&K_st#Q?es;Ta=ho>tfk*zBu$AG~K zWPpBhbCb@1o)%(~>tvhgHO#xc<~eH#Bowt6bRLk+Zm^D{=~--7in!{$nWU1F>eNRP zZpbPRwm~2kIT<8#jWi@B96yGIl&BXNJKhcz*03~^6cm$v4c{E{t4BOvXyx$Y?OR}k z09H{v6*A?_(Y^^*-I4$Y864$L&W$UG989pXoN6-O5cn#Zkko2#=)q{NS1V_gTQJH_ zWSGGS5=e>)?M=r6h3}7_oajrQgD{am2o7<*=~DnH3Jj`yS9jqWtL}7ARxD3ZAJR$c zl!Rvy#yKs81fC9dfu0Y(G)1Ch!R6V|#fzKprP)RuQD(JWs}%J0+JafFcEmwFz9|Jg zNAWh5Mv+v=MDvM-^}@%IEt2qxWbztZUa(_&lw&fgJ?eKasKBoR2@ zR8CI(kVrfa$nl~BV3I`#rhV|jDzXrU7ah{Ru{|oN#4y{bX`zjbhGbpD4W)5_hmga! z^gZ%)l*wXaD35<#_Ks0imI@^E+l^z}JqsPItPme&lDqp=z<^ zAK1U$I#bdm0Id4`_r@-WXldxKB3g<`yws;^sa9M!lpRX#Tx5p@oG~~csRM(ZWJ;D- zJ?7MxBwT-bVFCn74wLfx&I8?Ry0fFKwJ*^(`K+`eidY0R;1n^wH^~_Q{{R$Ya=60| zaop+O4HoSrP+#eOkV818fV=LSHO49Xt-7)0c<3IFB^Jc$`r1hPe;?;BN?Jp&k(2!+&L=Q;A9=R z`8eR~IYD!#1thS#qgr-5XgbqqC15ph!muuKxF_c`ue^ZNk9Qg4E6oM+8?aAJylD6O+{RCCgJ67^S0w80l^sO zzIo7yO@I(Qq-#wy{lOf9%tLR}7%Od~uDVtwnYT`73nUH~0CR!|zhl_<$2RMQG<5J+kp%ZiFvVjda;#~Bq00a_9OJ$*ofOU% z%0=1Mgw^i97#TuXk&fBm{{U0;rJtv)1}3@-jwLK0jZpb~f`UdE0l)+wVT|$HS>UN; zlF2T@@=ivS>4tG9EoL6>vP<_Dv04-WlRXAG8<4;%oMUt=-;J!U` z)WN=&y(J8A)6}FW%ltWNBFTSp0|CZ=FZ*cLjI!zsdhe6Krj)E3CHpbuj=4eFuA`*3 z-YqvI5-fD`%TH2?5Md(7Igw9rSDyaa$9)~_j}YD{UoVpu2(+6j#|`V!7%1v`YT

*Z^WNiAekVqL{Ryq7b8sa6(Sg@<^Cko&Wa4`05D09d4q)4#9m4dZl zF~~L-hLNKNS0yr4M`k0D-?=9oXG*O}7MkNyOW~XmGX;=V9}MY-e7$K9gcT1_9m1NS zZIZirWlZ=6!`yFdDP!q$jV#VF7fHb}@;hBNbK{~d3#un?VVy+83(oWTm zyJMDC9&5&XNKdn4A7hmlC&o3a3}FT8F-_`|QZZl3 zb=6JR#}PsXvGXsd;}X!`MrK->VP8!!Uo0tLSUyH^k733@=UEdCrDUDEZjhap4s*TF zLrh*T+doBHYSYeJl{Mm{EmrL*QG`hIoihx@GLH1y2L*xLxbMe(-=7*h?j;;3B>*wbgmcGO7?vjlm0#<9 z4o#eGy2n@7J|C^Rrr}LJ(oj%EPa@)66NF*G?gkD)2j@Dh{{Rs(Q8f)@UdHWp-Vq5+ zgK6uoxa84IPtvzOoT*5t=__dJo{$&(t=R zDRRX_J^MWcA!ZvJ-1^n3sUT}bx`t2l9I#^PFxTfpdq#U2a zs_KfPZJ>r|?e8$ArlyBGUH0WSE^>0izuP*V3y2(IJVpg>#j`kOF3F%vQc`d0i+pPY zE?q8$ewdPK>0l<53FDSc(n2F`o`qfxcP=~nJQC<5)B~~S=uYN$5$9>>M z=F9uurluN2dKd~ws~o{(F{=IZ>So)Lzvrn+P=#5RDMC-%XP~+dEnqTApG^Acwh+xr zPVHOfN46Bg?O59f@QyLx2Uk!Q-B)h@m5mU~^Cs=%^AEN(*=w0cm*-04Cpq9?XODt( z*cL^1y&@D@Rffn`IO(=}gAhA|{>Nj<@2MmKbz-+FBiv>WZIdyqg_cAN>R15CMsPjG zbEqIQ5&F6R08w<^9K&yh-A`00=B>Qkfyor|a4V3zip+ zxX5n!@?NvD+iw=S%Y1O$l0vde4ODnYQN1J5dr6EOo&Y}i;taM)3Uf~VVCX`KDqY6z z{iAnPc#~b#)~f4VbP?0}Qn5*V)iOqalpv!6lZVfsfrE;h+*G1^bEcMlLo|&ntY2v4vLdH21fOe{5f=D=Xv=CTxs;XqU zMaqAB{lHR8wK!t0j(0+|-8uWZNowS-CDeJbO$1HmFgZMv-#fAQ8lvZcKftq_m!)@K zXd#-3frTHn$JPv0%N@$*#-ieCtEvLYDwPU1s^_)_JAZSi5>YBgg)M?lTz>9~Sg;FJ z0r8pG${xBIE*3g!$!)Zbw+hXQLxU(@`$o~hKZxhQ&mGv341#Ol;Cp()%041OiC{U8 zSlwIH_X(zgx~`I2oe7PWMOx8>AZ5cxfOmJtU_r)*`Fi9lmo!?`i3CSnThFh;&g(B)o<+2t58rSs` zsrtI>K(Sj(M^WbOnx=|h;)wUL_uy{iao<0f)J?T5(=OwN6%Te1J`f5D7;kt%>OPFP z^+mW--RSAwmb!8l3ceXfB^v{kJZBjm-05jt4D>B4asj}43$gDLZ4Bt4ZPV^6Yv)H@ zI&Q?2?nNa@m8mM0bles_>Qwr8F-W}N5>Cbh4LF4!6;zc?Qxk~C>qxG8 zi7Hm6xC0cjnRiJzkZfcG3=^FDgZk@AT9ZNnZTjaMl%Q;4%<#hHcIv)_ud!Gq6!Xzl zM$1(sVIYDxebO*ySQ2n>qaOLwawW`3{{UgM@BNq&$-ZHb62CFO zPpv;N!EW(3Tdlq;C2hH;a|0~wBIOVbfIFYXy>Na`PsW&+A7ItL}Xh9Uax{B(G|UGF80)04bLeGVL+tyM@3Tk9-Vw)%;=wLme>n_JXg&Bg$FD ze>|cM(xR$ZU_&6C&&mBUrHzCroN1@)<~Ss>v+uu%9Y+Gq1c7B{*vO_v zY;6U|$HoaG^wWh?JjF~}Ib}F{ZvOzP!76y^sN#~6tzhVILNNNvT4b` zuI!9`$2riI=0PAoIu^Tc5TcL^8jbF;ZFJ22OM0WbQ`0RR7Pw>($rM2dtEg2ms-WSE zwix7OWOw5$oTR6RN<5_5)#-VwU5jT(f|X2BAZuT*zHu+4`u-l7wccTlI4%{nO9asq zCQueYvLlc1pxf`s1b$DMJ}ktWEN0bskeZ z44|e2gygU%0RCJbSI)DR^2;iTuJmi8lWZge)VAw)D0=Bn6OA`XQ`FNK;=0izQpYV+ zJ6Ni)k7R6DX8pX4!g&jZ8hx`H5?tgRyoR!C@s2_xLZW&3N!77=&NKl;AN?(kS%pPNfJ=7k>Yi1 zDRmr;$F?(!-HZ=!Gz|KO zIK~eno^`@XQlg+A>~_1hQ9d1#Y?d3x;*pP4-(kL2!*xDoe5MGZo?4(I^`e6R0EG7E z0DwDo)x1fzKwB}$Ypu_GR3*-bUDfu`L!$nI{{VQUr!osV7@9)UF%d~9!zZl%7GMY* z;PKl!SBX*p23T@SFd(@0raqBWg&lBz-W8NlaL;z>e;h0GPkw134I9&u@oTr-TR=>odf#TptqyM?;lMM->{&6CQk zo|EchnacW0DPg&W5DQPy1)ahQa64WIUm7FO~uM(=fC^rkbLvNl-bmpODG%~Yi1wrFI zlmU$7{+#KBEg=N82QFP;w!%`%YHNgTsOdYmPESNG5W2-uW`xd9;CyaHDl$(X_x3s; z2ttC2*2))lzu}-FQY;n}8FPjBtzyYhCCL$Bjip&&rBx1MZQHiCK?DPg=RbdqHl+B8 zAkYi$`HLw;HLBtI#t-z*htu_pQL@cVZKfIGNI3W!);u)8msG~o2klx# z0>J*En{~kG@LtVNYpt|R9Su>j&rpF(qb|`F9IMDc-M8@P100(h#_f7e>4tusO~oN>iNc}h9w{^RD2KM)4Wz1?(z=M!zdDO<1oKULG+O>(cMnlqbK`MbqF?_3jvIWf5T7|(nWoe>-emoS-X zE^DKk+nw#5(Nq4piB+0o+gzby&DF9=70gXf9FoL_P{=mt2f1Cx2LnC*vG1kke;?y0 zF54vEpCC*)-#&vB(PHAIzF5A^iWM}lMAdBbgHe19qa zn%0^^#e-L;er7K)g(}p_Q=RwrINb3Is_yi4ciTjE`e&M!c}hI7@-`%Uoa2mp5w1kE zB4UlEBK#hn+qwzE%2k3$ZSC`hyL^n-8VOOcD1m(F<6MZy12{h880Q)&SPDruQ+n?h z)3jAkU82hA%ufYaeZ^rX%XMG@E!-cHFgg3_&Z=SnM1^=>WU8Ik8U{qJ7{T9d!Kjj7_b1+IkT!P_elIkyHVR- z2;+`sR)t#;qHo+z8~&ga#uo;09ZN=jCoSbRCWLMuv2)D^#|i`5+r^U%~(y=<6Y zS*C&@=_I2H81gpzxi`AD)9TVE_dG0BhTH;E}N^3AkF^WaG;>J$pDsGnBtmAqZ?GIYN&1;G_~JX5}|B)W>AmDJv&c+`fg&v zTp!2Siyc?S%kf zofrJ0;xxn%awhy}L{ur9f!dAgNs7Sr43+6mWUrM(P>f;5*ucQR&+{DQl1b#BojoxL zBGPHten-3#$o~M$0;jIq(k3kxMJ2j{9$38S=S6vptOKE7m^>T|W9`TF8m!DlQj;=0iK!!?SGblsZaa7t@^?9;dZ0Gpt$jVffAKGazhjlelL8v_N~gMvvJ 
zlazd91(gS8#5cj`QxRvzNJ8nCXMBr8b^idxSSaD6n$>HS(q8+3iXkQeWC}SXDFb$P z}o#io5F~YR1b)vHLL8v8j6^Dl7i`VN>f7&m9WbUamY6p{{W=ql_UD%-gM6}I!98{>o6>SU#0 z(2KhAixjXfHNA)8EN-6YYlU^9qU9AVMd_-dqXWzZYGPGg?(gk~$Ups(el+T)0_kVZ z;l6T;PiacCfvx-KAo%|Ph?@IwinfXmqf(CsDhgCdwMj1;4c>6T7#LP04{iWa#9Nxn zyDz_bQUW|R8%aKAy8QVxD|oBoWi86z3Q@%r@jm|mIbDm(8CJk3;}}1P6=mce0Pm_Q zY{IV%oC>)D^MDVOFqGLj9)9ssTJ9@d)YQ>aQltw?#!bqkGNHf>bHD(uGw--)=|D2b zn~%Qrz8pYNDJSA0bs&4S(H*DCM<0}>;a5;Mk|PHnIXjQr;~Hi{Q79h|6Z_5MNT$s$ ztTOaBTOSWPR^bwV7WZ8=Q9!b;*lCdiw2oJ^vlmg1wod>JM9Cp)3TvN#m`jM^#Nq=} z9P5;M&{z7Zj}Tz1r>%`^ZV)_i)K9gfNW@M8Z(vAVWD$@sIqjl}U?2u_z2b3-&6b}8 zfElv*5pDE8QwqCu))?+)*?y&OsJ3O-%&3K<$Pt5;kqJlKydPF^$U0I)(KR;Gx`Ka1 z7|NR|Qj#qKLCX)XXid8KjW<$XtL(qKVn}?DMNKRuR6$U@w2GkZU=BfE2~+QlFEka) z@g;!#+-mpJ<|Pv7CKJ;`^ARDhq>fs;*-|lbj1azA9Bu?}0~p{IARb0fzhWpS!xJk^ z$Iql5Ng-6cwK?Ut))!%^WV`%KcQn1s+8b?96J`O-!95xO?_T&uJDk_&->-g!L z#mg+Q{CC}BRmb;qBtNYPsK)D*Bq1nDg}L=50%gp31iB>wNJG*#AlImtHzE#pR^!44Lr;=$RYS|`%wnZfS0Cptj zc0M`B<)ciLR#V}Y0{+{qLYXzjoxgX+n@vRxN<~j3q6v45%t<8%c?Wj{aKMxH)h43L zt)1P&ZtS3$+4qd7)97x}K^(D2V$Nku`o?`FdHBKSllo~{@|h1(K6>Nn6eJY6$5-a@ zWm+wDDQUQM6tcBF-qlU~8n^?BoJtR%5D%oGk+~s2=RUnkU?Lqt}Vlq|H862zHF9g7pezyJ^R&bAbw7Q*q)>?mT(SOd>deoy)O zXx(D5PjaV{6sxG9td5;b5cRWr9f#HP7GTDB&g9-+s6rkVwz; zG*mHGO%2BBD0W$q&Ljvnk?eEacH;!|+gXGrV#*KvSRamq6BI2}q*E)#`8?5IRB;iS5GFFx$w9-9C zmmM6^4>EdZ9&7Gj{+>nv;CDMlF`hvu;nQtFa+E+$ADrMS^DW)|B5zH+6ZH*RPj_i1 zqlnbBZx$DN_f^LO802%Ea7w0wL0*shx*Kyv6uoaba*3A?rMKU!ZuF2r6p_lAStfS= zBja+;gCLA61?PZABUzIQku*+U{JjpWI%Nweqym{^mEDY1w)z-tHPX>tYFxsOlBhy) zFRwdKV}eIH#|JvpP*lcLpv6PqbcPf{e8TPe?7@5FklU>mnyXB!0@R73^3qRSTXCJB zc1GoSC-9N^-6$qv?HZ4&cEH~eYT>=(&K8!{O;q&tuv0vt4%T6uJe(d!@?~%aGCe2X zSDK}v-=D{Ei`F?Rb7ck2{vyZe_^4=V5}GJl<5eu<$d0xQJZf>1fy)n3JBS<)ZDes` z(g~QaD0gKrJl^Wjy-)>mpwQ5UWz?mwKDr2LRsO@2y~I@}KWbzvH|DUW=!<`L)qinQKraQpZyp z1xW%nBptS#oaM8D>~zwm+M1LGV)(XNeQO0$z`QWuSgNg+RTP&=8RAG}Nk50S7GYFb z$YOFpRRbh($tNc|^hq-2CZ?Fy?FR0MP$YtC`R@2(=-Z4p+3q#h>iKFNXWC+cb`k5^ zpeW$3JtcYWJ~d@aN|eMhd;RYi8o@yXVq0h83Q>58c(IF>s;ZtU>H-0ZH;>9yTo4#z zId$Wn$KOcKmZ=FLBv>M%`rvX0Z4@%Xh?q{s8)(^MzQ;$_b+?$Gbv-*I{{T-_0ce>B zInGxElaQ^>(!=9bl`&jNLK48Sp}Lv1br4?+(w_9!@%xB3UllGrZ`85W)YH+@MHGdi z2&@~+;B9F*Bq#)r*V|7`lWjg{Hwtd=XpoSS#a#9GzW`n}^*>kM@4ZtsJdRY^vR%6Vj;fg2{-n}U%E;k_LL>@* zio~1+?0wF7sz4dMdXb-VZ5Pg^WxpYx27lQ+N#qf zzBNH2tVe84hj*a&{Ksr_?WNNo34!iDhv&Qu2B^Gx!re9Edg;XPP_+>iz?XS_vHSz< zbnNM60WU#^J#^9v07DWSj$7yJ3{*W)CB3R4jbNnMnQ-cIT<|y?ax?ujzlc(+Sm~!b z;8SH!_?5icO(ja5gTHtgo!1qG*j1 zPf)5pu!tf?V}Q==Fp53N`{l}g}w;g`IAl;&pv%{NH!|>4 zj!r#@9r@=PY^6$^%MRFnF@Cy4D1w?%^3Ri3lxKLk!xchDYmTz6XEI`;-|&Yx+q81e zl23oPIvP*`b$N^J;sFh5n|{aj7ccCr#y=c7ex2y#g1t>hs+wFggXJ*^jCu*+&M}&Z z$0y_G?VS!~5MtoBV;?VXwAQD)RaUJ+tZ2D)iIA$yK){WC*N%If>jWlZ*jXa9d;04M zgs3F3r*8f8iu!8}6Hv)lOH`gz6SR97yt1dd?FWJ!qJeELSn}4-^ssI72r} z_1#t5rmLZ=Bvl!sg=8E500~x7+khiEb z=Br7LJh6q(zDWS^eZW2R$#npgQ+*?+AuPaD!O9DbeC4C0rCFvs;*c?CDwrO;sT{5k zu^1eUDKThBbsBuVF3(t^k`j|zZ=a-E{RLG=YP2;)>0zL!5e13l-!!ns4=t09PtFcF z8q1cHs&dMdYjJ08Ktijuq5i)xy{^aZ_JchYY^~~g7_RoAG_>$cY23>!Vd>eGLc1{a z0^Y1;Kp>B})uqUonUHB3JKT%R7w`l6^OBA!!RYgxHls|?6 zRVUO~fSl=uxnxF>)E4S1Z454%U10-(3}b3=MDH8!SL_L}!8y zN5{T3(pfE#r%fxo;gkVj3*y1)T7WH+te7$H&!~3<1N}ytNCi1p9hkW_1>$W8`pb+Q zut;*oHsgYL2Orl|Qz!TF^AM^434=7#B}F`2QPjC*a7QD!ApB~?d4tq$_>gGNNU8eP z-3L>6ms3*ZXqthR8Kurwl?9G5=ony0$@a#SQ7TyppzmMCuLyA_z)<`?`9x}4jkc2E zbE3OZty@vP$!Qx32|Tx!+2xgTO7KoFGzsXMxkV{cD;+VQg$1u6ai;y@3h7*(Nz@U= zYqlCXI9z|rHBn`e6cHaJk7Lh1-~w@-NCZpp<|wF|+{f_@7C?0n_jfhs{Ggh2ZQhE+ zYMMADrFD?91rpPc4%|pd{D3Rz?a#i5kfc!{`MREm3!;T-dQ$B==gIp*4cf$%*6}?A zZAkR*{A5DJgU$%}Bad;a_@>{7HLG{$6YHE2Dz;eH^|7Bwap~TyxEqXV8b~Vc$Redp 
z!(=NO7W3J2$C`H(9kG&G@Bm}NN`XofQ|f&-)qpBc6?zJt=LV>H{^4I0!Ob!)H0skl z6si0<8JSnG}T(!r##73rN=tOWy=9}QBo~j5 za0m>+LBIeKG6t8QI!u*?E{80A1wLYhY6@heo~`-zx()PAhyD@2L2)-4Yl=frnW?GZ z^OB+vcS#d(1YqN7E6Dcb4JNtLn!>!=0$#Yk$cVJ5i2w@r?W{qrUplsex}dcl_}%`F zi4-7G4mR%R1bP4`ILXP=Y6)NfDF6WMNcH!}7?($w6z`VWJ(i^Na9$8rH z6g+bvU+{r|OJnenw2YE-!qcQ&a>f4uFZHAlx&kB(zQ! z&Zwtc2%vJj;Eeb0`hlk98H-7_o%}CQsIOQxrN~gik1dfP4Lde zgAiBL7dZ#u=ilE%ATEmDQO!zL=&`sq<8sjQu&rkbPx%Q8l#mOZC(t_qdN z!7N5pj&kPPl>Q|Ga@5nTA;l6^WD{|^Cbg2juBM>TqgUK38)DO|VVL6w)t_)k_#bX` zl)32)1~l)b0}p!E0E1p=jOS^oft z)vnC!9#I8>0E+Id@04j|rg8gXBZkW0#ZNLr`G=f zU9=3KS}Yj*jn`PPqv?*4>fV5-iiYnsx(_ueNQ7obP1P=V2F)C+` zD1<48uP1-FU3Su^T5F|l$LPWX_*-3DJ;+%op{b^#h6(CCx`5KpGiEi7VI#~fCL0QX z{{V@Kfx`2^moNY_1pVvp8Yoqw>fh_W5C&=*T1!mz2|Ghm8%C)Nprbg*1xohEzH^a* z^P&-Li#QnXdPiDXD55|(jL+9Ao(@|mXu5zpNUEY8inGKC4;MSX5afd0goSqd*Eq#{4BF$#i>OS zZy~_|(Vel3ZjnTy_w$=`r004d0ILFV< zjERXM!yRv9p2Sg7%%NpRw);YT-dO3a8Krlcig=fC*hAAENQt-y(~rasJ9oj*5E54Z z0Mv)Bb-yaL(Q73p%UAKG@nLi~RmuCFk}1PV*xpLh$?K4e0@4po2ns#BcRDbgEiQs_ z&+!0QC0;}6y2hUtZnQO&#zRk5ZeLEZNeqPfW+Q?bSYvB0N}e!t`Dvw)Kv@^vcfZGI zq?=4aRQ~U%&t?(qkU<@xqpC`nSdT)gw1rrL5Mc3vw30#RBTD#+Cft$c6jsJpRxVPi z6$%~m=iVlC*{g4?vOPT=U1&h?$teJX&R0APFd&u50hDQgyDms)88>a+2OygA-!4_o zG}p;bWS#*=l0!0|rmZy+`LoL@1iXL+!j5sbB=Pss@+Ja2rHL07^!bP^v*s69_pVX8 z{CZwkEwwaKZ?2@=OzJRN9&o{N_^?I?0PY?Aq@ik(1krut2&}TCsQ7?yeIgaYB`r`d&dwKP%2 zR;SM2u@)I}tf9vT;NbrNU1`)P1$S`U+d)Fo%1H;8rV+Z;OFebMI#^?P-bNlv!y6Sj zKI9DG5J<7Y+thbbV|yP!Q{gn2+!-kZJ~(yqUzE)+$BDXou^%YPG3>)x9-Yi-^zOJaCsJT+KQr1o}6%>F5 zjO{pR@r~s3!S`HqrKRw`DvZUJNV`%f$3QZvfLQ2Gtrh;|XrAJW6J=wU z2-so(Amp6>EOEFW(?CaxqQyQ+J5u54()b#t1tf&3xfiDMx&U>hMYiF6dySdvQs~Ne z30x7B^<@BD0zuB+1{d?wMquBJxERl+eYs9TUUZ!lS ztVd#cfaI0QC)+3bXj1}}#DYmt0^vbhtBC^sf+R8&z&Bp>_IR(T>oFoZZSczOl(8Ru}IJ^)aq1jCTsHpm|j?#>NQo&z48w z1Nn&jhrTsg@>gXgMbH~cwe`X%m01c`U#^=bANniAxw>^LSL5vYE62KYxvGN(o(?u3%Nu~hf7Ulsg7z^7Wv?ENRkl62^&sB zaB>C-_&?NsC4>d3FV^#ic%`6SJLrEAAEbI<*edQf8DL6^Ss@-#s^S2ogZK#qgMok- zu+*8VS^%*5{cOQz&I$so#-zchx)M9>##*^5Rz;XBzG`Ezc*MV;W%#Dh@?he){G097^pvfu{X2^o6UJPgc87K}RIi z)bOqqq?vz)SLE^Boag;?Nd{s=QbN1IH$vH_f{O1#ZdEa~p`(q#jgCeL#?~Zb)IB5~`e%liu!WVSv?ccVKvW8r z0ee3EejygJ`B8VLr&gqm?yB6&v;-(f+N!@f&;I(d5|QQy@L=gn`HkBrUS5$9+vTFK zaUC%gvJjY*urVC+cH^FVjC&nQ)oxq823WRlbU{(FN#AGQF>&}`(o@;4tp`vgJzXTO zTABovBS&&h<}9Nj6mH1K2e&#&2}IQR)+y`P?#aQ&j=TbFflj!^@-1YgTwm;bw5ni z)6!Htd#0>fZLhsB6mM+q+qjW{06y6|W^kRu)>?u=sVqrxhE8sBF3urJwrtG6*17Wi zzAXWNP50~u(081_h z=;8_VddB{p=}T?i;H2scR5@w!#cIc)x)+eDUvO@$X44ZId)!!R;M_tqsx zX?h8YVt61)ngx!r)u&be0M)nCgZ}_;&rpXJZ|ujn?jMk%sc_d~s|gKid1*a95tQHq zlaM?A0KPvhIBsU|zP;dtvloK)(C=8@NnoXRGwtJ!arej1-%?6~$sOVgTyWM4+o_!) 
zNY3EONc5e@8OZ*}8PtH{xW^ir)+k>RuCmhg)xv_6e4%lc13U^svy!B)=l=i?eEfa% zu}}(9iE&SD{K9k4%X5G=-@8J`QFP^Xi6%#z&y8F)I>J93&)wK(IN;+vj!p)2Ia9Am z-{W^c610X>Zyg^v71Ks&F3nMj7fLbaG&O(-+yy(4g9G@6amTqh<50nBN`hZ`+|Wg- zgOWq#&+jZC*HiW7`r!>NWggRSsadH)=P`(gUC@;s(FVdYyc}cLYCKggV!X@+!v@2x zFTN;=AO<)3`uo7U#P>V2F-H?1o)P56>I`o(k(@Ek2|khu0~%d3gvpIHFOAt<9Dv}2 z2W`HPOQy-N*{N$Pk%v&pTqJ^2lL25<@?M@~$EYz->QwQceBc(^Xj87Q+Tss*>8dH9 zr`}tnXVj)C}|l+qVzyx%0WwGOTDFsisefpN#N(7anC%${cKH&}!yp^I$e-?S** zZFekE(jrGpWnVfbiO2CL!!S}#6gOq(fxEs=niCQSjI6S|@1^i!yFr=5;daM3HPe;} zeB)|rTI#xAsuWUB7RF@rRJ>98$WU2^>+Hd$Bcy+EmT1Z26+GB^Y0^wP>pe`4d`e-TNu3I()_y*WVcx2n8bFEr4~Wq&Sa ztfHQs7&B*Xc(D=4r53`G~^yAd{a+9Opxt(_ITcS7(Xk0`N)!2`*iUpzXhCw>nF!EhnbZ zm7=e;$5w>P6~=Y`3Wdj~92R5hz&OwJ(bF+9(`6*q6_}qrrUmvlPEZn}$y+6!mj3{0 z5Y*98EQMzSF^Lt9SaL&lC!fMW_!>oNn=Mwyo2Rb&#q6eHthooBd2-e;>JEXXdpz~D zlvMpkES_ZVHM81J zXQ!G*g=Hxa#^eNo$&dziVUHs{!FhHQP;a`O+bn9)EVU>FYW1&Q+*f@nkJGiX)zj3i zWK^<949cW5tK2YN<0OJJ@%9?3mJ|Y3S@qr95OtKSgp5Fk>pxmo^o=dfzR^=dY^b;@ z%^hcz9JLS+h@6bB;=qtQ5T_hzVI>Y!q`N;ZpA`3sreRp{$vb`FmBHXANy|+aP|{sb zP~BnbNlb4Hmg;FLBw}~@s>3Q-jw9L>m2g-Q+fA%0bjB(G7tc(+u8Ju|yf(W(cYoFK z<#d*-^btKe{_DD$kdo9*9D<%`jAMHcySB2rfu>5AQFTx=8ft}8-o_cJ+Y8GY7=fxnMrjFZ+iMg2v}Gox9dJtfRxs`VxpF+ zDw!&pF)JT3K*Nqn=NKbs;AwfQnLf!??}kw<93+AG>)US)mg}aj@f1}&po)9xluw$&QPZ^o?_XSLA)TX+c&_eahE#x4tugCJm2jc3MME7~uQR-k zAz1+4WMjs}&-!zKb;>U6T#PScOh)R_a<%1^PVn7n3(=)rHO8Ol=bdX6-wp}i%AewA z+!DlpTpl#M)fI)6BoJxW<5&=u3)P!_;saB|(=b&@9EN#WQKNn5vCn2a^1$P{;OWE_ z&L9zGu=E!lh#_?lpd9H0o~DWODgU;YT&s8BsMZp7~z7GPKF6AUt?|DN_l8TL_ zq3S9#BnbnPQ5#CG%NX6cTNx{bIsCaA018~f0Dg?am!`1^w`r6;k1G3m!sW-Lu6I7Y zp4-t@gqP}7>M2r|q1()-RB0QRARM4>3HJMqOzG0NWR*`OHaqsi2r5F98e+427=Cl> z4m~4qr#&A?BD1vDk2R~lwr4LI?BXqvs8RyqvzFwR?lFbdpe-qL86-0}0rPs*yFeyU zL;=3BoPAPyg{e}asycaF@tHwWQN}}nf5HGD6P)fj_Rv4d;w2=wbg;)PbL#yf43Mh1 zdbcfehAJNu>8|!uO41Q6KIDpedXOP>aelz@jM(|hF;=xOOBx>8KP zN{)loB}AJR9o+MSmBNw<9R9c(PGse*CRjs*gI~U_915I=>h$TRajx-wth!xfG_6k* z4h(H3(F(H;GlnO>7#;o2Gp#gBM51gYbhvWEohy_|C;->5VfP#A>^0pbY`E8TEe*yS zeSHw5qMkaxr!myy%!hIy4JHboMmQuM_!_ZTT)|(Usye%nQH^Jb<6>S^Rv2|HpW%4B6v;s@jpk8!HZl%SSg`}|=7 zGZYyr?mi+{Pt(sW{J`aw*$kd>p{(qO1ONwpyq*&p71}6KwkK{{Yl5 z(sXSlP15079u-s!kW)jLyqC^doQ_Y(BiQk&v=XNQz5f6W>lC$eNMoLxqzik^g`|z( zr>34nK%Q_yz>aqS59!V{*(ox|$v>#DD-^UqC+9&HzwD<#vv{@Bb@J70l_Hs9q+^0X ziL&GJ`2PUgLz1WB5&*gDoi|uwQM>?IuQBwt(tqXCpKjXp5Wp{dR-d(uQqB83^_s#7 zl+(gg0iJyL^clnnYJ9MZS&x>zN7R#bCCTV$WLkN4tTWcQ#?|b@j9?sbj{g9zvlC`d z`&Q3@%ac=_41^&zg$K+r^|ymi+^8h#TTRZslClL_iD{zPiyqYn35}(F$j(XlIt;mH zRIov{n~J~d(g2yH`5WWjD!OPPq@|V{y%g2ar||q5~WdN9Injrc_4faJNq&iG7)gUKgK8KuEvL0LTO5?sbrb8-6GZ z!H;Wz^{iS_lS?J(>)U*wB)6-*_Kjn$mKjo{8DfZFjm1irlWq#O)z7QGKKe0~i$odUNXMpr(-Hb*}rzM2+-0+m%?y}l8p!;k?=ZpI5yl!EOQN~!uy z>x7)+YMc&8KH11WBTS_s!}i`#z^JK*2CSYr;ewo?jhG`cjmH_ujQ6BXeE+6bhPqG9cT$u!4IWFVTY$V4e$r`?W=^SsH~PFc)-$yOCD0~ zc3TifsV<&JP%PDu{`8{4&Z$Q45}5XX4RW5eDduhhOcBO$yG)@fvKF-2ChE7YKQLfO zs4sVCe=NsapnqMo*V0mT$)yoo%}<7h6sf%5$)BRq}{-+T=%E?R`r zO$7rz{PtsnvtXsz4Ssd~N3hjc&s0@kE%Z*%Qb**&49wGyGt9@DrH7=I8;`Q&frFvT zOyo*3rl=QFxENq5wu%RGf?RFHM;%vLRJEufl20)iT0RCqBWrV=zT-aG(Gx68mgz37 z`+G+Op(I+M8?LZ>pgOLjqGh+#87Hl#rf@|iMCZ!-cXlrtMh?K)CAZ^0)at65q^iL; zVVV3z=1G?^C1iERmwMrJFm+#D%R_LvPt$egRIA$?Dn>~VD+wb;Ku#MwhvdW#4!ep$ zoR1D@EPncW7|xb$GvB@4?E-H%c<;SO9kS_YiW!FIo=+>*z^EZvNWk1l&p#OV*GUOt z5J(Ms_hu&1ZxZt#LQ>f%EXLkouPYn=Ncrrb7g=^ zoactc`MqF;69Rxicfg+8>lLR_Stsf5-F2{qp4C_UN;%_D6t4_pWy#v$Hz1zGV^46r zPmgF^QdompjCFh=VTvH`Q*bZlIA6eWFnu79PvPW_!&&o5@c>C;!`~k58N5Z6JBMLin4fU!s%E@9 zM;xi-#~i!YEG1$FQ32F4}8?Tr~hjiAj=`CZweP>{ggUY&lUdtNRtbCsz?Z5%7I zbeY!yT{gUg59UVgk8ViNO+u2q>er0FIl#zB0A5YsLkJycTDz6bN;$l*Dk#W#Y0lv! 
zfPOwX!5Gds=Odk6Zc+k6jG*O7Yd|~Y@$;-P(l`811q~X}Mm&wQz)&my02$IGMH^(0 zg7`pvJ91cYst~n0&;t^8K}f6E4n6u)^K^ReGqN%am=#+^Bu>wM=8E~WwNR6Cw zFnjy!ykLtfAQ8(NvCnmYqz~u|<>^6Npe%7q3{DXyGGs;!lH+zysfK$32Pfw`p<2?@ z73*{IgO#OJEW<8wD)UDgR!2KeEMjQdT8YY&oQ!~=@W*!EeStc^im&XAoXOwBTgbp% z*M7BtwA3?-d2sPdxq;mp1@`Bj_&MNxv_h+EB|(^acf-;-N(=xs)}GOl(n%XJrwdaE z3?rL)V(h9oMFes;I#b;mfu6=Qcns*kjW%L_cj442pH}Tc>A0maupdR zDt0f0X!o2qqzPuV9sYCq=MMfDTb}Xf-77>wdZN{kPcsFEGyp0l&`9DCgaLv_2N*ir zn4-5xdJ|69zr0dn*f80@Z;TwfNVML4O)Rm;hC53PB#PO|0QSfygYq?ikK`muQsi+# zn478;E`3Ee!SjPg6@zMOh{T)Hs%ElP?$m632FXo(6mIt)%f1 zM5{6rX^!j%@pd8uK!Qu+*S8we^3uqeec-20fp z_tp&lB&JA0<+_w}1kmEywKB#TP*3-7p#K1Uxd8EOjN(kb*Z1F9&yH zk)Hf$ic)30Dow;wyif$qNV02oXL+^}8v9vGc}j|-bKN{t>ZT@Qq{gZOs-6Gm#(X87^SoNe>g#h68 z!+oM;9XQPbR<%!i$CnD#Frzg= zcY%ivKbR7DAaji1fHf^NEO}}NZ5re86ab}NI(6j+FQ2TJrHEJ-zb)H`9#^61e)X=K_13cp@aE2ethu~AmzZkFI$PcP}sL05-9 zl+F%8560Z!f^?kj8dB7iu7G8WQz#11mUh=x-r61Q^(#;=QPZya$WcoPk1ALUp;8B^ z4mlqEw5lW~S{Z|3-Y7zf@2m6atZ2A&b*kxAMRSrSh8QCSqENAr!?|z+m zWgE#Ep8lSY!`dmbe7$hTec}{xbo~w58fud~NU-^F)C66p02D5Y6r7dX&7Ij9In;3~ zn=q85Fk#jHsdq@SR2I8xrDDcA0k8xh^rz+7-GMN%sD%; z!(%wkbtX@ON|A9m=ZW4BwDn9VNU|xq#c$TVEl*A2HB=N-wG?j~A}CUxUgllFl~i^h z@DKj4wu-5~1dxQEHS7i$*XtO(xur-1^}Z|Xg)+rcIFVwOTBl(OD;!1;;5OX$KPNxi zPs>mWNy>zq-L$D}yTJpgASRF3c!<|{0^3b2ZC5K)OCrhUpH9gD!3sUV#~r}k#+XFK z&Hjwd9EMA&+)A`&>7Ron=iehEY8wB(pX{$m-;y1Go~yJRUG{kA7N| znKmVh-={y!F=oLeUSYmNYkO@)&If4fB*A#4VVbGf<*|mrKwELnazO`31r-YuRO=P# zmpEP2lv!=+*AAZW61I8j#3Hc_RKZ9`fE)r&80Y7cpT0EELJEbJCmLx>>AJ=2s-;)= zrvCsi?Mc(t{;i{^T1qvQDPY`^ld+Bn+y`vu8RzfYOQ*yoNJZye^!kNVSwpUQ-<2-9 z6*~UYXQsW=Lq#6?O4wtbIU>S@1v0?o?#4j*#&m0HmYZPv_8P%tg&!(~&$9yU%=AqR zupc&}B^yED1kP|zxWVn*6RzSJN)L~?!}Sc=3QdDgz3t(rsOT=&ie?(hWv6r^EX^1go`4ZR*O{m0p$U=5Zx48DI(9 zwY|Tlbab?~Sv1$*_l#I9-R+*Ah`oPiK9IRt{AcNVg~B;uiTnv-wD90Wtxd zqs>g}iYy#O+0A3-2SdY0Zlac+rQRx*j#k2)2V0Wx7-MmY@^z8MtGt|uVGi82?MGoy5azdBJc)>X1gO1uVd_t2fmQmym znf_w790gzcuP$+&JHG{llO5%3f-Bzt4$Od?XQ_4|R4xlSFk z{4_RNY|CG6r?yQHt#pi|xlmDXPZ;b!KKaw3J`A-kdYpQDJXkC%k?-%i#FOF6UY)CB zw*6XDOd;}F81E%XBmvn+x%tMEnLbHhhJqZ5a^;o$;LA{Q_w&2f2U?~70D&s2s%;eV z(!I)%B1GGnQE`TSypRsjmOpXG<3e1(gs@fkXT0vt7V}LBbkuUKX!l=$^njN-ijJeH zrjqLbHDF2P27QIoILRZH`)A;M=qqq-tW?l9tKk+V!LXB0YMxv4g*#TX6w!&JFCNlb zHo~zS;N#mE<0t1js#Zd+3Pn()l2UG^?XB1M=@ukCA#Szwx9>{2N@ki@W-BQt8@mDr zb@$bjkMhAvUV*8ePgD07Gi(x&Ye=E5QAuAafIgVUxC;uLxd*u?u0hCm0VPdUP?1=IHmWIY-*d<1#=6>6q7Yn)LDH*4wMf?( zXP}GEmN<<&dfQ-0pK#nyCqCW#YUs3Ud+%o$BNCvTA^w_J(w%2ULnS6S%VBO>ul9Dm-A_kPotJI>yn}( zNg$2vfbhyxoDSUQK}wVf3LE?QtS@K;TTpGac5e1e_Q323J-Yx* z@Rp@5P0g;jY~vOr-h)y5fo8IrTA@Kq?ud+7M3V6UJe5Tw;GTW4@2;{-C>LtIJ$|{x zT!#j~Jzg2^wCzXH@uFL1r?yrzJas-|qRS%41EQ54+~=`8duS4-g{e~2{exgJ{A5fG zK4rNs(5Fh(HhOA$()8wPc3D_?jFSZo%W@AV?g;0fVXXxar70j^)#;X5?85*7MJtvS z_k$4ix_Q*LiaRoj+2IQ;_{@yiIOGhsb~$fz$G(R$KrX3jD=Fvf2X5Lb2%jnCsqB&q(&`ahwCM0x4CyQfee;3v z+NJVoTr)gW62Jy!;1Q3WIQ?}Kd^CchbaD3(lqd?8>+48vwOXlclGRtwa)Bd`MvAJbu;L?~ zpfDJ~{{Vd8=~Wj}Q!OTr*H?GI48v$9$-`}1pq-liV5v%oD`*yT5}24X;lR(K_-ql} zoDt4D>cs>Sm50kAaAOS3C`fcupMC5`wCO62m!O)ehFXieZ7@#%07_z2R%MWo+4X)9 z*#KjQprl#{Nq=eK-=G3}*nOMjkMT@0FhYdd zwY~d9M-ayph87aLu|$$T!Z_rRc$r4r8UfwdZ%{7GaRhUp-Y+ zRcZ9md2q{EM(G!oQg;4_Zzl(n^U|e>WV*9A9r7k9e82$AeMyP*_B(9^$j~{cl4!}N zD8)CZPzlE%FzvTF?s?922n?dJLgZ5I^B2UZ+Yfl7?+V`rdU~#pr|%TgRMH*Lx7D~| zwWm8n7~Etw*G2%9!NU(@u1B0&mY2Zys|$hojwM%*gS{csz%L7~emVG)4T2gkq<|=% z;d21VBljb7^(w{#s6%3wZULdWx zgq9^DDUvYk@Tg#X)r=JcjFIa0$oSIW6{*ueecJ7Fg@TDmP9t~Wu?y8o@>Er=D;j#L zdEMS34rE=~zzfJ9_C56}Qqs01!N%bPN?9Os$JOZZ`f6I(t@B7B-$E**tAJ#mwJ62Ks(jBTik>qp$N}`XZ!8~j8P^17 zA-U0%2}pd$?&pBo@-tTaM$9;M6g2fKJ<8oy9x6ooV2U!1ju85B^PUOEbD}5-KoGCS 
zM0J4>N{CW){5!Km3Cm0Ls~uF-^Z+8&Q%Qh-77U2p(*e0Z3-hJ;nT1FmW;~d_nbW6u zgat8MRQBtr+r`JxjR~oQv%^_R@TiJ^EMY)UGPvL!pbVeWLzl!W#A%f#j4(US0#d4g zZ>itZI?rx`+i$v4$h1`=ZK$s*JZy5{#>tRKCm(Xef;1i{gOSGZ66dH9V3S4wz3+4B zga{iT>vs37RhN#x{r>=SzF4J_3JOFRsi$(q#B1sbK*>190_D4II6B=Q?h!cyzXU#aYzddv@xZ!y2O`RB63ZoVFv7gagtBaN8H$^S7NY5(=CocKJHPMo2Hc#`#9d5giSZdeHKfj7F{+OgF6Hpy%-IJ+N|AcF!YBsfsEmr771})&`|NmEs+) zg|BhTNlcL>Ji5EcGEsA!sO)$jJ@wNjB1#IT4ucYRb@5|qWFOa0h_-1uGovg$GfXGD zRi{(QPJXtA7=lF{uO?v)xen!2T(L&{PNR*dKv`OzX(5fn5k{{&&XBn^xQ%YD@F{wV zlcs38uGLKv-6|dhG*?GpvO!Xg8_WQP-I5q!V{rGzbqjPZT5WkjtuLhqF7JaFzlZ^G zarxKqAYV_=&2g(rJ9$clsS3_(U_in~+8MmiH-0V$ZVCMKE-rHdUCsEv(>K|Unwl`D zX3LxUMVHf^RaHkvZRy&2Xevx~#M~}YKo(g7$-Y2aJGPO8W7IMu|-N|KPId9QqQ z`^qY_q&jjYHNU>su{pOdWV za=a2|9D0$MH{-JvNp*QhW4#a9ihHbRD`>CPG}2rknwFs&M4E-ff${<5{v|m%A%M>~ z&CiA`afRyo7m}hxB!BI{_ETufMFnH$}$UKkF-$E)0P$Rd# z?~5T#<~JVO>kpN_CK{_9C2h)Os_G+%R-&0UE2!W+hk@o`kTLc;ktHiwT`7f6VcQrQ zQ$myi$FSbFLOivSM>Sm_V>xA4Lfn#MI5=+o{{Xg_ngWs(p>;!Uuxv%|yv-v`g551$ z*(AF|EYy&l)mxMqScwAcKr`!R0Y@FP@vfY-fKrpo3_V*Rq*OJR109Xs6KlF^-$y-7 z!nR6!brYD}l}{Bq*x}`%k}o-&W52MnH&0kAS7C~)bWzU)P{2Ib zAgV@+dD^Nuzyx>XllRroR-!B>qVsUxkyRuqMQ#lbcuwjG{*t1mnHbF*0<0rB$r%UV zx$Z~#>GTB&R#d?9-AC>S6KO!}P5$7ty>E4n*9E4Ip{S&*9#o!K?1gsi&wO_zkDqL7 zlAsmhfpGMJnL$bfnhanbMv1SEnFbxfWUBe2NHJZQFp(jM`h{*ZSL zM_uo&(fYc)P+Mcf?=?ez4EaZBcflD^-=5zbXbF}Sv1Bt;wphg3F6E2oydle`daCCR z+httPB@HkR@Usp>E)09S@N>trYKux{EDO)hW)_+f1)y6fHA&QUT?<&Xbd@CAx8O-S z?qwi=QQIA|eg=`B!m404DE_#%QB+Ucu&({1EU?oFBBISqtrE)537Ep( zW2ZRv5Hs)X+gj3=O413(rN{LKl!mlUS7`C{lF|>;AlAe)2p_aX*6VBVud3=PX{~j1 z(A$fw7b-zgk1q8J8a6u_c?SnPjSgq^G~Y<#Y=@7V-~2`Cs*3&nP0&qexW-x*y7X;# z?)ifpg3U`Hcu6Fagz~}o1C3_cl`%jwU2*s~5zZ?YPLmz|MICR0}g&Vn|{uq){T-srznvX%lR<2?UV=NmW4|EM*e9T$EYQ zA(#cvIqi@}2Y%Y*3sY89v7pCJy2MRYn}hs~>Gu|2PW63kR+%EKsEAx7MjyOmr&3E3 zl?NS^ep-$=&Rn$kbT)(4ff$WpP?QCWHw-ZQdj28Hqi%9q`gq-I)*6b7k@6s=h^P`q zg!!eEo;DJkjGQ((J7%2cqWu&KSVqG}F+n%@-;O$ZHkTl9FU z{7JXKI)a`%RKl8sTfUf;a26-rGKk02Ume(S+-Te{5i^OGX;R2mwS9Eu23ZPwRRp=$ zGYNFvKXitgpZ8dqX9~`I-XkN_p@}?WzCPnx0aDa$C=OR{v0^2MEBl9jilwcq=(~be z)6Gp^7&Pkj60@-F>cbq4`@TDnF{i;#hNU33?uwL&R!h)n@sGqHbp@hMpTYXr>Sp?b z1FTbiCJF{X>~Y(U#|OTFEn;N5O}q9Y-XL3~ha1?y#k13Qh^nkMs+j4Tp03{M^$`$9 zmW)^h1HWOPpI0Zol}(hJNG|35H1xwrw+xo?9}~U(Llq*ySy&oC(?rtb6O6b7ek^v4{zwytUSglxJG+23?Z!qt3?)zV7z^E}YiTZ^lfEL1dmwvo7i+4yV>j&unV zMCoJ%frVL{qwet)2~tuDl+N7tFpZX;gZi$Yt?GZMQF5fLYKbCw5>pZ=Qtv2c+yO2y zbCP>!oDy`!N+<+?lEH($Uf>;o)Hb+BCTeVvPIz*Efe~)qQx(F?6)j80HDvP{Ke_ZjUJ!5ZDbZlKA3=2(MYTU-i^7`H4 z5-HsnYyd+kDn~uNiO_s&a}=4Ed+Xj0Ae9S1F!bNNX{WLM2TfPk)JtuGN=YeWN($+* zH9&pJe7MIlsu<4mk+*uDc*4_Wg>eW%QcJs#7zPw`gWL_~46=q`Tb1x0lI;gjR8ud~ z$ikc~GQ|XE_^d|_fyg8eQS5e(!%s~@r3VVXJT%9o01`ukykiw=`?US>@)4CAW8y{y3!1Ul#ZD6?Gmj&RZmdw8^qEhM=P0B@>CTDryF*})SGEqiVwOdu(Cc=T*n}oX z7KJa~{h&Qf!C}4CwX!NyriPXxi2(q@?xIG7fJh}vVRZ?G~iZbNgHE(zol0gBO}|Md;b7k9FhnHG_Je)jS!c<6RPQHzM4rSney3W zWj>>UoPU4qrczaEqkSS4#5+KUlXNc#Z&BSQ?Y+3b{{S!VtCtsgZ@g8~#tuu<6Vk`i z)m1kMDd;X+oH=HA>E;b5myo1^y9@1+laayICM9-UU9HDE{oz#qtsu)Gb9oU>?7}h!19}u9#Fo zPfsLK71|M%iDfOcvhExv;umqp=g85Nq)SbuC|#j$_x6e@BoJB7_wN|pQhBI`)RLOA z-&9e)n75TM+s+wI6l4(02_JE#Q#MFY3Q%+H>jjoWl@Wg^7xl7VD%N?fCZQ?D)_wy8 zJYZ+HVUvv!cBLAMdQ|R_Vvv$vvK}a3+pokLTfdIiWv7787f#x#?bAGOg3|AKWFL}>ganSn z9Vx{p#8^sAMf>HXD?X|KP>lXHj&Y*unXeYqc-+an`FNQ~4g-3!;CCF1{Nq@Yq^j9U zNqhdTruBeXC?eMPyQ9m66fnrui#s&TBaR4J8!j0R;_bJxD&P#0liM0Bm6^#lszvRA z<#-R?E=VjfmJSw&x~ zefNk_(bZetrd`tSXD$B#D#6i!Z%UQO@4?By7#Ym2Wj`qe)y8z`s$C_X`>)7{zYyM#ijdV8 zd+F^6UM+Z4Q*e@+;bNenge>l|8fFVCZ^m7@-L*#;I0^{B&=+7a0J}XOwq0SVY9jL6 z*V_(pR{rg#r0M$Tscp4Y-@%cgxIqyCf;cb12Wd^*x||gUAZJr4DMOVhj#sh4wdYLY 
z9cnqf{{Xm3zrDVep=jikM>Rz+nJiSe1~2dsGtS_1$F_dMQ!m9*d`X6njt{&02w^Uv zb$#|CH5BnhRw-@*D*JOXF6=sq*x-V#jy`Zc{{Vd*Nl_L-^Iv$TnI3T~L#~3o?tQip z$|@*Lu1cFsH4+DrmPllOGbN+W*JIA$NgeP9>7}d`Oi3ZzM(u~mfjXQ8Fzc}iaEpyB zvBu?PXH`QjD+U5C#Ah13!7XVjN=+-*yfPNzg@M++A*k8jlFJp+8+%sJMO@KIB7j1L zln~&K3B&Elz!_e9X&IbC3jYAd3u2%M7gU2yF4s7PfTL)4`8&Sj5^uLFU7M)x6;Rrs zuc3x>EH#M$f=H3uY6pxQDaw+5NhjY|;Z&L2It1D*hQ5ZCBQdS`T>&9QEEQM!qyFH1 zo`!q;6;o1DEY$RFwPAv^W|d6120lhk?n@lx&#Q_$s}5GrJs6E~I)Q|vno57)UGR$2mP3eeHLGrxF6lV;N-(098J%z4t6nyacs zW43)Us*iV-DneP*F$_r{6NSOrM{MWstIm}!T1@4p)?*cN8Rvu*Y{gbvHuZj<(0ir2 ziEWi~+?pv8O|1$y{Z?Yk1I{zY8RG-EY>qS~O14ui3ZiOsrQGw0DP%)rFYV#|&kfLUD{7;Hmi|jOW`xJVchXXmdJl^OHj$-Ynx! znAKA(Fiw%Yd*gN7jN3tzk;p%8-=?8RNC9Bp2Y;uj!Qf#FFNZW=c!+l&KtY zz&3C72U6p{SYvXL{mz)Qkt{7lxN(-)?BTWgk5B;6rET;q`W zVZ_Pbxpy_inzitEkOA%HvffIhHDTS9~#tDmEy_PJ5B} z`1sJg(v#u6JHJ22DCG-4Fu#4eLfE8JJZ$bGs%P^=o5?TV1KCf>KW#7;QY`=o`RVf) z1R{fa>~x9KBgoOKzEzQq=^HpX&tvvIwbMvaoL}E!IHaZQ+$c6+f<#z9n1!J9O~7uW0F*~ns1y~b+lBn+UsYI z=57?QM37f3E~@;u4Yc5f9CO>Yv7}F&RbfbCU9V%lRDcD6LY!&x+GZfty1I@|otF7Y zLsk{DPf(luqZBN_D@Mb&3IQX&^NkS*M53u>UU79Dj(8udqUOSQ_RTQJ2; zAb!(xJuiS=b3Az89kG~X1KF3@@u*X^BEq1CkCS&IvwVBh4o1U9xe7_5gAB z)ay zZSi&8sw#M1O3JfHYGs-gU>0Q{VfSHK&m$SyjaYA4Myk`xPbs|^nQ;%7SO z9D+GSi@jNaBciOPgX&RB%Ag#pXDf&7SZ&Wa?~cPj6Hz$|t>*IX`PQ8I=K?5aGWt9v z-K${gI@)6$63tNql$utMqDsa=hGC4L0N|1FpN$$(Otlq1KCtS@62Sq8#{KT-vFbY0 zrfqf!>#3<%G;@6?lJTlbKl0@X1TuO!_VoV%E_Gx{TfOA7)YAUkhj_~fnzh*~ZcW!- zaJylzs*|K{Ed#if8K;g{IV-rF5KkXGdwxStp_MG8hG=X6SUXUD{{U><;=Q*=cB;R{ zB}Qbdr*R}QPxx0p#QbFbf73t-K}k%s(9)1%8eRVYF<^8(RbjtZ(OoA-dXFqLvA+PS zk&-(AP6_SKG#e>PSP)JeB)x&{UHWs28>gU?>j;iv-;?e?%T1&QA@KEv z&A^TRpxtEEH0?7ZA1?vCw``Dq?a#KTN$0)uh+-<{8m(1pTS*WzCiMzlHGG9~JAIC> zfXUCSLl+TOc=b;#(bG*&=OltDVjfSnKx~}x>EXFQU^&v$6p*lH5Nh4<8S=sfFa;n{ zwED^a05?S?RBGxLqDr=@sDf#kUZvEwBw`VAbLr#pDd1y{+6|)gKD~cjp&-T5kOK|# zhbszo1U)%bPi=~tXsIfE>1pSgl2%iWQOIN?I0^C zD@a+drTP0n?wqt5$Zqvi^z}Bnp-v@=3YfRWGx>oW2F?@>%A|40&ut+&h!PZ_r#Z2> z(Xo6m_k%3}F$RZq*U}>OB=ohlmbY0}Lq|-I3Yf>0=gPpdw;jTk*c*-qZ8O5~ZLv^&xMIAHNNVBZPR56gTUAa;1#sJ_0`f8S?tzu$U z#8daB9s(`PReRtmp3EV+q_oXC!&cDNB&@KjD38klnOD-;$v7vs7}L_gI^6iaCa}VT zqy-8W-8rXiXb<0dWYL*+!J&j1$BJOQ7+p-z%)+#db@VRc9h46%K3 z*wefjty`){+hU$|6PVMtJcF#sd|Z{Q!VOVzzNY^G#qoJyr6q3f zwt&4+QB^gHy4O&xPf*f%vXpYsC?!Xd!;qvh@8P-6AmAMyP(mlVK?=J15$}I(Ajt^| zNKJk2==a|Pt=%4`j;`Q7p508u@1TlOO-Ks4BPS$o-GFi24{YhV%3uOT*qXNUfvI-M zYWzmjw%cg$)1^$X+pSdij#Ef7k>k55U^f6r2alW+jFC%N;t-G&#GHP&nTxGvUs4m50Re#066W9R$mb6vI`vP>5o{VYD$)C?W4m7L{KFaEz|32(rwKeq+oin zltcZdYLi@tvlCPjGA z!x>~UC}en!+%G4C@yGY)RtEal<9Oc?MIo!EDk>neK~7#)jb(o}37a4gPnOaA5E$i& zBe=&p&z`WUmIc^l8I!{)N;-RE6X@@kMgor11Tcpv;7(PHSOhrZYLZpHcq2H}E?lz3 zNppj1`R@X#a12V<*(sVpSxXG=Qpi5pPq4ryf2rk? 
z2)lT1tU_yk`y0rC7f4=;jyr+@EfWbU>O^O8UD+%__Xm!{f$heqIAFP91ZXHeJtF8( zSTfbjJ{!gZxTWeVH3YPD)RA5xW~iR1PR2HE!11aekX-u6W&p;Z-8S%+o>g&1sulaNl96Q-b$ zi-E2?)JNMti2gT);r{^X>AY_hDJoKuq6&kuYR!>*jrzqicCW1J`}ITB)XiU8Eb>iN zRNJL_WjSVcU02LAQ&rMcB~+pr zmX*(y19CYme-S9$;~WeV$CF^rArPbTO!xLbO{bPOB@lSv0@m914&Pj&J zA)jMMl_V5l%X@M5{d5E&NvKmb&8`_l%L+=!Hyj%JL>d?_HtwjEYG|3MCXGysGUa4O z#!CP)05}AX?VU6~)qH zkAIlsBXaE|dt+$M3Fkh1#YrM-xh1K0EP2-WthE5DhaLX_SqAAI*m@$mJ+DQit+5Fr`Br~jCpkp8F(H^B4s7Eh68XJ=efZg<3AcC`66m< zii4E}HT$GNAq&qe{{SA5CJJ@ATIPkr(^5;9p0Y^VRz%fL!f0KfJQPu%TIu$C|bU(U7Po; zHAMhB{$cLXH6>gXb+16JWsH3=qzqM4a*_g-^&OxSj=&6OCrHXvv@Ae9MD}^NOI2@iakMh>~0Z1V?oJ<%jr^?qg z*ux}LR29~EJe5N7#)}k6xG?Q(pPU{@+wGI26*W?nxs$7t65XPKReUR-P4kV(YJ%Hz zu8wP*QqEMUrbS@WGcGm^amV(~gF0%K-|L~R_iOc>bCnA%Li^Z_H1kDEdzb0a=Bk7_ zNU}eIjfEvM-G@JJbM2$2#FT(kC@*)sUDG&$Wockp@io`=bYRp{TW!+GT2?BVx1e%z z`*(U-$UGeJ$N6cbrDP>%4%b(tN>Y;a-&lb~Nk?+LM^0WjX@K)xCp!e4!Ca5U?T=&c zqZdr8cB*pyp0Hmuq&f9#%ujjz%2f=2Q;7oN6y4>3$}*$vjPu;-6w6WwQwv(ZJq3Lt zm3+yr_R;OKou;9ztEh~nvvZ)8 zw}tzq!jiU>yi6jMz*LWL#Car>&*m@&nwKmfu)}>dev!{xD(jbDe|RHK*4n#ol#3Ea z0vyd!zxEzjsfZ%hTM8H)t&1;ThCfzF>lHk4Ewa4xS(?LfOo?6|^1ZdHKaH=!- zSZAM%>JUnRP0UUYu@`&|=X8g@grMolj+Ll}zOp2^R@@POn`1M@PKe|-;oAh0jPiEm zW1S(zo+T4Z)GP^UPBkQxfY4^l@p-18|`i-h7WN0E(Bk3!njhqx@1ymE? zo?DFPOn(%w9R+S)lq29%))q^-EzdF13R^9fxU3gCT3RtnDW+(rmVWL~Wmz{40}8~XsoyAQC|LcmoOjdQNBlKe!_RPwvylyB?XrMBf`d$qP`tyNDt z1b~q0>wrYceohW}0F%!rNyH(RZ9s4Gw9Gc$h-fUVE)#z!O`{2m7< zO(jT2{#8NN?Rv!(H4;KptIY;|L@-}^dKj*>bk!8~_1o!p#UwFpi83<&oyP@7us*du5R%>b(dqPJ$(S9$4UKCA`YzFHqoBN7{HlqmOQ?!@lw_d;hR@(R0|$|hai$YA zC;F-W-wM-|ODbZmS!TcYi)$KgT`@}pboF9_rlvZFbXTK{O%lnFs>J~Dk0j`! zVrh2E*YO%sjgkP%eer0v>8%vDtJO6nMMmat`bIN`1dl<_w>)Q!JdgzyQ|Sy5Lk2EX zE@~E(O(MuaB)G|MwtjRX5A`OadwA=+Adzr0mBAk)Q!FJZ zFN8zUp(DqbsjK>OiYcnTjzc(TWMTMEmhQyxa&Vjvzv-@98qw)~I&<5==P;LBhs;rb z@d&54Np704yp?dxaT=HZB1KoCRoeffNgVS_d~4 zyI8}ndZJ0LRQ4NOu{AvfB=n)uZQoQK;|@b9E3x*eU7TbSliyjgaw);nWqL@e>dTRRggkPoNqNhJ0fI;R0KDR2q6 zIOa`sz5-CR0+b7e`oV6axZP})Z=tujCMyGApIRnpn(6Mu*IIsmcIBIm;f|To6wgBPWxl zrj?Qs4fv03n!(b$XeDNtQMaTYv(IOuHC1)eM{uvt-}kC|43kS~wjQEI8_IH{K79VF3H2_%!sz-(;586X}{VUBZxc+k=SB`I=E zHdKzh~kiv z6?+r-tVdW@-P1Bq`fe$`1aixfED;9WPkfEsE=fJl13U)NQq*kM&^EN~1x&#)K|zD> z??|5w9m?a=S1^W-nyH-x)6E%TiSDG2mM8)C+5q5?OoZ7azGI)icpSwcBufC0Ha~sf zKS@;7*lHuEs->r(XyRd0JkCMJ7^yk@I3Q(5!THqNO2Jx5BMsk;<|b+as#VFodhEeo zrMXgB=~dvad!x@6)Ol03)swauZ5$pue0-efX#vk_Vff74ae*N!m{^AIx<=bVRZC{N zO0ma5DW7+gPUYBkuH`=e0666SnqeRoN%;Jj)Ztl(-FMyw+$o}_rlv2ds7N+w6LUXC2W?%1{{(*bB=$uhyW(L-@HMgAi%ohkeKCJ znMj3v!LVe4rFr{f<5xn69pjX5&w6jx2~yr^E!6bUO-1!J5uN6F-O4Jy4#rS@L=D6+ zKh1;Akx@;eR6ir6XDtq;C{v|f+-|u_rq_cSYl8&6gvn9`G6z< zJs=N!)@StrAO!DRHqeEmE6Sw3ald=QGPU%*wXL^`Yo&Fj!7EV-ZM9Wf<42oz(HR3R z>I9H*M&rpD(&kB+Kn3(B`rh|**0q=bp4HN@zE`HBuIz_ zWTsGd9swP@l6wKz{q?Ia_~{aZv-6Fa-_Cy_#BizLrc6E(MH7PypAvndgzD=JqQi7* z>s>5VRZ(Paq#j!wj2r>SZ1asu_*qG7DPlW(#wcPKib4-i-F}6Z&?zb+x>KveJx81) zj&fUiG5|O#q%Kb!efaZBN~I`O^cB=eoLJmCL!#M}s0nU4S zbD$Z17_zIM3;A=)SPaD%ee}cDG+L@{SFuoO=8kg(Lhp_V3}Cw^)IG2U18oBXj&)#W zBw8VX-@jObwWP5%1d>mMBnI$mj%^{Q<6Qil|1l4$kOVZR7zHwngj4) zr3y*5S&lgGo-jVo6&;$6=7y<7bF8#=AkDT_4fybN09h|%~>m` ziPat=!$Z?_5JPN}WDzA8^CVRcRy->z09}hJoc6{)7b99qnwe<@Nv|>MmhpjED^X(I z4*JLIcZ#%--KlHUYOV6YGJ{5w0Y>Y!w`G`vzylft)ro`>NCT_#d)f$PmpHaL zbI0JAvIkz+>Z>58NvocYR9RZ3h8mUI8<%f8v&rL*2hNC@6V!?pBI~OIvrs^8x^D&savJD%sb_yg}05PCMHpwXKp)t^MTHnN|aJix_a%f!Z^(F72(`*yQD^| zj-KgJ8M0JIG=5l&)51%*F}DP%<(Fyu>Q;ovAu6rxP1j&<71k+fc0Bj)`r#5U-V}8$ zveQL*x=i%d6^`G*a0qzEmOqArIR5~sgFJzp6P+h3hzs(|PnO_mTj`!1ePE%-ljmLX zx)y032rB1@Q<+4t$QB6X-r`IU><%%GGx}-u+aQ%wo!qyCL1SjUTjzDa!8$o+b!xhq 
z3&|uVo=A{q1Jbw(JAWuYCA06O$ui~%azJ8r0s0z~dq)Sf;0^miw3P7L?C{gVSstN} zn+Zk>OPm278{Fe0gO6dR(Q0z78ri!uo7bQj!6->{PF=T1BhzK4EKw5>H}QjP098WOH{tg8n#Cq5PEsG+xb?xH70Qsw6iU&pO-*ElQC2yi)y+;?ky!&yt-HxPPSQUY^(Wh%2yp-;C?&T@ zUj{q)i8K{!Peiia)#JBBrHn23pMpU61fDaGamN~obJJ`f)Ed(L{KT+D6Mp%?N=qx& z*^*eMrkz@4U!AEUoFmIK~D}4{^I0)hE>^T2lMJn9eG< zG*0v}X4-nl+-c)VN^6YFr1^A_KxPiuQyi=Klbwqo&)Ak#>H_uG9G0((;|f z+B%Kd7$Y#ZaidGD#FJg&4a-3ZQ(YfY?Hp33wW343C4SPVU-oEG1CBJ1PPE2Ls3&mwA6_PBUEPMB^;J^ z9Opdau)qUIi3&48C7G+2Y~ezZl?1tCjrI0|)wkZEub_%5DH8Ps1d5VGB6JLjLo{j> za4>%`Pv1vO^vYL=vIbXN@x__c8Am*QO-Hn4Vuampm6r%n;+9CHF_9{-nHWrLZKUiZ z4p)QEI#2mjNt?o%=~D!dSkxS;U*1qcl7QKDuKGkP3(Ax=6Vw}ZEyV6+pr~)+6#Kbl z%V#aXBLrvI=Nr_PchKJX5tul$-J7lT9Mz2#WRcYqNVoGeL z3W8Y3dk#(lfH)lS#;&_)KuGeMYWnCff_Q>bia_Y!pVPx#&!(tmMmGP84<~5QY<47}dsS&{hr+Y|a3%dtt_#^NA_}3_;5^#a1%}`(W z1+3RdCGy4$Pb!%y{PzS#g#Fr86~oFQYlg|;+`H)V-D0e}v99gi3~ zR(BIDp9xB3C}rED;Sn&hnBkwa4ylHkD(1L~f8FrTH&IqiAbt^7a^-&xS+kyU0LL8Z zq*+20ELB4A1_tN)jwxBIbHcHWzGRw(ng_znN$#tcCgDRErHJj{j@2` zLT4g%pD?>I@1djskgqTgznIxkO;l;ke6PM~{Sl%3WZ^(={BFR>`*W);D+a+Ov8@jA z?R*((3>u(s^`te^QF)eu47BjLDpl$*nN6hbVa9fo$vw{*?Trq=OIJW$u*7qLm5?j| zKA@G0rLFxbPgExE>dn%=LLjy=1RpL2j0AOH>zxzh91Gy4JM6#&x~pe^spERYrp9)6qr#BQva z>ymSKc8=a%bnwbD9_(`L=j}NsaJq@o3u7zH46`}r}Cuo z);YGuvw3?l=^5mJz56NA3RI$03ti0feJ-S2t=1XOZSJrdmgju5R8q?(@dSAk>l&Oi zpoR4ejQX+1e~#xx77_@4dSe7j6@t|e`(d@*tWc%Wt_qkaT1JUVAZlsj7-{L`W1a4y zliSmPcs%OTCZwp8+;hZwU40-^RFJYw9F6ak5xl7CTIp&oC1a+N9l>_S@)txTF4bZ{ zV}Pd|l74l5APLL1g3q2~O}=odLlI>Dxr)VaThn5S0dPu@7BHzz^*P^@yI=r&XD26| zb{Y|!;?NG*?)Y&F3YlaF#~+wu>9msJ)ABN^7^$iiQD%|bVwE`n9tIyL--F3IWdSKw zFFw7i-W=hB1qQ3`w1~CTWg?n+6h|A%5hV+|+yjyiW695MJ+#FSlwa?zU7^H~3`L=# zGSklMyN6X*3&{W+kbTD|KbEYb=CM^*2m}k&NT|X?D`qkkz+zvLoDa!56vnH*{NYnSewLoTk0qmF-dM@u+wq|tvIz&V*Cl}e z0Cf}%!1QmE+96D&kPG9E?}AZWIws%KEm_xenqSYw7fGSO!cPM6<>ec}HA!A_6x)5KYA<@`ysRIpw3G?y6- zzT#H^c^+J(AxtUtE5G7m0OJ}+{ykHaR8|Pm{-B((g{&D#0a$LoQPTIjg^QrAiybvp z9Sylv76A=Hl2rBzGsx|NGo5-Aqrw(!uWSVD+P$L5ylmV02=k3AO z6uTxZW6OQnQ?E^8i|Q_{diwD4w%n8I&u-ZF(xpHuyWgZ-3yu2ve&A1C-k^vY=@sQk zDe2u>F_|`%*yEfG0;Kcbf$gt9#DV@vmq1ho>0Kdpq=k&Nf7BH4`nHyiddceOXyv!o zK@u!7qc@nPy%S{cHZVry5$WU|W^pN5K|>mXDSuD68ksBs{cogMb-oGPx@y@>FV_la zYFdgx9MJHx&9tc8yK=yH7{)Q4c^X|lG}kH(W|!AYL{f=SUH;y;Wfc!eTdVq7>2y@- z+J?FcbypQ@vRFgyVycH5Nm95ZavK9~bEcNGs>)U754z{N!6qi!D5ibmTEo@8%TFXX zqkO0|)6XK*QpmYoo{@p-?n3}De&8N;S#qSPGL*z;`TBv0g&tJYkAB!=pn=wAi>+gp zhDm9jd6fA+YLuFqP~-SxJZ)jP@Oj!n8Nt-4V5)|vefIHnBG2_3{9sxFULdHmPd%b* zc#^7>k`pAzPo%G@`N=KL0mpE0@uEM9_Rcp`@eq)(r^B!9`o?x!eY&cOOJ9>xmnw1u zvdnfwxb`{6 zf1kdzcvTdCsYaMoJNv+rPxd5lceGFHtJN1FT!f9=7ZGWfRPqjf2?Kv16 z;E(3_Ae~|HTs4`(XAd>f3zXL?+}G>RD1@b%AoJ>V_ljn(qb|KQYptfa$t+Hj2T>~e zg0kcgJ(P?PzdY%L&Q#)+OCDWu@qmXz$cD0RFC#}k9e(q zFz1vIvgoJ$j+n;UIO`&&l4g~nlp{EqKxPAsmiOJroS0!mGh zO!;TAq(SN!Y9y(SmZ4TSh>gjTLJ`PP0}+gj1NvvSqD0`d{{XJ*gLCzYl#~;FFT1Yr zgWWeL)7z>msADmr1)5EumS$t=K!+iSBOr|9-@Y`o>FZ3(M6;F}HaYWtku0QXy*{f4 zx;{#ZTfIfXu8Oj(O-&+IOefmWLBWvZXK4%q_T)dv;a(V;lTLwRIO*TjMWXO%)QEYT{EVL**$8&T+p0kO?`-Vms*E0z3v^hHV$94MBOx zdf`}!YEfVf{VVy#C)x#;&uXNaqLwO#ppl{wRTVKy8#<_rb|=%hm>=pirxP-K>3}4< z0idWai=NEdv|Oo_L*M(XE_J=`v97O01w3g{EFzRH?Tr~;at{X^cJO<1jtJ8nERil> z`!5Cazcwt}z7_(EuCISL-Vm{8p{bfWwrBcdNWe>zlBBj1YUk8R&mXRuT=`&&RSLVH z=|K$;kS!-;-+4r-YZ8vWo*Igpmx^gT#`4&2g%}Wau_HZ(NBZlexh3cZAXd6x9FXG4 z8gD3rWV%t)Ts(@ADT(6YLjoFH9l1Hk_Za8Ck2{A_(iVcs2LAwzy`sg404Dxpe5(Ym zeLVd^Y=XXmj%aBjVFg7*fNd@VG2|bOqz-!>R$ma5%~(oF3k!8QkwcZix=cn&b+xk`MXx*i(uMYq+%%Uihh6g>Ak`6L4pT2^msR=78F78-6 zgM%9Oimf5t?(MxIZKn5g>HBTeH8o4LtrC)iEN>eB0QC1`InVe`4?g2MAB>d{gejGi z4_sk&T9`@=M*bl}Y6h;UTKdXKVOZO2prKHkhF9;;1Y_q!MAQW#C@%d?FYOGtN^GRp 
zd&V`~uC$U>H60W+blU=nnnoM zpH8s^4=SQC-Gl!C36A1EbML51?Mwl;oN^Qt6oPBoJ-_-+Y&a%e!R z$g2Wy)9Y9zZRtCg?^Nc49xUx zDNb?8nxzzpWKifF=r8?1>RY|Wis?g1Vwq;RR0$+_>L1A{L}U&#wEiCb&IXuSN=>!g z&Py6Mt9wN(3CJlpT?@9)5^ho3q_1n$&KsO%);a3qGC(3k!yxpKJ#1TW{$&|B8eGkq zW`z(64=3Jc2xL@@G&_1xvaBk@e!1Fhi$W-;iV8+G5thjF6~JH|ft+V7K1*}kL<&g` zl~(F=^~x-;g0B%*`+Y@+O=7rH^$jGgd7{2SU2oX4&^g+rBva*_fs8REa56`3HB_kD zAz+64?C{KxhaoqD?su^cH%pY$Txd-6?F6u*#L&C$8`Sf+91g?t){DA;PuG3)hnqUV zs(7ckLo|{~8P&i<+l+ogume2kZZrdXzB=oK5}}1BnvcsYf-XyTEIt4uo<_RBcNBbL z1qpZ&PV-l+iwRZ4q5QK79t){H`QVP*bMvLqEfSJ#d_Fbhbs%=s zZxr1$&Np?Zhg9-VeSNiMGz%PJb1JeeMB%{NymG)5W3=s6IXKAD+&s@6Dwyt&q6KQ% z`_ZtAr7CJ)(`Dg8raO;O+2@^MoQ0DSq*0L370D}rFh`}B4ssOZwz!}6scAatT^|Us zbeC0WsoTSC4K?G$3xyO$J@Qq9Lghq!C{v| z$FvXyBKyua9(%(0jXkPDTBl+yLr<96Sy2y~#IDutw;M)z0203XWh2H-Qsm+ICmbMX z@d}M-M=rYS17)G2rG~bK_ceVp%M6gylpD+yMU)vE)#^>5#uqs)ocoF`69@~CMF@Ic z0W>)g1RhodZMwWo=pKi$QuM8D@2TnOr=q1Ui)x+ag{x%^{22Eybz_wz9AhK-by<8# zoy2%dUuDIW5?Q2_AG%`^|YgUS$o;8s(jilfZ zqy{9O$J>UIB&3@uXZODU0M>Cf=l}$_PWhZ+rlYAZ-C=5_hSzg=W01>LM{J{IkIr~N zl38P?IXMA<8O!$A^2 zLq$rSQX|J7sn}y6fQN(1^Z96rT1~-$rTD%C=ZsV!fTcZoS1$3VuR3z4rqW4nR(H0J zB89xNBizv}gKKac6+AMWFd%|PtO+C~IDN&^6r~XTcaJ-xM@7_3)=J?lPdgct&{)Z} zFmedSF^{&4Nx44WpO=k!0Dy6Jjg@xXQHaFzZ}Rzy2O(P|oB%!e_R-0+Ra2dJ_YAl= zz}C9fBxPfHMn4dg?gSnH1bwspe%dXN9E$vWK-fE&9SlxYa~4%uc`b$nX9Sbq-x&D& z>Pc^UM>_^4k#qFYKUee(9f}I5sp_q?^2zlNo)MVvyk`%b0y3&p0stiGHTZrhQU{!0 z$ewt2$i{5y%pq4_tHhS@vbvrMYmK5>cBh_}H?G>HgbWjNv2Tn82Lp^2_tqGj#c=k- z$tA6MOW;ihF?*h{1%YX@ef*eK>ec@MgR2rcIVtL6tc*7LAT~tIaO&R7bDjrt?l~G} zTGui_wJe}iF}<)Ly~YLrUzJMNJ?R3V@Y>PT{X{5ftYW63qMPd!(t#pEs&+V$M+=jK zoPJ!KX~}#+Ae*1#0rObg{yi$4F{(v+Hl{3p4u57Fe^t*>9bJm#j|_$=#WX-8Df^C8 zf;;0Jj&yJGRb-N{c=Vs+pemvnd`RU%hPt;IMok?h>A*#_+$*zJdgXai|{xm@C49GI1 zjEobf)5M2mv8z+$Y(_%3DS;+!liPc`7phh}&9dEBOH~=9YkbLu4g6snh7qX%<(S9- z`-7Zd=`KA%GZ3MFK;8528AMs3pzDr&qg|@)ai^mzMQ%o;qeZ8yjhx7QuvC^zkVz!= z_T&ybQ6kHmPVeiwRKm)3^BR%JZ>&G{Pg>lk7FtI_liMiGB<2_-6w*Y*JOCE%#YqL1 zgX!e!Gl>G)N^b`3e%dOK0$pug=|}2fMT@TJZSvgd(vVz`NRv-i3q;ZdR}pQC=u=Kh&&kbE=kXBiR1ywy98(N)7o=9SImw6Ub zyDH9CcO`bVFc)zb5iF#I%Rx2K&p(VbgVZ>?p}PT;2(xr;O}@S+t2Fdb!B9gq!9ZBL6Uh61n>n=1k)b&(`k1OUZ zCmCTdI0uq8mE#Pnk9|)UDq%^OKd=}ztH!vsB0K_xJ68B-w{?w=r)vei>eJj-t)8iB zoX7;(w_#I=7r3+MStFYgQ*-|Fzn)+(E z=wP>7jI#Mj4OSIHCRl`W#QKptoR;+*4m5<`JyWKCu34?rc48cppS8i@DOqedIc_$lyILEefeTHspI41kW7`B|_rAt!3v)6m1 zRDD-a*IG$rsb*`%P!qezoA#*O6z*boXDg3v4{kLJL71UrUqT7X4Ks!kl%+jo`-#TLkcKrn&tZXhu)g$JAFq@ z)ZHhJf2^lgXy}Tvy<3uqQA+Y~Sx>j8zqr<53mjNW`B>kj&3#_+bN7JwmP@3MTz;_9 zHC^u43JMAuYn3ozRT|cLOSun{-+*8aFmuM5Qf4mKg+OjQz3#puSpNW$Z|>a(>LXA= zEoHr)7!34CiyDKUHy9l6=Q~HS>;|f-t}iYROT9Tk6sv2rK&}@WcYcsBscC9#^~L=r zI+pVlW8pA3QO~vqBfhI9YGw*ktWa206g@vMC#h}G+p0vYLk~ML%>#O(je>>BA8=3N zAY>@-&YIxnCTy?;_iX$;7^IaNVY}d@_n+T%ZEpIZbG1QKRtkyfl_c0Rwab1WTXD!F z_r`b{4CM(z%T(##nAnA^`J7X)ro8*a>Z`BNP*u%Ix=QpdA{GQApg05$>To#ck9{s> zOHzUqC{6@pp2NYEf(TBV-C}LBgwnLs6&H6~$+yL|5nD5FLefNe;D%wzI3Z8YwD@HQ zD_TQf4uPE^jYt#})+sSbLY=NXn;u%{bQGqRmcGK~K#(xX(xrCgUr57089R4id-weG zw;#l*M9c`Zk)?SG<$KZbilxGUDjR;Vw@|kGqS1M^l}%9q4^i>+nqhzr#haF=?yfrm5n@-(s_=Om*gXC zRU{BgZR6k5-AK~}m8nVrf$81x8D!BVDMb`mcWs>Fe^J;cmU!igVLd%vWVG?cE22ot z2L?hHDgXqI8yQ^~%b2M_HdtYqHGYrUg7{j=a^|^mh052Vt+l;h%l$t+%un7_<+I4$ zH|xjm`<~i~cEZHQsT2%|;GM$@pGd&%`uEXNd?9LBraRtIU=WuHuX?eMZtEZJ=8m47 zR-6EwpIG<@^X^Z`)S+c$6+3+5U$-dZX-XuQ`ro8uXQzhUZJDbfSZ^sDel9<}6@e=}U z-QP>A-XzOm7$HHT1l3e1`6CDZgNP$3O?uLoivGDKLIEq z0k9kOCpa)^VwBzyIyy&(rmGs7fI$R_8%tX|@0LPa7|6!eB%C0~;2fPSCn99qDpQ;X z;_k6U5TXGr#_zo%=KYc?Wv}t;rA+G>nh0uQ+=B;bg@>!}`Fwv@Wg!V5qc|c{MJSuV zkE&LG{{Sb9XIhaWkh|m3k>GD$vXi)2>lkC$h?xHXrm1;ox^E938a}eA?sLKB7|hSV 
[GIT binary patch payload (base85-encoded data, not human-readable) omitted.]
zStLaX)mE8Ez?L-w!!cZ=yqzYo6xVMg_Q7BieXB}5fIY{5D=w#dh z^wCrhPrh*%_tj4bfN3=XuXx|)I82#e^{B?>xY#3+g7pBDWSp(OsN{CQ{vJKPbz#HH z2mb&$J;W2lnnkY&_l>moE}WAkuT|28Y=a$^q?5-OI|t6HsZx#nLUQfVqPC}!f7E;J zqbYA)JsXg!!h^xk7T@AS{D%x zrAe4>S2l6O0pATT{k7J3W^p~nfk)yf+(86b1E`3XJM9BiQ293Nq;qfDXRHYfX_e!J zW-Z7)$kTE^#!SKCER&Ocz~-iSNwfn%i~T|^&gm%wXPUc77|)@2{J8%BEn&|;)dqwU z>SK3;7D1bvdTy~pb=A$Kk>jUgY9u&RQIp@kG%0CHL2!Q;@74)0f~zrFb)~q%t>+dp zh2?oCO-KNjJI(_zW?_G$JT<*|jd=PiHNHxcyvqCt;a)vX=kWgkpTIg|ZxD$AiRd@G z;dIL?t{e4^#PEl}3Qr4Z%}Y~RFE?3Gq_tIvIAuZxQ8qH;w|a5fR5XMmR;Sr{6|%p#vx(MkfExSeTQfe z;gA0SsC}Y?@gMeLy*2t9pGQ*J*wrvKRB)D}NY8KKqqB@2eLIIgY-*#5L2#rIkOlqf z(9#gFKeOFrq~q)Zr+UtK*T59ce$!fZau(Z|ef>ftx{uh{61Q0i$ZCc-iU7Y2* zMJ$PcgN%0Phdn7Tgnl1eI#SQnS2(TI0l-2b!?|)^Oqj_8u_qgjIL8A>kK|{h8Ylk% zv=Ta)vbZ@bsea}PNpYy2rhneoDee<9tWn8Om?2eoUq}Ny=Oey#p)gG2_+*v^Z$K$c z`oc*l;by2@kU7Ryn){uS-6T-gRa@kiS!8viaEyI^f>?utgPuQJYWz&;{0xa`LnPwi z_Bq9Pc{2E!i&Cm1mVay+r?_<`qtC3Wu9gZFRr<-AS4DXr(h2Md0OPmy)wn71XUnxU zPtlvS6Nr+TRzgEm{Y4XPp_VC&f*63Mv4exj$8BpS;7lS(BCx}6fXQFjGN?Hnun%tk z0G_+b^*i^7au`QtctL84m!NDQ!vK@YEMze$W+mGLBxmifDg1-0h*mxR<7z_*U=t2W zQm9xdiC2I_0gy+)(iSKUmxfB7j$S3ja*{G7{v)ECf}oW@k@n!7Rc2eVnjnX*@XLCu zH%@}-tFo+!)H2IwoP<88!7|gF{9wwxyRJ0+)|oC`!FZ=pcJqbM$Xd{DzA8T#x^Azo zJU+b8LrXOE2%@5wY(>eIFi2kC%%zw0($ba!oD6VyAzB%U_TJt&N}&r@6#yz^3=T3! zAFt)cw~|Gop$o4q^nvwjwgQ`5pHhr+e*XZMeOW9;Vx(FQ+vOLx?9Jj8FHY9n;keV! zTLiGAu|*ld^C4it@JJa5&)?Z}NlTR_R@Ms)gTlN(SA%Ick6K4on-$`z8g!5fcw z8-b2Mc~%7b5JA=wW%!D>EO}2y^=O%2%W$(LB}QPV&hD62Ak~Doua{kPqNEDh#Fbdd z{Q({S0MkWJ2{1blKlu;0=?*81mPeEl`bRE!?O~#)>Nl>UuB51G+?EtuhX*7vI32&& zPa;B)l9ebh$iJjgTYpq*!-?R_~ZI$c1nm*I}q?ynnA%iI%V|vNLk=BJE&Kc^ioCMtcn; zX0B?Ah_u3wPnzdWkd*!#jF&ZYcx8(!Y7ueJkF6k->hMD(f*hn^7?MC8*n`LgS#kdW zjS@omavEf^F+bV6W6~IxD~z%{MT=Tp>jMqWugyeSXreNqWs*oHWaN9WkoG_L(?xs| zMN$6%@*ct@qltF4?HEn_^_>AU5fXPS(Nj$*0CUJPAHTBHJ1x|E^>C-sxcQMjobP6)q#}$M9cpG?;`vS1{IF8GOkoP0qGy0>N{do zqSSR|ik}_cdP5OW-#KD={XTSKC2LQXT(W=neqc@mTPuueI-0}gEHBsAgptW*`cny2 z1LTO#G28FkR5JLy5h!*){{WWqiFqt-6`ualCJVi`s-Y%}sVgb~iGP7_rjkUJz$1BO zi#a?Tf(CJnYSyMF$|g%sAPM|(imFl+U&B%A&Fd4QpJZ%S4xp`k?%h*TkSQbJv;K$E zp$XF170)D(`%>PlH;fQVnCuXcH6)$><0+`Cq(u!CB{WQOR%-dDDt|9X&mWeEo-~Rk zTF$Cd4#F&CKmKBaXar!^%X-1&RLxB;7)l`Pu4>x34itfw)>PY730!p1iC9{A3MFNIhR*kuRt&>TE%m`JRinU#nj1GQHf z@6U6h4jDuH51stL&k&1@4xU@ELMaId^CDP<_W5vEfhrm~(GEVm((}^u&9M-I1j2vM5}z&)k8lr-v!M zTRkG4D#&mcLj6yQT^?X?l@(Px@`M^SIX>gm*#6ogcx0BeTxGm@K+M$;NB~0ZS6g%$ zmQ<6bp{-{;gG&MlFgeKyp4@1nPYwlA?Y#QH%BB)68bBC2qrz!B`z+jz=&MOB$3MaBAy`H>7ynNgy{-ic8C4R6As)SNBhRR#7c=a z1WKji>OcIxqiT4oTN{7LSS&Wv8u0Y?C=`M2K`Z^WVzd_WCk!I?5&!_6=k7js#@hn+ z8TMnJ*E+U0Kxuqt!`F~NBe!gEs2*G*8O=;;c?lze2+n>r)@vPoM0i+Wg;IFzHQJcr zkh(np?P0lw<@{Lf`RWbTj{gAN8)6KNyDBh1W&=3?0M4brz93e(L0WkXwCbys2rxOv zKR=eNo7y>7M(1pSz+?CBIPIt<;#ue+gQaO2<#MNtMmIZ!JAOYsRbEuuJNk)BsZ5yd z&YSCAyy?E5q@<$io1MDf6d*$t7ncx121ZCs@HL#ToVG1-lKxXjLl8az8PNJu%tD2?bQS=ft&5lft z2*6* zjPBMUg|l3e&_?1p|ZmclP+w>y`?M zvxiNbA-QTGR`?|yXo8_uDECCzRlx(*lg>{k=l=k0EnpPpIYTT3<|aW{5Ek>9dDu5G z1oQUqsxY7lXUiysZ?sCc)Kgt0rx0GK)=8YBEcIe1Qaj`}3H=79sSK3%V+$*YN==ml zI!1K{dOD|!)gnPqD^8Pw-gFiNdmcS02|VB zIN+Yzl%M$lzqm4()-)yBG|Gn)kfCrC1_SS_C>0k`r)aGcN4h4-H184wVk9InK2}Bt zZ}Zg@2@gQg){;X7Ml!}~C=A7tNZ^$cS&(2cCj&nm=a06vcqEZ9&1wOkv?;`wNJ2}J zcXU^mYs9oPAG_stt+?%2!>W}v5_a8>Z1+6sv(P8ZGsVX&$FHO{A>~UFarbyYt&Mtm zwm4cZuDMJOt~b`qB(d%`+_v;Y9)oB0o}AwF(-aFr=}*^egT{B~K$9WaryUFZk(&i_`xARX%Dt#3zeYuj>8dMQE0>;+l$* zs-lZ1W0%SyWj(hjKXZ}u#(_JCmnkA#r4J}-XP?C)^5m&*xq_w_=epjk_bI95>AS_w zD*cngH8TckZKrFDH>4vBHZllcbcF6C%~iS63Emh+Y|a;o;rM|+g8`Hnq|j31%tt(a z)O8^`_JyaUsfICYi(*y<2Y8l3agH!I5I<9|NBnZ+u30ikzZQP5NxmscRXhM|Kwc^9 
zTDhp5_ktQ-z_xkg9BWV-lM4XMVXnn&^J|%R4S@=mEsT%LJf1sh0zZuPO=EWFBq3VWf$wNFvi{ z+xm&`)wg6DpzGeBrVK_zJuJbLagwF-GxtBPgJ^?9%KrfFebPF>YTMn|!FpTql9isL z#+YeP?lP=)OzBQz!x46_f#=#(s4G@fJ`F+&wcfGI-aGUs{4>=F za;CK#MXu+%NGY0sKV*s&NV~W^mM#aj?mKB|a}c#OvU=an906%Sw$#b)il(-aWKo{k z+qCdA`g`bz3`@Wimq;7@#O{;3n>B3}Y>HW7m1KB>FfvDtt}+h*hCkHkg(SFp;>jnm z%YCD|x=K4WJ*tA^)3Me;Ls3qx8ZBg{LXr4z(hvyk+uK-ExK)-+#XqmaQ~Z7nMb#}m zs|KaH+=r55sjG=VKVKD`fhCvCy-S;`i-%ErgSHadw1K|oie@9N$GgMI) z_2NLJf>g2OXZ*f&>H|=h5RBprks4yRwT@Mc$Q~(?8zY^pKtJooj+Z~!Q*8?d29R#W zds-`uJ-0$@Eikb5P z1PVr}yj!;*M;5!bPE|`JYq z$%Vk+ZS`-6nz-}-2#RG=KuxnJHc%UE*eJ*V*^ zqC>m)OymX|m+KUQ2<$MQBb{YwRR zvh^IkvxySs3K!=RXY1afWe4|NF?C(Jlg)gNaru6~>#G@DI=`|*_V{S)GYMxg0Xn0r z3xVX_CDI3@89U^==sw6=Qi}zR6w%1B<0)e$D`(wELc=-OB zFD3+wvk03ov05VQ!=~*m|0E zryq$H2(#PlzvuCxnaQDH+w%oHBHHT4r=jXeHzHmg807i_4@vgnI}JgY8sX`;i~j(I zZBjKJp1aBjsJlwf!Q8`f9QzQU>f&Gq(^ru9+j5RPDu}|d3Lb%Va`kT2J=_q|73P#czH7Bfr8?4o;COZx5+vi9*uF3~SqTlCkZ}a&^XNE%xz0;u08|eMZQ9 zJ%OS|cUENucbu@!M&Tp$_4oFCUd$5%2Yr7vZNHjALh6bf^o zkCmk4;fM>*KRV+<7~%1Z!xcnRu?NoF0C?53iUk{^NiUI)Q1~PK_01hoK^}tH z`w)K+=lW++gNWuau?o0t+e>!#`PVTQ3dEA(qk1XYH*Y&mPq!NED-%p&T^H~r{{R+y zeYpPsU07aXrO666#H3Mr!s1*W`V2Dsj1-8-O_MU>Mnk4cpeL#?pQE4s41#gYUM+A?z_0~!LGsRP-Wb#Qh z>$Y}+{{Wtp1|dEEgi5LCds4$E?+c9u0BLswuB)a1^OKblG2r`qYUlWc0cfm-%mMR3 z794WaSRwg8)Cic_ZFDooXp~;-3EKanB(64N+6!jM$mZLm zF*Ak=lx2z&jy)lU!PLB;CWaVoTQ(9X*9vvTVi;Ozz<#x!zx44ok*%A*_M%0a{;)wsk;t2?fQ*A z+yaHF=-3|69Y}1^V0+>%S%;mc?KF8to0H%1f1n0KWyls90G$SW2sR5M-j!hk^T++QV6FanvkzZflYq4&{PN} z>JFFKfR8$=-(ASZx6U{|+LXwWMaVqXexsDR#;dkD0TX4pTT~ebT=aCw**4j#oR5$c zzCLve;en}_Ex-4Z@w{WaMUiIG-=jeJZt+DMQ`2>&0I+@EnUWqk#CJFOXV$G3_U*F;xmWc^AlI9R?r6#PJ_c>*bFZv^mq<@Q_ele;} zmJ*N!5+T`T>jP)zHZ#*I0VWT02ba5;&0}nNx5L<)o4!egOC9?~|s%;*x3*+N2Zd2qs*UfqrcR zX*^YpTVzAgcJ%scps=f>X>?Ol7}t@}@jV-h8jaF{F1$12$4I9z^1^VQSBNDc$PIFmg}B~I~*{?Zjr z&d*_<5YZNimUFb=ZuJs_jAx&1Na5s=L3pQ#q9Uj9->W9^OQP*J`r4auw(e-@?V!kK zO~kB|XKn~PRY$8MZgGr_X-eTyB{tFk8N4RBrDml3MGb4Cn$1__)m!Q-ngvNbsTXSn z$S5}akVk)GqJ=PO#VGD87C`Y6rSJ35(seIKQ`oDbb`4!g6Ru%DfnG73?f(E0jQ04| ztw~VC6R*B66e%DZC+06Ml<`PZ+hKt^hR;V$1U^MLj@LVa$}qWe=>f5vl5vy9msbTX zN-WQ*HQ(+Ind%G(J=!-@c#YN4KoumSLPkSHN87i*BOLy^L2{)6!71mHL%w3lFIe2Z zAoY}+vsDB}Rbh|a;Dp9If;s;H&Z-2E8p>OHtWSou0?u;2Cs@2f>+42d9PD673bUajkO@DF+x5`}>_b47LTkPI#=C_Y(L*Gl6=u)5I0xA1<;7qnDGsNL zul986d)HLju9V-rYiTI0E{PC^L$sMU5(ory>F@W@)*lG_D>MiUq zf&T!7WiDfvB(^j()x}Jnba6)Rr-DdwukZd7+wG=GtvEaVLiPd`Xh3TE^`@n$h)mne zY^vG8UO~n)_9ONCXi|d~_KQtW9N}hGS>cLSn=PCk0AMr6`|qZ_*BhZc9pcHc^xQpJ zYGgsSrZp`Y_;53{AA^E<{@NOpW%awP9WsK2cf4T&rWq+KB#G4j01dK1$m1QfTSS{s zyH>Gzcr#i7c&7+>N8CUJqhxmb9BFxqb4YqiF$&T;2A0`xsi8pB(6&ZfbU?7}-&yRi z?|Hj_ai*D4*~RfDD>b!a$)JSB4nbJPf4-v8v*yEltXK(3G#mDbDM?1Ym!*%=FrYvP z0DJb-x0T7mdBxozP)5Xx2%;cUtX$+NQL2=M0logL`#F}3|of$y|uuQKydwju89gaG-wk~P}`!W{v%;>4m*EsZ~+Vr z_-^>9b5zE1EkIP?NFB%G46MiFI)?)bvx}0VERl3P zQ{CylvFcIO?|=#DYC#Tj50!Z3;&mSP!Q_V&maC*PCbQRTcTw~SWO zzvXmnYG-K>d6>$LxcNL|RvudC?H4LHgRE^^ROUchaF|{baL5C`qQFqTDnVuuwQRC^ zg<~p1kO|Kj$G7LJgoCLWe8m7GA9(z>rd7#ffxz6^%V>)!N%D%sAX^?~Q+mXxZEV=w zjN?9`$0OTOMGAU}!07Gv6LnQX-5Rm{71=f*lzgGIR5}$F-Bn?bW%1qsP{M>kIDZ4c-LbR0gOuUy~2_2$LsgiHO3`? zG2<#0<*;*-GsydD&zx0(FCK`8&Pd>Axaa3ldqkdbG9ZiwN0u-@Jp2Cu-%wu@5c)>? 
zNVh)nz~FcLf7`aChoOlM&9;gDK@yFsNp22KNaN#EObm0{4kMv|Wy)oO_bo)Vb#!2W zs%ahKFaH3ia*Y0i2SHhwNBWCzc|j5qL9`vcqoS@|ThNkK$dkL=>IvHW=c=hhAoc~$ z-|5Haq$Wz@h&HE_*Xq3?^i4#t>6QWM9v>FEQi=zXI&pw9%N@B_1CDY?8bcWVcTeu+H_FK*l|W(VpJ= zlfg+P$!PEL7S2ASOy9I9EXcNrV>`C*sF;!RGlcg)q0ie=o(4${AJQskD28~0FYN~L zu~BEFVEm|BqDIVf?c5Qo4h}){F+F0Zh*=F>t1QVZw z!Tmex$A{s8sP&2&vwz}Xbx)5MDXU3>-n8_KxUZ8NJfi^qGx&cjb~<1pQd9tLu}e#4 z1cMr1#vZFOpQnb7HYEDm0_Tyy^{=S=@%~zZiDa=zee{b0MPGg6LinNA)voZh$)J`% zOnyxw1t4c9cF}-7{q+XM2^7*e3U73cvh}ZBO;0MxeyEfxwg^>>?#=-O09br>E9$L1OoDqdTKK}rgrq!m2u(0AXOJ&-kvPCymfj4p$or{lvao-uw z=5^M=Jhj`!l$nL>h-9-_BcX<+0-oP5UwiC{MO=^MJ% z>m9qM7Ak&LKm>~)a;Jd5uP6TieGY0%SA4$W(wL|LSf(YSx%h$BzeJmoDpG+J90SA55LZ+X8}MfpX1a;{{Y4>=813Nipf1g#!{XWIcU_(?$Lvg25>S! z>~W|Qt_*!U{viJV;(#f3_v;&F@WLtGu~8a>8n2eIo_Wak`5))0PZFS5)2`_g{xQAo zjp^alHA)4Ti1v&hDkWTZ&%fXG)ejJCx6SuOf5!xpW10Gf*DpwQCDzXB8R%+S9F^sl zykvV|u0O7t3KKvusU8xFML^CV+@x88O3AVpAd*{Y2b}Zy<6IM39q%}WR_mjpd=}~Y z4x;eV*EEeOkxbA^qCyo}WN%O(J9g+(CazlO)!#b9Ts)L1Hl80>psjHCr}VpRC(UEK z`}fcc$%-{3`aev0!)J#>SFC=Yq3q8K#MDuhDn>~lmBAPXJaBahGaAx~?(svz$|A|W z6DfQH=t?rGDQg~CB@4D%<_HtMPwVaRqIkSWPxmfxVH}_nvUJHsKjN7U^(tLXCJ5ERx6sJRgwlD zsz0<-z>tqH07FD{%L8HK8;HTe;2(fL&q}JxH3#DeCu)rr)CjmiUNFnl^qdz7|%Y=h-FXF)Deal95~0a(IYW85hvq?R4<&l^I+xFCWL2lM{`wuBTN#YJ@+>lb1WUO5-9x@Nbd>Z@$^ z(X>^R?}p?s;Ql2(;EqB609`*R>J`1`7^^meh(mjgL(?@eJ4jzp3IRNi{r>FhQakTwhapIym6`bVff^onA4t?}msG3p)e9S_k zh^pwg&C+^$hY1{#G!nTOP^`z}*klo=>cEN!%~CM+ijL=HsDg5@6R-k6*j>PFVvU}A z0=dt(eM*8tt(Xxv9HClTVF?HUV%a(O&U@&Lll`GWtfJuXZBlvbEz*Zti9F^8IC)(JP)`en5Sd^0NPSUPCh`_Od)_t-Vj+gf~%vfEmY7ozj~k>fF+xEvB@Jy zE6DYJd+i#QQr3v95SD^C+!ol#%tv_{HW<$Bn~Q8bAMpNu+V2f2Y|dsN%&l*XWAYQk~x5Tk}1C{R=m{{ZWv zS)(6F;-Y*V{OhGMu+r~Bh3{K~tQ)KNxTOXf&6*iL; zAU$awt*FC4bamUfZ5)w|a4KQM~DDiE{sHla(Th`#xuv|^QwrJAXYtAJ@J!_ox_}ezBS5dgP@GVZ*1)W zPr3KUZ~gTF?#4J9LH;63%epcEW#`mM?asKCF*g>8l!1^o;7J34-=BhYAXX~uT}BAg zfb4?=ZXA5}J;(LcnZ?myT~8ei;jc*7HI4_Rl2nRN3lv=LA8e8Ad+B9OvkoFjP;ed_ z;_&9Ra$+*Gw1q*!gSX?qAD)6HrP{B`58xGD1Svc@s(Dz{%+iC99%+=G{C_Vv&$k`^ zbyGZ4HPpf^o*72?nJ0x_dq?F)h1ItiQQJ5J027Rpo^$QT#;Rr{7iz`F21%w7B9p?i z9WaUIgK$zr<=A<8kn;VdItIc*sz4ySN_%Bay~G zGphK(oCE6?PY4G%+ZTp3@J}db8(W>m6*>7B=kz|<(9HEHIZb-{<(yyqRMxbKbMzci z&htLuD*`f+ARionIMvMJ-dnkSpWJu&;A^H)E)IpDjq=GfnLpyj@1DcwT&b&~Lb19C zIC*t3CHg*sc)9wyTV}uv?X^Jd+f_3rvs$3#UV21{QrVk$#t+hV#0&&mvNFb3X6N-f zt)f}UJ$LKUBuJM~1a4aet`mb!xi}ltatZh6+e0OF4tK7QLoQlVQL`C-@1l*Fj6fV? zfHHIbp8Bg7vvya+6rm!xMloAxB&dhL#DvQQu!*24QWj8%H~~~+ zkCE@(>K`%Oeh1o;dw=v%|7LNe#|0c!j!&1{SVeV;}F@vr|OV3Pm-JV~{j$f_98! 
zzvb*SE-WRR6B8jskVIL28T4Cp^#xsZbOc@R3@R#-$lVrsNVxw12q!;sel?i2nWcrp zE!U)L$&?V6Rp*>Dy(10GX~0%EG92*V1n5a(!OO!oLo<8EYix7@Q3AJbOOk)~)Tp>( zYp2(wO@VR;%xkH(O&W8tjyo%VzI9}(zH6e23PM<4yQ4~3>US9t8_UK61<1(f`ux2+Xm|M+!W|p@SNMtH@0$2~6 z{OSXM3vYh0))Z)`j$!?!>_*eplojs$T&Y20w2ZW8X(v7SDg8C2!z$1?G-B~`?5d}% zS5|3h>nNs*t(Gy2f=^+eZ=D#VsZLGMp;J;i6ZUR#1*-FBXEKN+j=CmNcI=RBKdxg3 z_0p4pvIxA!uLz`?6YsMZRCMT7M(4&r+7~Bxem~#7mPu7nC5n~A(nB01#K#EnmLG5j zAJl7tR+1HJy?!Eq1?5ckAe9XzBBXnoKP$G-23L=D{X2eNIwgZ?)zHOBO_yupNu;M> z@}Nkt1`)wy-}AxM6p#`}t={9>I)Kyh7#Lvz)D~r9yCAoIJ&5G;HF^LDfdU0oce~7O zrh0ab!82631%}gwT(&v;@vicv!i_m@kU{3SKD(rAsTANNBrOb#+e<5LX?@NWROi&-S-%g};-8+7cJ(JxadTN;u z;dx;Q6!Gt{j@|y}v#3{;s~!4BpPl8HQvU!^&9!QgO^~b@YIwaZw|(ed6nZx z_>Dz}?^p(P{Sm`E?X0y!*lWaxQIgQuU z0)kEmJOj@e)tOS?{@-7t07}A$LWFaC9W6bU>Gd?pBsF&S-lX?m@z*m`|+XC$4%0ef+@gqBXm(LiM9x)Qai5Fq-UOU_tz@7 zRnGSOqPGT&#kN?>Bgg9CY;b+Ya1KYdqSc{iewfDqUC8ekn{2VkB6)aFGPoQJocGT@ z`nj#(mwY0H0;VyF)lljRvXvxvbvWSrkIS~DnM46y?>OKKxQzb*av2~q?Gi>0BX%1j zlh}9ECESK#7pb~5n{_Qb#Fkjujsi21#OJU-I=BU6NPx*MB3QIUz?fOUQUis<0!Lyp zK6O$oImRR@#SCJU2)K=$#K$FpmmH4AXwEf4m!d`c-cdCHj>LMl=840`#x-;9S+^g| zkB^-YIY0p212iP#-Za)xQ;-3b*pfGHQXBQ-{{Ve)aK@iI&LKoySB(^wnC5I#)Uv4M z!6auLvO5hC%Yv=%Sb&zgB}Z(Ql0C}k%1GQZlllJu=UnAX749}-!^+KAn&GO^0m6vX z{%4;23~GcE{RBgnzCDg9qjMZg#4+uRZUaTQ79TOiOXnJ9d7?%e5!ngIF|#v&LN!BR z3!}W)eZ(i8?_L0&6>Z=!l2yY1KG{FtRZ&T(5nvU7kI0o$C`h4$BxEa{pn=<+T_7gn zV~J4|5uia{sUyp7h8T)~3{oA$`<~iq23bfI-&g?)a^Obj>m~BlUI?-{ zI|J_flP!&R3w;=BC&wW?|%+Xkx5B+7^#s&(UoPI|h`P2yEk%m5( zwY}=`O-C6A%#l|LImgDKS308T4p^Pz9m&ylN$j0HOG)Ir7-IpH5P8S-)>8b=q3!b? zi4GA_1q9w$Xrc@Ua)oenpU?a12seX#7@Ic@?-<;(yE3yx4n_b9K>pelnq7YHjzq#u zL~Vszd|-f}@se;(iBJf~(ki7kuu3{_siyNG1yy|#Nsz0&VSeA{$kc>#KwlV5h6+GAn~?9M?U?}$<#3%kHlu<*!9)DR3jqxobwh z5Xs=RqI>@UQqlSF{Z$MsE9+_|I1Hp>DDXcKW>9hP8#>A2lo`$Oqr#%mW9uC$S3Fez z0P_*bURF71)eA7>`(upv(jm!kDZRXI5(Q%jgc2YQ9i+h+3PA@SJ+%&yhrQwf1%VN| zI!Fv|?JP%Lby7pMKDM zEE{^z-k-Dx81nI$}zQTQ{!xMt>TYMW?EFwvI@sj$9Ib zSv>K^2DE_yR7MJtd&gFO#<7d-gQ%r5M730tK#v=s&z1y)9FyC>`^J&t5GbMhjh+CO zPZ7Z#ZpJP`+N2u z8kSapu%NrI#6bre&uAT2V~Vpil{B?+1!VxoQmPn{$zF0#8T)I1Qc1ISCiUCFz)hQ@ zHjd3p9G-o$b!Lr2C}$)Sp6#3h4hhG6>yo53n`Cu2@1#+7g~BFLtpt%mUXfBv1u`o4 z9f&75&%fV|R>DOPZ{9w-#epKPy&_#TEOlhl(?6ItvW==Tr#~Qb+p*_T5(N|NI#9(4 zz2aJ0&9)|acFM!CDhAQm@;M)K`Tkn51Q#a1JLwg?XTON_zm)4FD$)kX!BSPTjx+EF zIoAsy6OT{SO=z_5oMBKr=*Rr5nTwQu_|M|W_TX`@w=E@ty*^@~C>Jpe1XXO$6j8tw zVM9s0Fef99{fD-smjr=KdE*e8sF;;aOvT<>aE%afNo)rw+-=Tr$3NS?s+MC)?+hZv zFXkpN>(2$h_|4&s!iUUiI_gT{Jsm`mZxR0h3}j)jsE};h!3$H%hPm8WVG7o z-hdi0eIMM_)0WF^HF}i?KknI!Isoq~w)5NNZLK4T4GI7ry{q-)` z^QnRoVBrEVkT55n{{TN9+g)TRxxOH%ijQAtPPHv&)+C~rCR|`sm3}b>yZjOuTTBe%U3#nkQ>qTD3*ze!podhXK zBoj^N4n3g$&~iB@2t1iwWN+Ca;AcFKzu#0)tF>0DzKx z@JZuU0H_;R{XrGZBhNtbND){@H+Ex?zz)X)+vm2bD)Ccfh`>_LvB~t#@-{$Lh;Pn& z@JD}d*HZXl&$n!2h{`s{LtRYIHA+-QBjH*^Kt~zJWB2}=l>+R~(v8+8ww*>K$1}7A zO=_fyGT=Ir2j%&XZE>`lgZ7CRF)_ZJmX*B9gmMA=Y`F*65C_L?5T?-w>w9>brx=`R zM3Sc8E=d&s0D}OGeb3Jv^YN;bu&rCHSD&5S>t_^9giW625kM1hgRWnp9Ym!Mk0nY=zu8~k|#T2hA zJZ=z(2=l6Gi>U|rJ9mD$In}}x9F&-gcUOOLrn07ro6M4?p**!C3P*59xCH1*MOm5) zXfwOV$Bo1;)hYh~*#k||Jm|{>0&X0%kTHk#-_MOIBG|O zA~V^!&m)8W{x#RMLCP{jD&C*Wwmts<-}!1eMV_%bDAX|9w`m^ZzuW%+I-1=TATzuW zqfrxXeWO8Ryv3BgM`==FmIe=pX;C3 zLsD$1$?wZ~MAFaaZxUiOQ^M~mJMps{$S0p+jZg%T$E0Hvv|ewbpPcOtyF3mu zPCwo?U?2M5z3T&Xl0XjdN{{H&l=5>YU`8>6_dfa26qJPk3wp-|(tyS@MGSN9SQ~KQ z0#KIE&m3|!TPXw!o-9i_jWrUzJf&rf$iYVCBLhFD2SY7pgY~@HEo#UHGo`GMuHYk8 z<#vSxu21{p^U(@#4SqG>+(A)y6n;@C#>BUl(cwVdwS7!`=Q@%A4o7I<;6zC%)rxEk zP62G^8Snc0bE>3}e8!vfiYNdWZkWco7HQKmJA_k{ft-(wp4uXV7vE^)sH_WLh>J8U zN`an9;sY6wupxQJbN9xnF?PJU{cr9f=W`gt9CFjaKb&POxeCp-NX|RwLjZ$mQD9sL 
zHQo`k=)B61TAd>D#~E|#9>5&-;Og#FK)*&}GAj_jqMa9qa<_eQ5BJs-;)q1J z<@(-@s;6*t0wPZByaDV6r3px=72kNerWA$;eWO&jqVujQE}nN0cKx zH|S0Zx5o#aYs*!n+9o{Q#4$Vii(mFhnQfGPOCOk0q28Kf5P!s~9Au1l%V|C!+#f&H zV@HEf4o|)D(J_ULZsAC751q>KpMT8p@1+t9$0XgD-=t|;YlDt%*~hfCvA9L@zl>bC zc+Ph4eIR^i{PZ;>OHD5A-#=KqwOXK!RWK~^#w4tn2x82vo`rsSJb!Pth!(OcoZwk9>S% zw_)r?tg9g7qxycYil~+kzn*bCS5Aj%XK)Te(}FU?jzGo;1Gv-@F1quV>y%IzKZx9F zB^2!{LsDiSf=|Z>Irr6q$RsI5?{3f~y+{!%JGH^xrFvO*BwCBeO?O(5_IKM71u#%WmERsLp?`k0=reRqq(wSZRQ13f>|3OK0oaYMMKxD!dDV zF~QDwAom1&5!+T$U?@;em_n-CQm&buURBO9xj`rAkG4nXbO4ec2HqIv(YS2?0LqaH zD@`=O1ev0fZ{R08`NlFkbJz`WrPyy~H~_#u%x*S%c1Y>VG7#7!XCNPA$Mia?W!Vn@ z01Ky(*gfg6A{jXoCHo{z;yUK9ZW$e`^z zuoQRxqaR_bCW@hi=YLzHw)$*IHLV^X4BL4a$Mw^qmqtRbta6|2E7LC@T~}5#Z&ve1 z4Mx>CupSORw((_85rG#tnq$*XGxWz$t>9o9BA+M*UWS*KPW>v=Gr1R+<@xkml z&W7OxU%l2DCBT5vba#lnh^?$e!NiImFI@xWe2w`?f zF(g6JSSTg^@q_c8J+elI>NwMu9N>Wq;ceP7Et~;z44F%=byf+Q$xLC zzyV0wltd=a;6%GHOo&OxA9L-DYSsbs0Ozl?NTY!sh?Kd}lhq=Kqg-kD`d6oqwA32$r>!TajM$SiP(3a^wzVJ%Hc{{R~2AUI2N zjz8Jn1y<7~$ILTVvbN9}Jnfk8f)xnk9|u*mtDU(=6a|}3@fs@yx{@VVo$7hoM0Jh2 zFa251zp&L56jM{}76l@aJ}W(#nbbivs74u@XAj%+>~o*fL<&+dtUyXx+9Iu*(FAe* zH3W^fxOQW^+Z&E^qFYc7RTZ$BFpcJx*3&PW9MC@T#A6@V)BgZts}(4_8hdhxWVj5* zB}-jySPW6X%}%-X#)BY!qx$}u2?MMbTjS$nZeQi#@wbjg`e~j~=bga-#!-RC41LHw zv8ZB0n1+lt660{l_WqiXUijfy$S0P$RbV@ic*xYG$1`0Uq6D$QEu3X>{{H}`pw^MX zx5WBr%C9FL-#FJA2N4CHu{cIaRaN67viI-XS2Puhf(fK`U+mG-t7@@|rl%80Gw`D) z10Ba<@9+I}pEXRTuQ=J^ljQ{>v347kI`1Ap48sLYI^ckL`99tArHwqXs1#CZ63ymy zc&Vc^Q^UAEew}iv4nh9B^R5U&L+2J!l)o*}E~chAWM`JXM<0h_OnyX#I;)ZiKT!?< z5sfO-M8-o-(JpYZKI~)sN1pm3RHrswqNOPz)e=(RvCDuJh7LE9F`vs+y;KeF6j`(g z5u&bhB%wBiQQcK`;A$Ko5I=D*gldqpkbPM06uATR)X}0Y7MO_N4&w=LH#lsb-TQx* zb#UhB?-$I*H%BtYHl$er2=%x5{{X*za;$*JEFwXAkjGfsvjtKNN-#2c`+r?a+y2!X z+r@(lUDti01k(9of}tgSdw9V-V^o)AK4-7z-YN-l!Z42FtsI70Fhd;UXewJi{O1|x z8mVJd9f;tkyhe{UHe$qMzU3*9r~3YS8A%rmY1SpmjAF7;$`&c>DgLgpsFFD=BuHd!jKgUqPDmdZ!Pf*7jW?8Bs?9_oc%d3rR9o%kjG@a^`EEfY z0|Pkyf7#Q#FeDWpXv*UYP#W!`r@L92zO5{A#LIsjY~= z@e8Jjp{b6Sr==FKDe0v#LXn7(Nc1A83I=~6rDg?_N*LE3Kmj*){X9lBs}!V+INkV* z01wW8@2Xe|cMfvhdE*^yy_27>FQ}Aj9o-!H3dp>ol?Oh@JoCZNeL_=ljngpQ@5MjP0{KiY|FQ+f}NSX(s;w zUx(Mx5lh9Z+5HBd;g%SO-cgywpbs#{7C&e1rAtSM6ewIqmoGAffEoNzhEd4( zWCQilW}qrsjV^)k^DNBglyy_kNYwF4iUXEY{6_$h#xOnm=@h73&l+l$rXMCY{_PDr zpD;wcu?#~Tk8f`O0Bvz~1L2CCS`3tctKKw8)3eJ{8;Y+e$h`T04s+We@HobV%UVDe zdiukX?1wo1AdNL8bu3V{^H7k|V5_HKK^VqB#{lQ#ee}Rd6thmXh%0JDrb%Fl5AbT$ zxc(}T0m%U15$}w1u1ajG%T#3)ARjbsqLQYv+{R>DF%vo&788+!_;OA=5_9=#iEDtb zQxz%#rsyJ)$0#_OS9}z7K9J*#GUu@1ajU2bx%9htz|da^)lYCq(b46SW@ITCU?k-B z&&VA2?fUBE+Eq*lI4oj8sDiO$2~&kqeQ`su3>2RvF-)1~JA-@9&*R395^Diry8)Ts{+a6oo2hoPb6{uK#op!v_OKs{2X(jS*bT3ajqs6Tzf{!O34HXOlsaZ zkvFUx6+aN<_Q}S1{Qm$wU;5Ni3+149jnZDFF}!LeVhYC?Qn<+PpK?9@wZH&eMQxK- zc*E-?WM^nAnnDiA7I3SNkTc(m@$-#ViM*ZfIH-4|9q_&?iM&Lml31o21dQfMHw@!x zAa)(``fA}pR~LZKdnLqnM^C_!P==2?Tbz7pl!g(EtKyOUrn+9>k5AOF$q1o}6{VCE zW^l#H1cBXuxIfeeZQ#=y!@M-c1rg5n`a;!4Nh+zRlXgZa+ZjE_B$5s@gY%{BIb?y% z#-K`X2L4eZ8XqNy74+>3o$MAgQmc*!`Cxp1X%>a-2KmPXz1t8_+Xs-w3AW103QUm# z!HM7u4n2-Xu{y2Btp1xZB{z{Dg)pT_)#@u4wBrLVm_769?t9}O>#I2r=Jamy43}XW zR@Z7*iZ_Ysnp1$a4EvA}PD#%^`+S{P0;<|4miL5mm;*@NHG`MM$<*NjN{X}pin3qLoRE}nP zNP`*+FJiU2nX*RB!9 zruf?MSG;8eaWIi`5V+i<+;Bem&OUk95(P-!^oTeI^@z1}QnK$@)o1LAWCtJOY>%ESlA&% zaSXX^V6I5UFgp!RpaT_pd__$~BXis-3r4PA%Td6UznG8j`EWSZ!k(AMDuS2B3oLO+ zp<@#%9FoKqA8V7%T;!He58oIW=R_0{QXV6hF}#85 zr&es`*Zp;8CkFf7@Eu94Y^kNFXyTb?l2v2e@4@`>!ThumdqGN#rxEe+5RdQdc=-0y zMTw6;SC7m_<}OKW`{#rC{{T%|`{L9lv5JI!w=v zwxs3t987PF0-z97AN^nHsCOiYl10V{((U;5Zc%_RbN1&~6%~jhI4y6bdaBqWNZhk9 zTxb3KYm`#XOk4{^h%wpy6+qTbo1u$oMsx!k26a8}^@59p7%+xDSt5p%l5INR>pmjx`*6j!)~W5D6)M 
zP$MummwHBGbp?Qp7!#c1JAJs-6ctbmT{8#;#A=@9p-~6bsVq2IImyBI)_Q<7Yok=5g-@j;T!~Kr zNagjiFdr&^Ut!x_eG}V%F>p|l#x+Y7FR6?w3UDLbOCP`K_S67C0Y{n7<{*&9H9-Xs z^5!weEUI!Lls?i=VaId&=%Gpip6pvstKemh0pkX(G1l#l1dI|CgePDp1GYP7{dGxf zpD4S%I>nu?9il4vY84tkHwtkgks5~i8TkAB>Y~a?2e$7U!YoTA+A^cN$8Je{xLQM< z-iflUfN%)!-1~fJ#Wqc;gq`|*MM~5j%}+SM+!mn|(2<7?fR1u`&p0{lpX;gs5mZxs zppW7w2q8^plBPCTCraXrx#FfQ{#YY9V~+m+0Ac+#3YJnX-hCpfU61U(Sa0GgpW0&7 zmk$anW@eIFn{{LrLAHJsE#<#G`#AoCMwEZp0VCwXoI@^Oly80Glr2|TZ1waP8YvlT zsAfu--9~XFZM}2z%KreKw1PoC@stz*p^u>!>y)<}TsO)nnx>ND5UB&pNeM>*@Bths z&+_~0A#E(2h4iODBYsgT2~f%Rw~rPI`4Vl^BWOUlbWx1`$7AR7pG3Zm`l) zsFzLCH7iz)H4oGxf>k1@3Ql+cl5_dz=L8-QEU@NvUH#QfF%5G3;}af1w`!*wov!Kdt)38xm_0Y3S?Ya6`yd^SmHYmh5)p zjaVwx8kkc^3ejYhCm7aXs*X2jmD$GAmW(j~6NOWc&r%lEPqyAE;nKLABXY+*G{q&_ zB49T*?BsVD`9Gedd8$35l8XS`W7eqii;*Kvq?5EcEtB85{{SzIa9P3kq)AFmZ;27m zMNdj&YH1Xy1;2^_ZasiL{Db-GiX?(IJE^Rm&PIZgG?>m6MUXD!1_l36M`}}_x*H00IW(JtPNwDrl9juD9BtR zkWe;p>E!q1=Zz#nLletg`bPPvY-2=r?TD(-##o_Hkeu$$oz@zks#+LHYKCfTH#u9h{Wf<%%iyQG;N+h#&~Wr2RZGelRjLaQ#P9E_wO1gD_HVY6zJ~K zRmTaYSSF)(LbVFRYazkLKgHjV>!tw-B9HA@GF53dj2NS+qYUmAPc7Kx7Z@#!U=VwN z4s-X_&5~@r@gnCGi4^q|8x`qfb(0I9Fc73;(xdDE1N7A0k9(qR3`Ah>O_uW!<$a+% zz;X@;WAWd()o4p^@euZRe154@B`ZAA!bk@!*<=6==eBZn#6w`y2 z8xRHq9{gl{Wd8uBq=1*S2S|Wj-ZsT`j(B5qo9dkIVCsy_$J`#k`yBiYTqw0q&$py< zZZR5pW+TjcrjAC#G;t|ANx<~sx#PGxgq5?8nBqrhkph-Qsg0+aHqPhC!5+Ya?Z%~v zuj}#7A;siI6k#bV45FH3Mj6-`{9_z^9BLZLa(#WjF$9BfjBZ{^6!IzMU8|O9RInI5 z#z4m&$M2~i2CnzoI9kRwCELrTF4D#~da!BL!6)DxVD=vxkjE-TOZAO%RlL)Upt9{F z1q5Ubz44BJEkPtu9^Sm-QUh~{((I*?BuOFlk5fp@Fn+^1C)*>PSvS0KAl?iWxBV?- zAiJUzQrlx}k~7$hcGR{eI~_wt%8F`t+bd3u5|&b+?I8B!S4bdeA`V1GiEVyfBS{yw z=VeTQ4gmMnS&LFBV*wg#Ho9$C*?eT*paj+oa z!!w_9Ff*U@I-oR)SPvV2b;dUcl33(Mz;o(d+;`w|M>-$_oI;ZK2qcyPQtHzoC>NHD z=i2~wKj*8nR9+*FA){yRe$VzOt{)W{QbtcpHmzFO~y3K43MPBQ5adH?_b diff --git a/examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/test3.png b/examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/test3.png deleted file mode 100644 index 2795f67a46f90b54caadebb1792d4458589bd5b2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 719654 zcmeFZ2U}C!);5X-Y0|s&4$?vC1Q8UGCL)4TA|({52Bb&{z4snkkSZ3c0-;JLv`A5^ zp#_i{I)sqq@a+BWy`S%#_xl0oy7tPI%(=!G_ZasWb7sw~m6gOkG1Q@^yhTYuLPD+k zNc$-X$rTY2lB*$fH!dYfsGJ867DYmK*)m){1BxtRkbpE2E@x=dOyJ5{agXmagul&cqH30=auRd3YDw=U?)^I_&(+ z+}qqhU&YSDP0GgJ!xkjv=jQpB8xl1?mCLRh$lHe3&&}1{OT|x}{~s1Am;JxQ()_&t zF!6R#=QlTa!mH&02JtFL$we|8FqQ%c^)>md5knEdLMezt#U& z2j0#O|ChLbY5ocOM`8ZSSM4u0DyCi_u$G6L8_3-o@So99`v=wkL;0UX{|S8J><4l+ z*LJ={dR-O;AS-uA?tftaZ`J<_eeoZtoPv_l-=Tj~{SEr>0;p))c!RXSpvzZ#z+baX zQbt))M&a4NW}BMy|8Dv>Uw;)u#R%+tnYGPdLkf^plm7q6{>NTT`mgc++xY);a{WVk zIlBOqYSRCnsQ}8Ox4o()B=<;kwKbplUD?h*?O^(L+LS&#jM6i!3E{s=SuD!W>v8YF zb?qyvEIn7R`e=U^jeGj+**3E#eG9+2rm^-$9O)HR1I^~h(Y6*SP9PvKBn&HXS$M+~iuLu=g&$uUcwdz?I%IrUq5C&1$G3 z(AcYUVAV5w*VP}mppXur!_AyN3BdH*x3oJ+dBYIn;2os(#2+Fv|k zA2R+uEB#Blw%oT&{!^rTx|cjMwM!`ep4ETL_TRGoYx@3ow*B`_`tQT%e~)edT@wDa zi2ZjZ{r~$)YU5dy=iKipFDnb_<1}wda0XdGpZ8p2e8XNp<{`61;br-vOD=s0nFH>+ zx#V-$h>i9vcmeXE+eMGMcLfc%mn+yP!#~kw?L!zIS6*4FY{=U&^dp~oACWh_))fFa zmd88x5EUSA*u6-OXOnA$%}l#g8j>m>`W4NkQe?AlRLE!iVl1m-YQtAJAdj5{A8~N5;xKrp-b$jON7g-mvnG|1vw|5<) z7u*}@2}GYAtOh`DSng1Zj|f{c>ax?4_mIe9n(_Ey8by z=-D|6EtJ&3N%V`ADY`11;)n%@4k$yJnjTP(IHmAb5S?~DdG|QwW~GtITttn+6e>b* znAZCI{V=Dt5yu33uDVz`AWh)73J3ht5q4_Ulg%P^Ezwfby?u4}>8@`W6Zkb*&d@N) zNA;;;MK5^AbHOkf&DYxQ3cSqxtfr}lDK%FsGv3rk?@4#{9r*}!MMV}WI&iNdMF6Od z4H3HC@>2DPk)5T1qW9EqDOdW(UT-KrjLm!l&QMMpk`W%H+5s9!LWJ7M+sA1t6DJ)Z z;DILR=EjHGrQ?j8Ewb(|PU0;9bm55}^u7!OOym-kNFTU!#&ba{^`!FE9E%cq0 
ztCTFOG3oRLCc>FoUuO~yjcO{Zp0V!(Oq zKAN2@@{J~O=HdK;Lj&$hgxv+3eAJ#@AtB<)ht3%_2OfE8zhh;LiOPe=}BPM z%aqLhnwg6=h=^LxoqN9$u_K?Q270LpELs}4)%nompmBgrAum^D1D>K0>SnzrzcB?N zUxf{fy<2NZjG)3_J@3j{fLeiCRnhm+tz~$RU{&@)5yk~gj~_e^xy0{*_e2j%hQ+yD z118?nWvdcLeAC*~xEYl7dmczn9wnoAaoIa!OzM(>BOUS(fi?cxEAo?x35HzwH?qrqKPg>;Wn4ooC#H4*vra^qR@w z49LeKjk+!J^N`x%Li5+8r)3)9#1-W+4U7mwwrRw?k2|irc?Gb+wbeqEEM;`ywT#n8IK>vC4h*rdq znCbbUtsgn^9Z`fR&i+L&Haca9LBXN@;{&C{_A_5M@mxc>2V<$Gr3i)c)ZJA$+aTL6l4)f zK9RZcxTA1gxM`j9DxEil%60Nw^;+IIVXGB0Kb1Rp5d)KZv-R5>Yu>1-2(ee!mX6n+ zAMzuCJ$Tkf8o%6Y!kZv5(EOxDb!n_4{!t;OUuegv^Jm%~O_fbVF}HEj)|QlS_Tk0C z2~P`@u*udKL(jDElW3z_GPOeTA-_f7#pFK?u)WCwqle)m7 z*ZS({xbK+9rb&p{#gduqA<+-1&OA91&*CNbZ{XZ zB9|PpdWys_LW~H8KA~mZCEeYkfjMVZ<<)J;HE_2z^Cr1?`NP4&l6**52Pe{TVXz$* z06QPiWf7wCBf4ssv7{Vjc{{!1*lC-fu5aYmnTjye_GRRzS7#~IX>S~9bk;t%Wo@D9 zS$GUI&?rVxrB^c!KDg-EKe7vcO4!NY%d-=X=Y`wtxT?YqV0x-$Zk1-LbHBKBf)q%J zH!NbJeLf9D8tiE8Dh~cm1^jVLCq5Gd$si$IRaA6GMG{UNKe!j}xaBr1w)u*j3fet_ zZIrjXnD@Y_`fY-^Pm^&X108~vOu>5RnpN#{$p^q1X_-v+mbo>mxHJjHah_{|9#Exm zVL~Sp{HuCc^tIGH$L=e;R)YmD4%2xt(hUpBH=ev}&{&`q+0H&PO3C~BI+t0BQ1BHd zF}W1uRQ-n!)iE2y@FSUU(<{}{re6I;9Fmt13Dt$(^n-k~;l$<=8okxT2G=ZO~^ZrbqgdVk&oN_dWM3X@jD0CENRk!WnCi_C&sZ(vp+1 znZE(|be)bo5N>h&O_*GXv^P6o#!k%{WRc2$W(j7y7{yzR*Akv|{c7h2an4HazS6^P zXga@WRy-x^Z|rG2Y{yKjax3pyOhl{i918?>f91kJ(I{F}c`I#+MYj$d;t%$1jy*wf zN++e#3Xe2c3cu#&W*#F}HGkR?#1EodI9CL$3%v(nx_)m04fOpzzig#u*HsS*JC{W_72A~vmtx3SA}6PUhmM7g333Gm4068I>C_4MpCefW zdB-~Yd9Cw7@3+mlXYN@8)M^WzR!?&}P&O!jAff%@Pb+#Wy!41~JF{>-H5pxm{vL69 zwo70_uu{jI>r6HdrkW;wj#9gig}!1s|D6r5K9Cpdh;`m{);QGaW+Tif%=o-G=PG-j zCrUYn8H&yS!Kmx{1>)1@(6b~gQUN4ji~JfJAl_D4}$(=k(HXY1C2|T z0|)VU7DP3!?P`!#+-UJ@|AaN}Gi?@h%jI1+9vEd-_VJaGpMUI+o57a>@frl6aef*u zolL01RrDgT%j3eAz;PgX2UtJAOLhXpNgp#mXu7cvI#I=4!%8iyM0?(y0*@QjS1>Ew zj-J+%0LL8_(jP4op0bo92c|L-9re`HjW{=X0U=7|DU z>4Z-FEyNWBz;ixru5fPP&;ng-$@A>`r^RV$#rmtGoohvTcBq@LTV`pLPo;bBrbo4& z8p_Ar%3H+jebc{&Y7wwxlQqN(W|^TCPOaR7cfC+kGGAca+UIvpYjNa-$!MKbATPmw zFVwCz$|yS8{9N6fyD8)#y+7@BF!x5d+owe?bSWgz`sUPNB(wZ&K;)^7alF3B>_pk1 zm}yn`l%V{K!b*@CT6zSi*_yl|d3ZA(%TcE}vVIV7aFkcjDQ5%^j$mKP-a}<+I!>jZ zy3C@ZU{I*BwxOWJh?S`0Z_g75A=^QBzHyEIy$3wNe<*hsZ)W+e^->i{qEeqazlOAEvl!pHZwXf=l< z+TTQ6-SlAY*O8aOV2AoPJ*O7oweE`AO3gvTHa-*bq?%$wy>2YlI#JmE9bG$fTc!<4 zcTE^3#KTyaaisq;6*E8C$MeKpac-)cE64`2#-?zeTB^#)VmIi#445V*5WZbaI(*=5 z+MIM2{|8o0asv0fe|CN8aLHsToymOdlizKJ1xKU1*@0vwj=pI47{b5(3dv?uuAB{7 z`25e#d)GZQ>jYUy8gufT(%w|GxbJc6SmWHk&fOV^vtS_UDxktSynm ze@cH%GS=6Nle({a>z;fR%(Qz=vu@XT8&Y?a;rv+-K!B9V06BCM;{t9 zJ@r{ywA5f;Ht9s(?Jad*8qp8e;G%_7==n0#P=PeVDyW*aowA+y zLOA#6>(NmL&PF()`*o?2tVbLuj+9{fg13a*=9FAnUV1Kj*lQA}leO*EQ+*PCLAPu& zvqD+1y6v0AU_Brwp9O~aX0u56W>5P*kuwju@F*Ubvu>#0yU&M_R`n3hki(FR2R>(^ zvsf_9juijQGSBf%5HN}YiA}Y;0BVtRg^Nk7dbuO-L)QI957Z8LU~KCv8yPr*bLac& z;Xz@jUBJO_Q1n~?qOmiX5k+7DimYPoKX=^47rsk8mo8?Y97|vw`MBbI*V1n@_bkb*!W?87vr%4 zV^JP$&kj=7QT{-&Ugmh%-WZn32X;s4xt){SNFK_0#foj^u^anb!vsuO+3{rTfA0yn za(o5=-hDXA-iFMrKoEsENLlPz#(mn;PVYPx=}j<$q+*5rLjpVUj#)@M3nL@yzc*bO z<{hvDYqiR^$En8xeGW)11iqU@Ix8C4XT}z%WXV0Kklx>x!YSr`Q zKNDf13C?MKVpI`sE_3F~V|iofVZB!0h5|;yefUs1kM{0F0o&Eo3h!KCMsR0!8E*_g>tU~lO^}%!;L6cTIH}W z$}9fRtIFjJ5fe1OghRg8y1r#zhS#U5rnfBBKz6It4lCQH3p^^#JWYQ96_gMw2ziPF zYpm^R#N$mfAKMvI_VM2#MM>FM$4Kc@T)M~o);ZWi2zM4y)*rT)g&vI9?^c?QXUQ8b z`cpovK56uheU~5%`LvLC3P^|CF#?>1&vrzIx4Fn#qL$ka%r zkH2W9GInK)uHu%TU)uYVxxqdTD20a)ufwI_G|F9zBCf$Nx<=e%aJR6txp+Q3_1|qL znyR;y&jrtway9X<&t2w82Az4NXcW@`Vt%)C61d*K?X_Y4Qt6@oYbpjDjL@eL>*rz^ zWhE6W$dG!R%J@0Vm+99^#Wa6CK&U>A0x>tQLG=yyZ1cDbD7qq`uA;85Fug=!V>Wa4 zU_qzjf!U>%cXZ2$Iu&`g1Zt)OH_8^Z79Qqrhs1bJb;w#avu%)gQIp5G< 
zcl|_a8=o_61^KKIWrO#-&1fcl^y(g&2I&H++^eWF7d-Vm&j#i$?T?TXi$Y|DK|yq$<}!KXB7X1n=%`eyiu} z*dqy1hjQ$X(KSM!w_{ysAcfvI%b?5f=qk;)U48lrsU6R&S-MaTH<4VGd5st@MXpg2 z>FlJhU$#j}hn_&ZVz;&N!R>|H1IB&t;+<)a?Sb(m#q5~(?l zmj!BG$S7V}`JA&1p^K0I?O-vG)S2%NX)qca>#ylfq4?GYA@!=4)t4pIY)>^%lA6&9@41m(KQSezQiU*T0U8Z9!v- zFIG2hbmY>sksE8|uH$gs%04nx`JfHeLKmkYtDpl&AyJqpwz{KC&c>}$nKiZ44)N(@ zU`LDnTA0oCZ!nzH#NyIVIXzArv>p*>jp&Bx@IU*5A37-u=HG{Fh~WBY(}HCi=r`%V zkn>Q{lv|aQO!aoHyT6$UiS33(+$Zn1e?2bdpw?S3k!OdddArML{JneDMxk^eOfP(^ zM{dygvDM2Eu|l5-4^hMZzKVlcltW5^7TjY-nZMt?DsR2qi>Mt?R~EF7*%#PXQ(8#n z$XU=jmv{zbiX0Bn7e1uN_{$yIL|ER~3a<|jT7uec{nl*j+q>acetdCScv6bPsRZ*K z$O(1ub%eh2)}5=^OMGts9K>R5c)Tb_*7(&L>=!qGE)yiS3Lt}?#w>3ohq8gQ0yz{7 z-GweB;pR+(!%*m<+%V6oPDZ0tMUZ>JoI3*Mk{Td12U~B*9eR)&`}$#kPS=h@1Miev zC@~{Q6(_ZN!Ks*q75>1iw%deEA~G4DU6f4k{$hCE9R_Aw3M@y>v?4)u>}%1(^ZFAv z4+`-zzAvg2Fsf?4FS6eKz!?qFhp5jL2yiILPw{L=GXU-0F`@mq=ECoyU83Up&+e(i zRp2+;!@*Ol)q)G)weZ!+l{-_CPThw?!^wu5Hh61JRyGaV2U-4U;pI1m@AVyf>&&iD zKpapn9{O4S`1Q6l*eLUxWc&cDO=UFeP~Sb}K`J8}Zpw}_;6g$)dV?qHa3fRtNwPBH zf=M~XIys5TsbZt2>*Ct6e;mkBEbvCu1Q*yt_|3rS4V$;y+M?XSH64ZBs@>4=)7IpX z7|XsyZvoC#c?iU5A!;IbyIEo8sDu(je^w5BBblY4Kn&8n%=`6it zEgYvm&I=F)gdC-FvL08H`$o%RE$OZ&^xrXP-)mW%QqT|xi3* zJJt#@*^QFey1p80sT4Pt=jQA(_~KR|OR!dp*Vw`saPo2~GIHl%{gQ+nl+$=h=o9P@ zsBSR3ExhZGy?Jjf4#1V;FCZ%Xff+ znc|~{nht7MamVjlqA{rC^<)H3sDKI{&HE=>FCF-<@Fjptu}<}$RDL^^a0$Ka@@iqZ&vM+HjcP;9>6OHVWwv3 zs4-tXXyXcSg?S&DF@-$I#wMhf2F+W{Yp4KjL)5VdXo)~m!IO7d6W zJB0-%h<7RKac}g4fdXy?g8>>3NiBy12t9r0Y{xH`lC&CSG)?rWhmAz%+O&B(lgur4 zb@YmRiH<-e2ZxZEt^LPx`pYnDh#%9#R9_3BTK4lnp>Tec-akMPSk(9wN_~lo__5 z{N@{BN30#Gdu`r6_f>G4_49fe8Y8W@RVlZv!d;`*=b(~K$!CvjHK$Z)-BO=#u;KA?3iQA!~80VJ9fgE8J65biz%eB9F ze;^3DbEO>EPb;ck0+-jU_KOG3Rz_07?oY8JsjlZT5=$hm{ZROpjku1MK3dm{F5ZWU z3ZK8ymXTl^J(M&yCu83A?S162T>fULiI1}|!ea2SBKkB!pS&}3iOl9ZK3Qo7h`^i7 z!>j0#i3Z7!={<$N>pXa~XixNgVLn}blqSH{a#i4hd($GbY?t}E!eDS-TdvRN$L<*d znuhJ4&acsL_d&18MWB?dY2Y~P^PY>ujzVTJW9iaWG-WmWuz-#+B*r4Jnfjn>SKcAk}HpwjYt>-S=S ziA{5XRTp4zztLrx%x|ENJ1OC)E}2NbA!9sYCs%pdqJ z8zTEfr!`J>v_TlJIn^thD&&$DS$yVoeO6vA8f@)(+H@Dau!#Jg6B1VmG_~<;#TXGDVo%5 zfsz!rX#KtYB?PG~xWG#k@EZ{vt|10`)g0xMCt9_%xA$j{7Ug36uC_%GV!#-RT^6b+ zs~FG;Wmo!k#R-p0aF5NR_Ga5vWc22wI1DG>nCWTie$QQI!?Czo7xJq%^lcB)aftV@ zyJs)8MqoeGrx+i*?t6ZkDzv0#oD{f%A9t{geJrh;q?ZjQy3%-kOd;01dQ$^EZ{^-) zeu~s@8K!_{sS%}DXo8tPi3fiW7FM}`fAXQqGxI-wg`XJrxC+eEE~ARqcAQnv zLw!#dAgy3L4bx-?o*!|3b-X_;?5_l8(=fRD6 z$05zZoC!s+^(g${pX|lMMWiR=X-kFp{B2Z6>+5K_H4VsJ9Xww2RND3ElRqYOI^kp; zp5C3rhiA3o`OW+rfm;`>AOBTa?WuvdaYs#7jjxC>7UkrT>V=C{xBOLqTf_l?q?2=T zrvq9f<@yK%O@gb@!`f?~R?1JCIEICk%g9gMAc?T^^zm6OIEKva^ zD5}0fVl8I3k5KA;8NLGS4QGn>B1w9`8TP5`e!AM*b;ZwA!n7i*!FVbFYc?4kBr*)e z%FXcA8@1mHv{6dEU?O{5i=GtS4gq%MIWv$Dk;+3OU>?NPs0X&vR3vJ;zkJ{J zlEu877az^PS)m0I`A$>uAW#sgSfQZv<9pN+;XV2AOb(}W4b)>RP6#pDd##*#e@9p( zi~2M{mB}S!UnIxuA}FHzJT~~&DJ&(6p~`3I_v3>V{rI^`1hU6oJv~rsngerno92|+y9w$)xJ3aD+*BcFH)_H)fD$jO8mT#V8jz`8cTQVmzW*1L_#@4rCzk_o=Zc1_UoqfPQ)9@07U^Q+#{CODkDs+49a$m`Led|Up; zX8l4bt4TzxLu>Y-1g(!eiiG^Rl86)M^U_v>5S!S)XK4a7g<-sdvXGIN1~mY#B~IyuAAnU?Vx1(ftA-Y?m%FmKy6G7 z=JoO)krM=2U#oOG6HjP7oG5Pcdn)4)bpF+xT-o~K+hT$$)#(I>=(^`i#CQ9%seu~0 z2v~I6waq0T=qJiJORYd?%Ea~^ru2T%Q>um=;{LnxtTVdk1+t0D^HYedA4+naAer%w zb^qJ17WIgZ>(pU~{uscc!m{?cGzG*+Mtw-ZitXmNU)4-JY>grG4+C;s=SsRb)aW2x zZWA?dc1cFL6J$wGntj7;v@0vBuk9nRJs zf{q4Ff1P;XN$q;vMTt|=$$-Y=oE3T6^xcQGpBJ(5>3~Rq!i8~W#O-n9V$H5);ioN{ zls5x)#71+b35t!TBeAZwiP>d6#&$--SW!hUQdUZe@{2dhXu#zr<|{vry& zN5S~|LZ?;1S_`|(bk3@YGL6D90(zEu8=tvEwaC9hc>?(>83!7#4Q)oZdI(BZpk`>n z3XPEm3TK+&>pL{)fuw!zb_R)?PK}iq3?J^rpZq(b1LOM5Na z1FtN5$gs<(^(I!I>=BRDp3prPitMhfAFp7qVA>EQm|82FgfO?>ksJBKD0u9 
zN>=07Z&cfL2%di~P#eHOcTM7JjRBKOyabs%`)dG~*ED@mo z8H#t5WGo-4s`U1wqo1HM3Aby>n|s`R_vEU*l4N2JM4~Yd2O$2K18WegNp&BBtyX^* z+FoG=GC?T{a!Ue&p=Hq3)ROrJu9DnK@U}4SrB{O-4jnZ*#;`+i_nAW(Z20_wKa;}w zW%NFrV=pj9{N9V~y7a;3C!_Ju7CAfkf{<0=eE*@*;%||aC9hFu?Xi}gLMfKfqlTl~ z%9S>GY5lS&Z4aG^Vl0|xXP3e~8kY&@o6%G=qm@SFsx@}=M3~i5iN zgX##C;YD7*GZwKjBA8Eg6DFF!YwlZ|IF{=qfWe7j9|nrxg-4m#D)wzQdY4O{`Mdf& zth&7Si~VQPYzpL&ADr`erB+!Qn+Y!Tfi*XlS^9<@PksXJ(kYMkczG3fup5qaQH52Z zalHieK(cz{H}kCSfE{wE;<6z;y@yOH&))GFL*KXMayj>X>QC3)O$g4|o{s~nEFU(O zR*sB1N@4l>*X^28e7$36A0)q`;g-5V(PzfU4xCA@=K;ZanxXDCo!B>ujvJZJioWnj z6pc61%c{^A6$m`&q|nMA{U92wPLy}k1t!Y6k-pg8>9woN$s15BT($(Xyu{y?wFhK}x7(}do zW*Z;9%_hcv&mjtC2OZdpgmb!N>N*I+L}(A%b;h&pLvIIUZ;=j;lsG(*stOlgAP=2p z17=qQcRp6_lTGR72?sv#qM@N^Cc}%JYEK}Z0JD2p$vchYkGkKWw^*d4a|XpH)Db)x zW%W=z=o(ji2Lh=*r zUtO$cg{+LuDMV4FIIG}f^QUigx9P?`#wHh|=um-M2g>S$I@be-z}Hp{y9$`FvD`kG z8v=gBKF<;*J-b*dht#RC^t!#XK}-H})GMRC2j2r~ezbkBEt9PfZtJyut+HnvIazJ= z&UR2~@ywPvIFeW9Fpw3}nJYGUTK(GiXY{9IVWN^*FX~Z{;p&e! zwBUVDaH}ZEe#ic)t!x5q%vK*+I_6dY^)Kc(qZlSN!@#l^5&+Q#kvurdJgkr5gyz_v z1(Pu(iaGYqP3ZXgd^sI5pcoffc$ygmn|aod8@sx2;HuopR?S5Xeo^a>%%>DCJ$ihY z`hcc{BisZ12Vl)kWFS(mL%{OA4BZvv0w!qTd`1OY=$R$JTq;iVw7&wwV4ZKBHI5NZ zrmhb=;*1;M4z0GQO7j+2g* ziMUDhI?tj+oc{!21?lU8faJ6&HNoDEbn#!Qp5K56ffK~nWLsqszU^ERareGTyyh5d zANJpU`z`}J$tA|9JlfTACRZcp)=m1>|1|aa)u-C0murn@LHb&b15f zvJ?JhN3rl-j+_VgG3bGU+>+IjRj22mGPO=@;n$TG`%tBp37AIzkf`tNdX61k$5Pl} zs*4;yTos;uzLS{!6O|#K7g^x1Y`8L*6RaO4qQ$uO?W5JF!k5g-Qp8Ue;ow1p(ZF53 z^Rc8!0JCSuHbr1Mu)Mu(q+>a4@sn*jAb)*MzT>fK#Tr>0rqCtwfj4m6<@^daeCXE~ zwFWTn@y8yA5v5lRyQX{Ut!iq*u1USiL;4Lj{f8>BI;FosMh?3Yhpnd(Ma?ey>SVoC z5f-}@^K&y=qNknquD3Z45n2beaYP=4u8{TD%{!eBHz(#{$gJ7xTu)3fOKooHv+yA{ zGNf4oYD5jX`&b^#knWrYZ*o;s;~-7Ef6fI{zL*yEfGAXPeAwOnEwZg?=!Ha!40OTr zvRYKv7td$_F(zWZcR66}5nJ8aUE&{zO-&Xje1&t3bta&~3lKbxb47lhu8NXEIofF> zCE3%{&&%)H{w+>^IV8FosgVW=IZf{4NP4p-{kXYe{h&~w*sHi@fT+{-@fG4lc7FCn zrMTPe6iXe~$lee?d@Y$2vAhM4+K~=ucYO@?Y*N~09OFi>KYn3!Cb&@> z5xm}A)eW*XJk{GOf9u~~sDAI4-DKN?W!0!vthe&*>e;r}xQ_*5B5W~kWfZjb$}RT1 zWvt~vl)#nYI-u;^SjL%1+xHarTl3k-{S3Y7dJ(WK>80tuJ-5MuF8b7_8^j@e@3$ee1ADU zVI(*g%ez4=W;UJt(C{LG&?Dx}&m*L%3n&@NtVwOt)GL*Lp+s*2Ncnqu9) z0;PLW%#ssNo;N31cc@)?cC~EuY(!$x@o@g|L{hese^IwCc0KsGI{wfyKi+I0iH+ye zsXDJ54m5(@U;l20%1^?Mq>4>D^aWT?hrSk|5-9~_}483C#7IfdPL*7WQ& zRw#+cGkC+rvrY9Z%xzJg_?gEGG3O0>8c80@L5|S5-bO(uhCAdMW%E2|+@7D%D*+#X ze(q0evT<2{%rtD!n4EU*M~2Jl#_uZ3?g|ettqQq!BHC?tj;L2T)8l8e#L#5>t zrvCi;0Mu674bd$&E_a$Wl>~9a@0`J~RWzBUX>pb+BX3+HJH3 zM(G9Io4b!&GR}R5PKRNA_v=0^hpTI(2UA$2x%eXu0yVrTkl#)n2g9-xE->PVBc@u} zCrIgTk8TiJRe6(pztts%_S%Z-OdrP|*wr3uC?fq%AP|?L!%9@8tZzaOYX^>FPCSW0L67J$s-v z)m$~gwLe;*C_f-f>`W*Ztrq+{@)Rz;bG^35)&M9?^(0W|LU*|7Rm*-UJOt-B=k>sb zdi<5+J;GkgUYjQXyy%&ZqDh_u_doY3D>Pdpi#1{_=_nkwuqml=H;-Qrn3?Eh=K>X9 zTQC4?7x*NmdN_1)pZE_3NuP=5=r=|A%PnD*&7T8{T1^h>R}`N-vL0#Y)1ik~VFB0$ ze?q0bUUTF+vcE2*HCJzM|MiV*|1osT!Ti_7Ami^Vyw*@+<|$;?ww5hhl2RvwZ_1e( z0OTe9LRok$C`2Ar>r>ZyXDZ%?w7s?~#5hOC*mf7IT~%~Thwv-j(IGw~WVK1-ZX#jO zU1=(pcY(9Y>V@mojfyYPQk={vdO1UOjgE5h7#J%z=eEu{*R(?=WS(|T=3wylT?M6( zK+_4(^5^8O?|^oBtb?t8q{~cIT>(fI@8xAlKheS2Mid2@>tfEL*p%-8 zzW&Ib8msRY;Yj4S0_-ar>dPwJaJw*kzaSx*})u}IBfOEsS-oDnn ze5GNZ9btUbU%RT4#iNI7aa6Ql_at_mO|mHuUmu%~A!y!qVg0qFc?JgPUD!L=50;r3 z8b&$>J><2_Nk-8Rv2M-XA#&T0-4y+r=l^BzCy0)$C5#h`>brj;dkozL(D@Gpb~--2 zyw)Va4Xc1H_b=D#lKCklY(^R&6vNU*ez6C_xrY!3emD|N1WgoUPvbur*27>r;$|>w-8_%9p#abYu_KcH_=` zgA(qWy5zOr>dSqz0gsSxtVV+XB1ax+7{Gi}b@~17iCWutiW<3PSyu+?0!woAmHpLIFQ@2Ap=Ci@{<(EV%d{w&vi=Sf+{ zL}aD2=zeCWv^)Lr@9c%z%8?7@+U4>%GO_iWEl&d#x8F5BPkJfyZI_F47%$fOdC_fC zISJ#mW)f_dzsT;lEluzm3`lKTK%g<5?57?XL>VAV#yGdIy@iu`yB&l~QFrm%6`Xu< 
zLxmFMGA@<%Wy(ta`ufBNX9xDxf?hM_()xt2wfF&IJkgM4`~uO`!ZnB!fY{&OY5SeX zTwyaMI4w3>D3R6D8-X;F7O)a2WZfM?bi`g!jXl!6811zXm8;0`6*G#}uz}HQ4*f1{ zlMIl?C|{=)+zj>1j*pB3z==yt5WWq5D~bF-?`FPGs;rOuVvORdMK4YSHt|otzh}V~ zKBz5tbVj0cht`w$G_XS@4dc_^rA2sRMz}km*D6~8AgRy1k4i6Ar!9~x@Dp=U1qP2U zGxbwOj4M%uN=<4Kw*zz2+1X|k#VuHg-s-^jL{R8^9_Yhcaa|#I280+rsP-dmif;!B zMI!q;HagT!Jy#AXA?tZfUB(b4UNxaVngLT} zhv8F8chxM};_~m_HykaUzYT}mru74G`om{^?p|#i!-upoL^bW8yj+&zn!TTP&zau% zO0$AHy|I?J!SYcU???UPE$Y+vTGe;!xyW%j?aLEOn{T-8Ve{ZV@164V)y1YV*J0~q zoPMa|?^Frr`=(14Pc;IHx6p=skhvxe_liCgMm4T%{sMWyuj<*6gt@x`+_vAb3>b&l zYgXW)x1lm7hVY6C$Jk`6RXX@0*e$q*lcLuky=vbcy+1NM8}lv`G-%;a3M(}15155+ z<)|w7a$$C$G%2vW#eTNk_!N;O$D|Z$>jU>)q@NT<9r>1m9DCNYpXVUlA#0sk0Sv1D zTEO|<@c_zOPpftU8hR3sv%({6mmHse;?w9}QsYS$R6BJbbgLacT2=}xM~ObekizJQ zr13vFy8wtvaA|Y}GIYs$a)pD~1Pa!UvraEj+aA5k0Hx(vd}{(b=qEF}Ze)VV8hcN_ z!laW5L&qrQB96td#J;|IsL$wPw@)SUwV~AW6pY^U*y<&X+nnr4Mfn<@M#S{&U7dHIXpK_nn$<-^c7}c}%@-H1uHx)DGn3zl^Q9 z4n}m_vygbK3jQ5D8T9F`pqlU7zvJA*iGuEKsdE536O;Ii+ z)ULa~H1Ha#N7ah?qJSzN)=TGq$h_htDLePrzX>M|W0H8n)L&*93 z-?S262wb^u32q`@mAzerV2#Yp&h1OQFDJ!x1U@boh=v>99S#d+pti!jt~>8FIO~oj z-}SDIS>BlJAx9*${!@>zl4JF*E&+6h(@!*dCX!W8&%f){Lf{US@=xP2;oj zw>Y`tr2K?%MS;3D_?Y;kW}F-qvG2N5J{L}oOj^n3m+@`!nfZ3yf5G111|V>K%Z8O5 zM0-Ex#P~MS7sT`9r+DvQ!CJS8`~B0)mBwFui`z`|Fc+` zG1wd#0B#^Xax?X=gAV|l+}dEE>qouHIpkqrrcd$tyG#h(@YsQ;y%Jw9DQAqoUwj+* z)v4FSNtol*jqU|7*Ceq`Z z`HqAzIUe5)23EQi)t+&KI=xaRV2*OXv-1eo*x9lmxVKSVs26sg{cU@m8=1TkKg9cc zqVh`rCK%Hl1NOCOYV;QIzwqr5xjM|$w9gVB#*);J_x#Mo`|wS>GA7RP+%>39^0B}W z`5Fw5iC<#t|i|l4ugLoe9z4jOBkOQ zw+@jbIPQhgWO9vjac)1JYnA^f{|@!|iQfUb5FXXIcJSN|yHk9+oO3DF7hfb7$BC*tyZ_hf3?+l9X)1u&~Z*xuM`Fl7r zV*H}!?`7f3^105gp

otzvn`vEWEKn)I9H19_giJUKZzo%+6tt#>BrV zZ#_X+8SV^sk^glR&pzaz&q4~%=z+{bnbDNniy!7VmyOcx6+$dn zI-_t=PdxX^!@Y^G!4^mRv%Rx-W&1LYFAAoW?Ib;QM@7uHILBAt!?-~${yK&)+nfFw z#zpjM_;o1r>?dqo7E;feaVFkF{r`tc0&>9DaVI#8v2z%lO?h+RwcCj#!d7 zpgEM!4-9I@ckU75YwQN`iNT}izC!=~%n!dD!EVagN6mQt7JHlcg67W3otrz4 z=f|Z_bG&52_TjNX3*LVXKiGO1`R3WV_G{{UIX_!)IegBK`^If}ZU;wm3He{eJ98QJ zexv-=a^w_GuUcX;!~<}uC?w2VPsm0OAG}GTNEvHTtq$~yXm zC|rD8^#;E+Cr5WJBI<$kb&8MUzQ-27gN<7IYOSk_oJ~j{x3sJB+U&JC+M8LPS(Bk0 zcsFp!H6FZg%Z8S3X2*$-Y4m6`CX)KS9K2IFx?Z2`NSGh7nAQNF;Q+2)%>OV?d@LtQ zDCXmj z4a{AhyMp|8qz@O?oxk0B_b1QQKr(7^|F-mcIWd8Hc~Beqe823&vJYv`^!V%iS8y2g zS>LH0zry15C5IF&IQXvLEMLd*s|aVJIvHG`5BJr1;ayDa+qu8lZf1!zd~9(*?Mcl-e4 zC4}Q}vgN(z1%A+Q0=M6=7rHxmPlzuWJXjoG#_z-MD>- zo=mh>zHY&lyKed^^*$9Jq?!nFuT1yo9Nv3va!q^>;ZBb9JOiRF1FRk8$}h7wKw$R8 zT^hN-_v9Bj3d3V&ADQ5AiESbl8kV_oIns$ToFX$3polII-?Vg4dVR`-Z{!zd_alA} z6XGbI|0Ocd!9_7<-Z zpUZb6vv~e_u2;y<(Jz07{lMaY3Bup`pCZHrJY9~CO=zdvRy?)m+AfVao`gXJtf`zIgN-bH!! z{0oJn=l6pP+;qx2Bs@O&iLf>;I+yg*IO=+v;*Y~^|0JqY!OsoA83ntAW#H$=!1V%)ktC>xZR1#Q8kiEGs8fnCSW3S^CA>*XUWp&sV(fZX~{q_wOWrHAfLQ z@&4QJ(&0XKh+mRr$n{bAmbq5c}2e_}CS=l-T$Uxy1q#)aL(k>|hU-;7VvCc>-P`!g^2e*B&r^6ML< zKZpX7PR4fq9{F3?WreSPN1Dg2$VY4QhuOjX#@G~)L*~C8ag^r zZ{ymo_529O zIFWxV-jJ5(TZe~6GLAk=R>i0DzAyMlgYgq=bz5)?FyU$8@gegOo01RYX9cqLb0aIx zqMUnhiC8G(jn6xf3%x=o%^;8NcegU6UFiiJ8DDcG0kFB~rRN|4Xw(d7P= ze#L^aMSR`xrEGDVBKPG;vls1ol%Jk_iuzo^56ND~^S^UsP5a`9 z;|Jn<$;Z!<$j2I#UZqg}=wy`m0%8ETPkdvLx5gq5m18RJuDF@!j^l?hPo~_a{Bm49 z@=Xa|llesEse*3}zDoh^i}vH^4wi8Lr}Q~Lj^{SUO=P{qv80J8j8*zpxwcAg?(dGb z#=Ix~IQ~#9vB}X`l8)y2Go#Z(k)H{6ynCATsqqU1AH}=*Dy${>>L(|}=W)MJe5H8Z zmrX6Zr^=nk*+Jz;h;J*O8l5ZSnANee@bS_||BtA%fRf^R+J5WI>@F_B-95OwB@i4E zg1fr~2?PxU*Py}OLLkB2-Ccq^!P(uJw(nQ{f6w{OoU?bk`?}PvTlG|3{eULzf8pE| z4Fz}0oC7j9Zt7b`rD8TFlhlsi%VRg9Rxu8HSF1?gvQB`fWoY|b*-M?yic zXOxTlx2;GDts~ZHi**oa9cW=TBU~Ps7hFcVdN}-@&iCE+LAx>e?Z$roKIP3pg;#(u z44RX6aDZ#0GMJx33w(gQXnPHiki8oEDE^aok)s6F^8zSCAG-_rETSaVvpEfEDs%qLf~%!i9rb_%R*9hWr8p&P6{`dm_H(Uh`6b!SQ(n|n(s#t3hpJ{ z9h3q0_ zz8Bxtd{4wL0ngom_Vy?JBewX5JU0UZ&V@>)wwSPXOCqQr9) zO-|e}g^R@6g%eRi=M&G>l;lb=%3TBosWkamPIGs)-+}$DL8LFGz88rcC-UrGR=!sS zCRdf`am8Tq0}EV7NUOHcxtpIUUr0@^s71p)g=7};^%~$miM~^ zMMe1*^~%ZjlBnX6GSI&p6OF0lvl~X>Aoa|S8;BBwIJ$D^zsS(Y#>f;1L$~nI`1efs zZ?L;KOsE9n1GF2EZU?s;`3+Wk3ML}>CHPKG6qxDXu{FNKV9Ev`)h()Zs2$~{GLoA~ z$Tx8$X(T!Idg;B8jY$KLAyzaHke61N_eI@!?;{hfmAO|WBzW!WFBmi=NvWK4d^d0{ z=~3=OlvYp4_fHtdE#$L>4R<5$EP?#04)IgCg`MJr>+P@;ApQhA!&4p}jBg1eLw}bM zkUChB@gf;S1e{I4wIB)h$q89A@RUM*l)R`CS@>!;79jlzyPQQfO1pvkotX224f;tW znfe#tix%}cWE?RVZ{#C&0aT0s9FdNSQG+862&u;cq&ABBfs;4{!C8Tl>8<38jFbhY zwBbj;L70}wfrATV`iijvR$z(M%JfG({+CM8KLyMx0mjR$VKy@Ab27dgdnE1z{W~bq zC0vpCx%e5>Ckc)$*?j__0u}fNciGqgTkifuFd+x8m2HaOF>#X6%rvEVLKsU=4t$*?4cgV z_+6%*KRQ7H>ATS=)`hT*cIrsEI05-eu!2}cRoCwFJP=exYx+L{zZlA&A6Oh%#`gwB zV}tnN=&&AU{Nr}V?TaISI~6C<9OSzn(f=CinV(-u$}h$*5ASXI3j*u5L-o{=`f-Jk zKn2v&Y8eb36lz~sRP;!=Hwm^5+s_zYCS!cG_{*m|{{d5RtcZ=oVC^iyYMS6XeW1!Ck5&m`vvy z2H(_6dF_6(rT^=gnGD)zaG!$SgnD|-EbfmooQDKb&x<3P$+lM#}a-~*2_2r-tc=w`Z>Yr!RgS8 zPVt@NF48|m<7&n-U-f=^Pc;ekd2C)aZcv}(Mq;4@T2@i+(jNgkshxoOr*~31La#I= zG%D??C#WmJjM(E6kI~&Pp4vGGC)JA*MuC=1Mm??|BRNF=56s7b+vK}eTWsv3eQzSK zgy9R1iToDnM!q?~5ay-5twODWt%yGl`?^KcSC5a1XP((A4p8IBcQdNOt&}?!&Bm|P z>x1)F7R{q^h9ziDzN5qANzN3hH*(7%ox3j?|%U~pr4TI z_STu#s95(3L+P#Zwus6X2709&Xq|#iDtjLDx?ZGNMA~sfyQpm={##0BKAUaLj&h*& z<*?TYn=!B2&=0$m{oW4BU1=;g_woKYJ2Ulihk_#RN&1!imh$~1^pSPTdRZWnCX6rE z%K-^CcrUsD{DR@3j&^7{jGuJ+NonPc6bz>_5LUJo{diq}q2(d}u>FIzjP@tMD6ACm zA88jf)|v7`c?x(#1R5FCO+0r}W0Zr$@8fM2#t67fSPmMfc2~-Jqn@XPfu!_f9`QJy z<5HA(zoB=TL_TA+U-VxHN8zSs0`*U0WHAeoe!e~1Hxj!G-*Jog*FhCtU_MohuM^*p 
ze7k!cJ=S5&o|qjmyLmo<-TQv}^>XA&gnHV0?Va{1(jlmO;wGd5f1(}TK*amt*z589 z3k=9g3ieu2D(2UaCzd7cL^zm(_7DEwp~GkOi=WZ{;=bsP-`z?@W0t_a?% z>faE5wYp4Q&ify*F{(m5jOs;%4m40uJBlxFpdDzc3(83;QazF=LVnKIc9bLet8m)S zXx~4^CsA24QEv$0W#jZknf^w;kMiDV5jl|}0!$$wQffCv29{+r)=o`W7*8o*^FA-4 z`4q&jrJm9toXR73?r#b2q2Oa<0y8#Vo5}MFlv1?c8;Sq*@ziHI5+W8P;M@`}`2@1# z;_pAIcS!#%8yX8Ey)OZL$l2f|&y(Qr+1MR4apEZ+0_fRc-uK0@nf^ru)L(<(YcsUT zQg3xD4otICUR|sTTF?*6V9Xdl^%Sz0BZPwitq&!BI()Y#CxIwx+L{sn=7ClAl`p4 z`0r_Fb(9okh<8(&Dv}#aMkaZ?c}}COk&Q|FMg14xwO||n6BR8G(m%XI^nWfqk>%rk z3yv&R>5r05d0~K&s2p%u4nf!8L{c$K7e`z%c6lAb2NugZELQ2q&MzVsOnr zBl!Kqv&uY$kw8_k+*wL_BOuH&?{A}$A|JgtGH2>*v@}`^qhi$7^BMX1o&?c7bi_Xn zX_Q}@BS=}kmoo|)m1)O(X8LmSJB^(X<>3Iclg|hj@?TM;ik?jo3fUI}%M+)-yd;R+q-2cX;bfChw~%+i>DNMwJK z1CEkQ$)-|2^MrZOOw4#R3RW}wP_Oxco2E)ThB^a$FuWx`^{Llo;O1=LNGT8Fn_s~R zjc>jU_%^$io#!R^@ms_9mTE7-0{R%lF@#|le}j6qLP;z0YOFg>9HD|7EG+)^tc=`I z@RI!J=+C5np&Ow)QS$x(s^P_o@+S40EN|#%+r|@ALh3g=Fe)&beD`~MT>8PQ@7;m& zD0m$25$Hm`wej%8yyt-T%(iIXdEwFC@2WsLz6UIZo zq94*KQ_duFdf*M;vug8%F|=oJkcS>%bZ}bSNuc6Pso$iYjR05k9V&W>qmRN(1?>a0 zAG()LB;|4sg$Oh_xIYty&H3!Eu)iU_%|S+|AI_sZeop#HY7@0L&m*B=(})MaLsY%k zbpB+|<@+>sw#Ga&6yt*lBWn&Y`iC&xhdK;_Y#`V@95IVFhqQGM-m(zv(>&m`u!qeK)M7lkj`8GK)gF1ZR&r`YI# zSUYg;?CVQDYQtL4E)+V~-F>{TrBx6@saJzUf6x1I$Y91&uck0Oy!QqwCj=7#^ERJ6 z<-~GTCZ2u*xF8#&Tih$_6^CG~wEnWT@_h(r`k&}W7QS(jd^VwJCY~}JMv8XotBl2h z>B0S13jv3Jp7SNo7siEiMlKN%W7M2er+s)qxouNFZ!ETZv^$U`kWx*sZT|s({ja4=hxx7&$p9z3{2l09yzlpTZL)le_ddyH3i&3oom;=8ayED-A! zE(*Wm`&=ahZB=lfm(G-UXUyE964O%QI2FR}Ja(SDPf6Afl~zg?P_{_B@JYfdk(yR{ z+Vxghr)(iKkf1UTA&_$2vq*Bp(-jU%QT8w&V3VgJi&eA&S}r+p0qxwQ?<78&c&`#+ zVlSn*%ZQ1JNfMKc@#xPL6D;7^D_=%@8O*%v8y6LqnDjaH)FRnLf%>&bk^t#%4 zl2PV|(vM#s#&-gq#+m1CUFUm;?>HZ*Am+~A4)==sJK-o~0F=WOH$_B6f)}QnNO=|Y z1hPa3F!Hh-YIgGP1#Go3;T2TSj4#K7K{f=*%8kKzdg3*OAw9$QVcIl}`gybQ5X5_@ zp4-Z0UIKkRq#fpc8|>IV6MrxmKgJ6`#l_VU#OuNpAw_r=KlJo3HyEBVj?>=Uph1`y zP^oPG!K(}&zB7~0U{Gv>XxDCcpICLmSgzAnlJ2JQK|IoX>0lvE+O-uoE6WIfpV7P${Aseutt(k4jkmlcu&$s=-psM28Ozc zu}AP`;C65a>1uk_<)9on6L}b!Kz@IC`$WZ#g0`MX`LF>^y-DQvfgj_5U;0Gq7}|RP zvl*esQC<;|WOA2RIkf@x;))>SE%P*snL;26tc^=PmZuyJ32RYKy~s=JAo&HXH8%A@ z&2s~$P0rss$|bhI($m0gTj7b#e%!$swkNSDk!g^(O}76c`;G z5u{%1H!XmthzhW+-InrlLWlAamf=#73cPP)w-?p0@wL%YzfU_x*lC#ZO7KDOWAFg; zWoGc_5aVrEa!Lz?1bl1_k-+M!wS-1m(!FHeRifR|_6O^oqWqWX?e@2MpkKpc7sf6} zUQstumqhv5FWbd5h#5}3((8^6{nVG@89f#K7<5ZJzmb1i=eDdDyEcw|1Juvux0`wN zGgsBDquhdA%5;S9aj3+hQ>grNTXh(>NgSbu5LSnh)gV5$x(4e24OJIy0Pjp-Yj>q*+7Uk+nBh!>J-8&NFY!`{ zqb2HJcuJn`Y1)h&X+ANJn@lkC0*0QW$#|w-TZ7=CSoxo-BnRY?3S&%N!&eP`?@pDC zmgPObUQETlQ?`f|f!vY5*OsOF1_o z$Sh<;+&w)e3MZkxoP`wl=c{sM2m(r96vqkkvUiI7o&YYpOZxmQB^VX0vffUoT^cvJ zsb6@XoNbzpgs?qQWkKIUcM8D`whgumvVe>|h;+A7?n$-|1{xLHUOAZq`9A1NNKyb| zji#R}VkljS@8!6xfp(ZLja23_(oeyoQU}uSMI`zU^?Bj-b*Z;A%US3qq5Ml|`)MDL z=vH!)gb}!JUL##vBb|Ui&18YjW--1m03@1-^jUxz-^Osg2`uT{`@3noXYM}LyP`mfeb8%jP?z=p{LDuODZjMNJl-c!EcQ*WvN zl1~;+5@~qe&hKx&FTvA`Ow_!HdZ`~smxlt{6`L;Euvox9BHfeAZL`XTh63k%<2>b0 z#pNjkoAYCotrJh~RR7EZyTNCa|X|Ry# zo#c87@=X%JK#upe;Td0u_p3R@puK^afw=()5hC~znn!+T98)&R_(kzY;x`j6g)w4& z8f#EpGf!elM$e8RKeq2^o&4~lUC@RAaz)W|{WjrD{f>T;_n9%TreA?(CXNu_`&sGb zRia%l)L*awRB+uL}~P+G0YgKSET)4oW+9CfZf<-^USO|E`$;@_+uvJu-%;ske{9(*Rre+ zZrIx;s+r)JVD(@*;u)AxAD|xdV41p-{#Phh9OIJ)mYMOSeS2(K|BtxifO!aTV2-wd z?|;yW0uk!X0N3ok*y~KQ-%c>K*&@0@xrd9j!+|*T3 z+i;+;&N;%kJkWj<`e0<%>ln~Ix0d~t7;Cu0kt3^j|Pz@Pe6{=fuW-IiNc8 zow|ZOf<25?JfFU_4Ynb_6@X$F@;xthS2@Y2Emn~beDgSVMl}gLgS~k}ysi3r9lD8a z?ofRM`IvZ68&CYjm`_ZhoTH4CWTt%;Fwbtt_u8-(D<~%y^OkW(PT5AAM7}mCyVpEtRtuqIQE=vA zI5Mx^+T#VIg8#93B6+mCTLr~k0?5T1{<@iL^e|eJuc(F(Qf_~@l}HZU1KbjKhID;# zowX7k{=Z3~f{Uae(joJ;reaY2Yq;nii)&~cTJO=bASO*Cj 
z?G=c*ShYs7N79PlqDX#z#YuM+PgoBKQ{fjW4edC@RWnBklVIe;Gd5M*jG2_<=@F3t zV&i;SRJ!VLb)v?6!d*dGuPo)<3Z4y8UZ_o|XYeWOyjY;LF^2d(nU~NZ4xvZco(C)X3pfz!x~8Ndg^ z#(G(u`YX@W?kfEs{3-M~6eivpB%|vY-<;vE!)5qhP{{`~r$j9Z^%6#iy()z-G0lZl z-%9eo8i+B8hg+3WYJ@NcXkn6t(?xcMQ&P@JJV8NUxKw+vVBMTI&IECHV!e#4ia?*k z^Q`Y8$4NI6S!^?&D@J|}Gyg$0HPYJCe-F(u#uDncj2XL)c6NhO(SMj4EftVD^4za6 z15vndqMgb*#lQY?vp)T^THT}-qJJlF8{Rm=@!n3!pL-D6>$3@qaTh@z;?={NnsES$ z_sHl>eoK@;#0nV%bYtI9Y;{C7RjAK7e8AHVZfZ1S{{+*vTxUGcoOMvUkgg=ZB81PN z>(md;##bDgSms8FTbUM z>A8*)f^PS4b~u02aRuD0P9@@hQSXT4LjMsFEdzsDd0SyH5Q*m&34KMT~Q-$_SehTKOaGJp$nsFGhg4D&PbTo;e$(yknAh?&W^rQTToo^c z+~$2rrL0JvP^aZr^YFd~Lsyadx8&D|39$~9Bm=6XrQYf2&#CrmdnxtpfHrq9VNo1= z{=#zcle;_UY$?g1R>~Pit70%@%id( zkr3eB$k*jUMkgpE8Tf_4$wA&jsIsVgiGLTZG4BEXUI0fTBJeSCcN(g!phGmQ6>fTr zgDxP=g|y?Ceo#d8xEB~G)+2abN$+Oo`2Z4vOT^29eGBohYOEpH9ys*WOg&Kn4%O&S z*uzCaxVZ`-EN1C|0q+SG4%@r(S|!3WNU~@T{=tX&;NhYycbUlFpv-GP{^RxO`ULu6 zA1o{L5Q(Mho89N2w@Q=#dtjrzDCacr-)w}bsDVzrbkrh9`qTPZjdo*tuu*SE`3>!n z&JnU%`}ykUr4WKOT&i)Ee469GtPWu!b)+zoNc>W$BUnJ+2Fe8L5pR!mT)>4`C4LkF z8mb+t9jrmU3!<2zea>cnFbHzhv-%D1D}e#2Mt>c1k7G803PWiIdQU)#3+!(h$L*27 zBJ22`-aYS}qJOsHu#Wi|M`{&0t?OgX6&0JxUUv%H-L1MnEmepcK6$MhJU_8%mlzT_6D3B`_1p5d0`i2+zF(OBfFUXCOE|Xa2el65R{#vwBg!8S1|&V- z7kbxxN3|b7D}{I;W!)4%L25g|#pTJbmY&pr5ZTM^M)m{ZPgYimN;zTKMClXKe*7@c z)GzaWg)>Mb6Yfbb4wr)hQjQi07?AYt>LJpVRBFme6E7}BWFFhAQ1vi>gQd*nfq2R> zwL|(1+P7YPBi3X3ar0oHA^AT9{lNM{5%X9uI?flbnH)5L5}fiGx&$zUv;&DtYgh86 z8Mf>smDqw9H=v^V-2BwzBPcf32Pfb>PGL^kd1EV!XK3&{TTtc;sQa3zl=0Vn-#bkC z8};LY4aK&us*FFFiiw`AmlkZSU3fnmw;OW_1;P+VJx^;V1f#?}OsXa#eZyegAoDVq zBxHsXQ14r?oS%`z^otu1*N5kcuwcKF?-f?Atg90o;7*YLDAwy_>LX_E%+J8yV7dV7 z1!o>LmEQ=lZew#&j312V#*fTLB@&Sh5!Fo4S;2fNWHvKN9#tc{K=iNFqaG5H?}?Wo z@=b((!Oh5$$Wro2%iJ~i!fLDw=qp^Bkm!J-s*GIS@T@qREq=INKn z&z57+->Rjp5MuzW1jp%(Deo>vbr@8RdmZ(MeBaCudS!k@rICVm_C{GuyOAdsabHre zEOv>=5X$e0WG$9@Z8lQ+65wQNW|4s7E}(+XFx$$1Vg>5-cIt>qoB2`Br9T2^1IGi* zFS8_O1Ygq+U*lM|80BV%q02xyFTq&CSOogBfApu{wkyaWS?5pVUxZikJp&lz_I%$L zcq$-hY~dcepD3rP+gKc)qS~q@njwS(KZ?L2>QL0$DB2r28+jMmL3ywA$NGP?t0^)v z`WwMTU6*#_1#6ytm-IiX$wlHH=xOfrRlGli`UW46PuI}+(00n73&YZu{#t{iD>vol z!Nb}-!jT-La_~JB9snzlPi9!l5al%h)KG)`f8^2}<{=7sOYZ&z%N$mOURCp!ipoyk zsYS_t!&N`;HO3vosJU=3qKUzyAm8ig^^FGPdsDq6*3G`v^l09nLLzyV`~!9hu?oeO zwiOOu6pmx=<@GOG)rXrES z;o?gqa9sUvaE<4Yqlzl24j!0f^uT!|3GMCPP z<2@>ynvs5~Z8fs!`0%&k@59Y_UT^(vGvVB(xX5F|Dw$E;>}>%OQ&M|i)HPhGdJfK-*y%4i{QYF0I;nvC>ktZNb48J{FRX?z0e8;f#=ab#QVAtw|V z-CU~7qcg~ij!^$=#wGm}?P-af4;u$#lSLR$rM1#Z2q-F}h~K||VHL{6ccSc$f_1X~ z#rH1paIcqB9LwPsWNQS15}!5xeSCloXJBlbFOP{gH@b~lf6OH|J8UGCDO-MBrS zBN8F753`IZl>ZFW+64&NB2alSD8ZtrH1kvbQT4dSe2QNlzcHSL8*Ufw7Vbko{K!>A z?WpGG%UFK0A%|bFe5Q0;dd%GFmsI*Zlqt?)ljm}(AoqbvJ zW#bp(aZ=6}$wR+?us_;y)cZeczO|NkrJP>QSn81;)}Q#k+l>yCGYbhO<6tMYE(?R^ zEVn%IXXaLLqMdcbTggewqFsa37>)KCw%*cY!wANi-$d!;H=HXBm}l{~IaaUJGWe{o?TApAujNF~5TnYas z-W@fIFk)&qWw&k;&(P19FX)f!?q2T<`If*f1mkMokL0!L(#~(f#Utej%Q2qh%Z8mb zQi$iCRyUgs1M92>4)YPF`?Sh?HI-M(w?^%U%?WfWFjPw^l3n8uwX}eGwL-A3*?7*& zr8@@UHYKgE8Xkx0l??_*pXarkyuZxR>F{z76>)GGiv%Mn}fOD+VZ zKYVWWsU!8xW#_XBldiAvv+)bz0%NJMitr7R>UY#@C$hAy)E8S+kwBrij1m<%tJ#ah zlkaji62@J>&vn(z4;1a`^(@S{Qp_{x6Z5~d4n2(Y!wjT1<)4fFR~(QsD~igv=!^BG z+EN(aPce^TUNVm^M;(niM#X1C9T!smQcx_+7wwsPQBn*F8sD8%Zor{FSXO3`amU2ul|w!4zU0ez8-{tt7)O1l z>Tz0To;z`?Wo^RONI+;G7^$hQ%==dCNQ^HxGscJ?LKJJI#J`gB&Z#aE95(ze;yBr8 zWhB+1S5eKPjL=TPLeb}=Sl74*`Bv44zte4Re?|SDA``8~I=B*y32dSrJy6}T-owem z>B4R4?~Rddag2v@M!DkrL;2&>S)wVx4eJ4GFY6^4;ErX)&k|O{&^xe(iM+Io<1;*Y zLl=~jYEL=X>$i+wM6(lI6>1+WNPqttY-J{=zYB%F4lbg;D(Ja0#0$=Chy4o(%qC&bf6zd^w*)RRLl>79Y#IM|J6Ka 
z(0=?ox!N25dqy=q-)G?8j&TD>k=5WCtG41ky`0FEJ~ri-0EIlAaISJtu(O_AM&FNk z$j{{<4Wl$%{MX?`M-|my{HJ&~9zG+sWo+kI=mFm41~@%QKb;)r@!Xqd(xczeYv{KmlR!8&ueMuz|>%zQc|(MoSn{A;tPyRrJTDg=?8?faM(vjVg3xO zL_^{nS3u3~c6XnEQu#hbr=!tC`rtVwANE$+iGQCOz9q~_R}><=gd;riLp=XnbjI!u zU~!bE7eELKV}&JAjMA0wJz#IvQ~z#=0Q(Xz7c#Pvgl*hTF8zZda-uq(dL+bZ3_>0* zg=;((I(+FY8Bnu{SvOFJe75t0Ai~zokrM)Pw1e7js*a zx`-r=A{fZBA!`k_I$B-Az1jh7JLM&jYYr*r84KwN@z|aOJ7#5a2HH$iCk-dJY<%yi zw(wPB<&{)ol>qZoQzU~BmktUO4F>f;QGw$A;WZw!6jYl%JlYKy`XQ(lz|hq2Gx4Ue z<^Mvx9|2Z;#rV9%aYRzWAy!|x4IXO_OGNO2E?mWskNOmVN1sai??EY*rdi1`fYeSk>R0yVKuD@Xh}*mF*z{xyM{vT-5b>t2lN+i7Pe9TU@jNwM7-(l zOfNI}Bt|wx{J;wHey{}jWeJp#aRoCoO0Wf7tg_XjpPX12|Ma8>iKH%}{wcU+F9GEi z0ZRUk=ODNBUEq5GByMGhp8*5uROFKgU+Re{4>_|d>tnt0R4|dK)0Vh%h(ArgtxhDL zfv6tX0Fa0oa-tTDAWAb)1?Q7z+z~tz5I+n`AA@=sGf|lnUt6n0scR_bO!##q$ULhV z-zxqn=|5wan3Z_hj0{4EK`7-|*gvl_^$S6J>mX?Cw{_6P{8pYxfBI|Ibr4bjkNy1tw{btPAr`^Dh%R6?-rC zX6!ZcEsq^gW$Gu^&WwNPP3WNkJo?w%iPUsI{_m>*ur*^}hYc|hqQ7uB>V5_PM9mG#uQBhsvttu!IDL~nK zy}i`0A`VPg@2s04a!{7zozso_$wBxv`6TC;hI}HRmRKKb!#=i8nFxiUU-3)6SjJDO zq?A!BQLmKfI8qWXKQg8Qr0=7q)L19_eBCv2@?BKbEvU!q$nFT^&DG7f9Qp;-@Gzw7 zO7Nf1?NDO!JA?|X1K&rvsqM$)Hv?-l*0p)lJZs(}{ysnw$H?!9eL@(X@RabJ$PCi! zK__@Z>Mhqk^Zsi*tT!aydi#|9AM-&EWDC$=sNg>v&=*R{;~U|$S2_tqBz{`_%_pD1QU=l=U4x63!4$yD_s*V8qa_4th&bL0cuPns!Z|k7yTt71USi zR*iksedXnG>38R)ecFM(@pZ3%ko<_x>%5eI+gC0ru&3E=`Wc3Qt?>`-d}X#Z))Id| zj3DFARba(chkPz@_oJ+*nusWvr%D;*`eg_UV!p)sQcdZL3NbhDJ_9Hh%f`=|4p zLVnj^h@f9w3hn4B`Au-fIKL2I9ASUQ_tjkXO}Tn^G!4uHEk;kKB_rNgXP7&l_8bH~ zyo31rv0FRH`&`@{keTNTNJ?*zt`lwmdh)&zA|3kOErpRual*^k1)d>Ze@dZ$-JDK8 zv?U5aj(E2T=|=0rv{5`GHxf*eeaJpxGrme9l%CKX*2%rl6YBG~_*y4kKkV^(lWv4Q z##eC%K?x^e9Dc&ZFY_4pM=xaG!()F5*+=aG`0X1@dhM;zLI0b7O2RNPp;bfMsUGC{ zftp5VBJ1_ki8=!gmg0-f#*5prpn%=9$!c~D0-)UVTv3s+Kw7HPDPXKOS|kO?A5sWm zMvywu2WgJ>hN(i}b1G@HMU*oO^m{7u$*g4-353!S6i+)s+QLr}BXz&y&vGXe69ory zQ+&?wgJX+G9c!qEVoifUEhWTGamTxp++mdW7Ka0}fo|jExSKE;0=H(;Q!+fs`D7`A*uY-0uOMPx|&Fcv!WJaz!NXm0# zVQC19gO;E}x$S+0v6APXwV^r_1Ag~cZ#3!qq0v6d`%P%3=|{}v`}-ovOQiFwM3^9HZoxib2zocI$Y++~y>4PQ&=Q9g- zk~vOJ+>GNEv0B0g=@sftap0LnHmpEHe$|jbOpBfpHHq}wkR>i5{s-*39?)LEwN?Yd zzMMV#k#3%{K)|u;2xK6`c^(ZMaw6@?z%Mh;30W4*19asN1iXuD{~g*I^37uuH<)K; zNi)WL&%!wu%n?jUJ;w&$2W3GxFpf`YzX}MH1!KAh-7V@jH!vkY`|w!4P|W4Anu~WJ z5J+-*i2oF2EU^3`yf5#Rkqt2}aa_W; z*C7tV?N&r~*q?IVngs%kD>iq}bs_9US)dSBK*mRXjK^Xe-ci2Nmix(DQ@#yYI83|- zUK5c7Vgp%KByPc#!8B%0^7%^dj?z+bezk9g*HLcz@P=6Cq1D*x80I}hyNReiQK`39 z8D~V#Twm!wcK~ESo4*!Srj{CH8t)9K^tZ(EWE!5JKSJ}K{ zR3QFq?}fUVd}=yL#3}{mQ0$YWzQe@5fqfG67WBnRZe_O^cl<}~lV?mu#;cu(S3pUq zvN2&*(_j6K=WJd*qY3$@RHq;XQb5qu&?gXoFD%X$+Sgz2qj#YmE0H|U=X+g@SH9(Y zKV-|@sRzg{F)py@IWz1zJZI<9+T7$n2$k3fo|EC2l=*beWb#MuUsK}J#bt=& z^Oq%Gwtc4mU>7Uc2dQ6-n!`VMHqsghI2Z!oPb5cRH1Zf__&%K52ASs^!xG5Bgac3- z0R_ihjk_YZ$bb0v{nhukd2Sq6H;(Z^Mf_6#fW)L&R6a2Z0Gautw6=Rh*jC>mABkS&zAtBch7e3L6zfGNA1-PHD^soPpf2>cN<7 zj3fxxB20~pk#>V5>?sEF7i;eC7uB^YD&wQK zfzq`i{2I(58?ev*PbJ+fZ@w_h);;TieV=mrL`Fvj^8PEV+S0HdOPD!K=xeZi=%*m< zMWt9nFq~FGD?aiK_2i~BD;xQ?*ZXR>h?hAkRdA@xQ{$sfzj7yEKwHnc9~K%NDnK|m z)FIS|_7_3YUz&WnS_3SZhj*37>I2eW4c-pkAioH=PqLo0ReBLMEBW*=dkSR3$Z4)N z_%5FRjUkL%c4R2b7xW=jw7NX^b5PV(@$B$8J?2%*@`Fg2Xx+YW7s|U1 zt86vu zjy@y(4jgVABrL`FlaIFee^>Otu+C!-S3s`)YRft-!#+$q5o4&TO}sQ%*QFy}GE|w& ze^hfP_$lgntb>mbzopmQYeV{Mp#5?YX6MMxym0$^L&R?)yGm}c;^x92#busg+x$TM zSTRpiiIGyMTBuB@6!m`woqa`p3vhHJK8W4dGVb`Bo2>srJKx!{Htpadr!it}3bn{9 zSU-+mz2X@+{5%a6_Y!*XX$7BUJU4PAw{Tz6caTvI@aWG8-$ADGQ=yDXMqxmBn@UdD z2;44<*|}Dm%Ld=@elZ(JeKNYDWfG7h)98YT{xSZWBDz-2!Md+f5jS=Xc>W7b>UqL- z%3nUG`;wFS7vkUG@)i~TK6xE~o0XuuloOi(Ok7QJJD z7*Y`y#te}OjPYZAsSg))Y?eztImL{VlcDmEbNvI}HuvH{>{^`p#V=NqS?NN$wU2=k8;Zx}s3 