From e1a173196ae201a66a6fe0c3b8b7d43218e04a9a Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Wed, 6 Aug 2025 11:31:35 -0700 Subject: [PATCH 01/30] initial check in for tensorrt rtx --- .bazelrc | 2 + .github/scripts/install-tensorrt-rtx.sh | 39 ++ .github/scripts/install-torch-tensorrt.sh | 12 + .../build-test-linux-aarch64-jetpack.yml | 4 +- .../workflows/build-test-linux-aarch64.yml | 4 +- .github/workflows/build-test-linux-x86_64.yml | 9 +- .../workflows/build-test-linux-x86_64_rtx.yml | 360 ++++++++++++++++ .github/workflows/build-test-windows.yml | 2 +- .github/workflows/build-test-windows_rtx.yml | 341 +++++++++++++++ ...nux_aarch64.yml => build_wheels_linux.yml} | 66 +-- .github/workflows/build_wheels_windows.yml | 394 ++++++++++++++++++ .github/workflows/linux-test.yml | 9 +- .github/workflows/windows-test.yml | 7 +- BUILD.bazel | 13 + MODULE.bazel | 17 + core/BUILD | 38 +- core/conversion/BUILD | 38 +- core/conversion/conversion.cpp | 2 +- core/conversion/conversionctx/BUILD | 38 +- .../conversionctx/ConversionCtx.cpp | 14 +- core/conversion/conversionctx/ConversionCtx.h | 2 + core/conversion/converters/BUILD | 76 +++- .../converters/impl/quantization.cpp | 11 +- core/conversion/evaluators/BUILD | 38 +- core/conversion/tensorcontainer/BUILD | 38 +- core/conversion/var/BUILD | 38 +- core/ir/BUILD | 38 +- core/lowering/BUILD | 38 +- core/lowering/passes/BUILD | 13 +- core/partitioning/BUILD | 38 +- core/partitioning/partitioningctx/BUILD | 38 +- core/partitioning/partitioninginfo/BUILD | 38 +- core/partitioning/segmentedblock/BUILD | 38 +- core/plugins/BUILD | 74 +++- core/runtime/BUILD | 38 +- core/util/BUILD | 48 ++- core/util/logging/BUILD | 35 +- core/util/trt_util.h | 2 +- cpp/BUILD | 39 +- cpp/CMakeLists.txt | 2 - cpp/bin/torchtrtc/BUILD | 31 +- cpp/bin/torchtrtc/fileio.h | 2 + cpp/bin/torchtrtc/main.cpp | 10 +- cpp/bin/torchtrtc/parser_util.h | 2 + cpp/include/torch_tensorrt/torch_tensorrt.h | 4 +- cpp/src/compile_spec.cpp | 3 +- dev_dep_versions.yml | 1 + docsrc/getting_started/tensorrt_rtx.rst | 65 +++ packaging/pre_build_script.sh | 7 + packaging/pre_build_script_windows.sh | 7 + packaging/smoke_test_windows.py | 5 +- py/torch_tensorrt/__init__.py | 81 +--- py/torch_tensorrt/_utils.py | 32 ++ .../csrc/register_tensorrt_classes.cpp | 2 + py/torch_tensorrt/csrc/tensorrt_classes.cpp | 3 +- py/torch_tensorrt/csrc/tensorrt_classes.h | 10 +- py/torch_tensorrt/csrc/torch_tensorrt_py.cpp | 8 +- .../dynamo/conversion/_TRTInterpreter.py | 28 +- .../dynamo/conversion/aten_ops_converters.py | 41 +- .../conversion/impl/normalization/ops.py | 5 +- .../dynamo/conversion/impl/quantize.py | 12 + .../dynamo/conversion/impl/unsqueeze.py | 20 +- .../runtime/_PythonTorchTensorRTModule.py | 3 +- py/torch_tensorrt/dynamo/utils.py | 3 +- .../fx/converters/acc_ops_converters.py | 13 +- py/torch_tensorrt/fx/fx2trt.py | 8 +- .../test/converters/acc_op/test_dequantize.py | 4 +- .../fx/test/converters/acc_op/test_pad.py | 4 +- .../acc_op/test_quantize_per_tensor.py | 4 +- .../converters/aten_op/test_reshape_aten.py | 7 +- py/torch_tensorrt/fx/tools/common_fx2trt.py | 3 +- py/torch_tensorrt/fx/utils.py | 10 +- py/torch_tensorrt/trt_alias.py | 159 +++++++ pyproject_rtx.toml.temp | 358 ++++++++++++++++ setup.py | 61 ++- tests/py/core/test_classes.py | 4 +- .../py/dynamo/conversion/test_nonzero_aten.py | 19 + tests/py/dynamo/models/test_models_export.py | 7 + .../runtime/test_000_compilation_settings.py | 3 +- tests/util/BUILD | 24 +- third_party/tensorrt_rtx/archive/BUILD | 68 +++ third_party/tensorrt_rtx/local/BUILD | 80 ++++ toolchains/ci_workspaces/MODULE.bazel.tmpl | 17 + toolchains/dep_collection/defs.bzl | 2 +- tools/debug/engine_visualization/__init__.py | 0 85 files changed, 2969 insertions(+), 362 deletions(-) create mode 100644 .github/scripts/install-tensorrt-rtx.sh create mode 100644 .github/workflows/build-test-linux-x86_64_rtx.yml create mode 100644 .github/workflows/build-test-windows_rtx.yml rename .github/workflows/{build_wheels_linux_aarch64.yml => build_wheels_linux.yml} (86%) create mode 100644 .github/workflows/build_wheels_windows.yml create mode 100644 docsrc/getting_started/tensorrt_rtx.rst create mode 100644 py/torch_tensorrt/trt_alias.py create mode 100644 pyproject_rtx.toml.temp create mode 100644 third_party/tensorrt_rtx/archive/BUILD create mode 100644 third_party/tensorrt_rtx/local/BUILD create mode 100644 tools/debug/engine_visualization/__init__.py diff --git a/.bazelrc b/.bazelrc index 03aa1d718e..8422bb9cf7 100644 --- a/.bazelrc +++ b/.bazelrc @@ -39,6 +39,8 @@ build:cxx11_abi --define=abi=cxx11_abi build:jetpack --//toolchains/dep_collection:compute_libs=jetpack +build:rtx --//toolchains/dep_collection:compute_libs=rtx + build:ci_testing --define=torchtrt_src=prebuilt --cxxopt="-DDISABLE_TEST_IN_CI" --action_env "NVIDIA_TF32_OVERRIDE=0" build:use_precompiled_torchtrt --define=torchtrt_src=prebuilt diff --git a/.github/scripts/install-tensorrt-rtx.sh b/.github/scripts/install-tensorrt-rtx.sh new file mode 100644 index 0000000000..bb44681607 --- /dev/null +++ b/.github/scripts/install-tensorrt-rtx.sh @@ -0,0 +1,39 @@ + +install_tensorrt_rtx() { + if [[ ${USE_RTX} == true ]]; then + install_wheel_or_not=${1:-false} + echo "It is the tensorrt-rtx build, install tensorrt-rtx with install_wheel_or_not:${install_wheel_or_not}" + PLATFORM=$(python -c "import sys; print(sys.platform)") + echo "PLATFORM: $PLATFORM" + # PYTHON_VERSION is always set in the CI environment, add this check for local testing + if [ -z "$PYTHON_VERSION" ]; then + echo "Error: PYTHON_VERSION environment variable is not set or empty. example format: export PYTHON_VERSION=3.11" + exit 1 + fi + + # python version is like 3.11, we need to convert it to cp311 + CPYTHON_TAG="cp${PYTHON_VERSION//./}" + if [[ ${PLATFORM} == win32 ]]; then + curl -L https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip -o TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip + unzip TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip + rtx_lib_dir=${PWD}/TensorRT-RTX-1.0.0.21/lib + export LD_LIBRARY_PATH=${rtx_lib_dir}:$LD_LIBRARY_PATH + echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH" + if [[ ${install_wheel_or_not} == true ]]; then + pip install TensorRT-RTX-1.0.0.21/python/tensorrt_rtx-1.0.0.21-${CPYTHON_TAG}-none-win_amd64.whl + fi + else + curl -L https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz -o TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz + tar -xzf TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz + rtx_lib_dir=${PWD}/TensorRT-RTX-1.0.0.21/lib + export LD_LIBRARY_PATH=${rtx_lib_dir}:$LD_LIBRARY_PATH + echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH" + if [[ ${install_wheel_or_not} == true ]]; then + pip install TensorRT-RTX-1.0.0.21/python/tensorrt_rtx-1.0.0.21-${CPYTHON_TAG}-none-linux_x86_64.whl + fi + fi + else + echo "It is the standard tensorrt build, skip install tensorrt-rtx" + fi + +} \ No newline at end of file diff --git a/.github/scripts/install-torch-tensorrt.sh b/.github/scripts/install-torch-tensorrt.sh index 94de5f022a..49a367b832 100755 --- a/.github/scripts/install-torch-tensorrt.sh +++ b/.github/scripts/install-torch-tensorrt.sh @@ -21,6 +21,12 @@ pip uninstall -y torch torchvision pip install --force-reinstall --pre ${TORCHVISION} --index-url ${INDEX_URL} pip install --force-reinstall --pre ${TORCH} --index-url ${INDEX_URL} +if [[ ${USE_RTX} == true ]]; then + source .github/scripts/install-tensorrt-rtx.sh + # tensorrt-rtx is not publicly available, so we need to install the wheel from the tar ball + install_wheel_or_not=true + install_tensorrt_rtx ${install_wheel_or_not} +fi # Install Torch-TensorRT if [[ ${PLATFORM} == win32 ]]; then @@ -29,4 +35,10 @@ else pip install /opt/torch-tensorrt-builds/torch_tensorrt*.whl fi +if [[ ${USE_RTX} == true ]]; then + # currently tensorrt is installed automatically by install torch-tensorrt since it is a dependency of torch-tensorrt in pyproject.toml + # so we need to uninstall it to avoid conflict + pip uninstall -y tensorrt tensorrt_cu12 tensorrt_cu12_bindings tensorrt_cu12_libs +fi + echo -e "Running test script"; diff --git a/.github/workflows/build-test-linux-aarch64-jetpack.yml b/.github/workflows/build-test-linux-aarch64-jetpack.yml index e4e4fbfc30..33c3612d84 100644 --- a/.github/workflows/build-test-linux-aarch64-jetpack.yml +++ b/.github/workflows/build-test-linux-aarch64-jetpack.yml @@ -66,8 +66,8 @@ jobs: post-script: packaging/post_build_script.sh smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt - name: Build torch-tensorrt whl package - uses: ./.github/workflows/build_wheels_linux_aarch64.yml + name: Build torch-tensorrt whl package for aarch64-jetpack + uses: ./.github/workflows/build_wheels_linux.yml with: repository: ${{ matrix.repository }} ref: "" diff --git a/.github/workflows/build-test-linux-aarch64.yml b/.github/workflows/build-test-linux-aarch64.yml index 1f83a51287..34b3e4fa34 100644 --- a/.github/workflows/build-test-linux-aarch64.yml +++ b/.github/workflows/build-test-linux-aarch64.yml @@ -62,8 +62,8 @@ jobs: post-script: packaging/post_build_script.sh smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt - name: Build torch-tensorrt whl package - uses: ./.github/workflows/build_wheels_linux_aarch64.yml + name: Build torch-tensorrt whl package for aarch64 + uses: ./.github/workflows/build_wheels_linux.yml with: repository: ${{ matrix.repository }} ref: "" diff --git a/.github/workflows/build-test-linux-x86_64.yml b/.github/workflows/build-test-linux-x86_64.yml index 51f3730d02..4b18ef559d 100644 --- a/.github/workflows/build-test-linux-x86_64.yml +++ b/.github/workflows/build-test-linux-x86_64.yml @@ -13,6 +13,7 @@ on: - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ workflow_dispatch: + jobs: generate-matrix: uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main @@ -60,8 +61,8 @@ jobs: post-script: packaging/post_build_script.sh smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt - name: Build torch-tensorrt whl package - uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main + name: Build torch-tensorrt whl package for x86_64 + uses: ./.github/workflows/build_wheels_linux.yml with: repository: ${{ matrix.repository }} ref: "" @@ -74,6 +75,8 @@ jobs: package-name: ${{ matrix.package-name }} smoke-test-script: ${{ matrix.smoke-test-script }} trigger-event: ${{ github.event_name }} + architecture: "x86_64" + use-rtx: false tests-py-torchscript-fe: name: Test torchscript frontend [Python] @@ -338,5 +341,5 @@ jobs: popd concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-tensorrt-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} cancel-in-progress: true diff --git a/.github/workflows/build-test-linux-x86_64_rtx.yml b/.github/workflows/build-test-linux-x86_64_rtx.yml new file mode 100644 index 0000000000..ab7c1ec9f2 --- /dev/null +++ b/.github/workflows/build-test-linux-x86_64_rtx.yml @@ -0,0 +1,360 @@ +name: Build and test Linux x86_64 wheels(RTX) + +on: + pull_request: + push: + branches: + - main + - nightly + - release/* + tags: + # NOTE: Binary build pipelines should only get triggered on release candidate builds + # Release candidate tags look like: v1.11.0-rc1 + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + workflow_dispatch: + +jobs: + generate-matrix: + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: wheel + os: linux + test-infra-repository: pytorch/test-infra + test-infra-ref: main + with-rocm: false + with-cpu: false + + filter-matrix: + needs: [generate-matrix] + outputs: + matrix: ${{ steps.generate.outputs.matrix }} + runs-on: ubuntu-latest + steps: + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - uses: actions/checkout@v4 + with: + repository: pytorch/tensorrt + - name: Generate matrix + id: generate + run: | + set -eou pipefail + MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} + MATRIX_BLOB="$(python3 .github/scripts/filter-matrix.py --matrix "${MATRIX_BLOB}")" + echo "${MATRIX_BLOB}" + echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" + + build: + needs: filter-matrix + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + pre-script: packaging/pre_build_script.sh + env-var-script: packaging/env_vars.txt + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + package-name: torch_tensorrt + name: Build torch-tensorrt-rtx whl package for x86_64 + uses: ./.github/workflows/build_wheels_linux.yml + with: + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + env-var-script: ${{ matrix.env-var-script }} + post-script: ${{ matrix.post-script }} + package-name: ${{ matrix.package-name }} + smoke-test-script: ${{ matrix.smoke-test-script }} + trigger-event: ${{ github.event_name }} + architecture: "x86_64" + use-rtx: true + + tests-py-torchscript-fe: + name: Test torchscript frontend [Python] + needs: [filter-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-torchscript-fe + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/modules + python hub.py + popd + pushd . + cd tests/py/ts + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ + popd + + tests-py-dynamo-converters: + name: Test dynamo converters [Python] + needs: [filter-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-dynamo-converters + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/py/dynamo + export FORCE_TENSORRT_RTX=1 + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin.py + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin_with_attrs.py + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_flashinfer_rmsnorm.py + popd + + tests-py-dynamo-fe: + name: Test dynamo frontend [Python] + needs: [filter-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-dynamo-fe + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/py + cd dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/ + popd + + tests-py-dynamo-serde: + name: Test dynamo export serde [Python] + needs: [filter-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-dynamo-serde + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/py + cd dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/reexport_test_results.xml --ir dynamo models/test_reexport.py + popd + + tests-py-torch-compile-be: + name: Test torch compile backend [Python] + needs: [filter-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-torch-compile-be + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/py + cd dynamo + python -m pytest -ra -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_complete_be_e2e_test_results.xml --ir torch_compile models/test_models.py + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py + popd + + tests-py-dynamo-core: + name: Test dynamo core [Python] + needs: [filter-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-dynamo-core + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/py + cd dynamo + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/ + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ + popd + + tests-py-dynamo-cudagraphs: + name: Test dynamo cudagraphs [Python] + needs: [filter-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-dynamo-cudagraphs + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/py + cd dynamo + nvidia-smi + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_cpp_test_results.xml runtime/test_002_cudagraphs_cpp.py || true + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_py_test_results.xml runtime/test_002_cudagraphs_py.py || true + popd + + tests-py-core: + name: Test core [Python] + needs: [filter-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-core + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/py/core + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . + popd + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-tensorrt-rtx-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} + cancel-in-progress: true diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index ad8ae83846..2d402a8799 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -71,7 +71,7 @@ jobs: smoke-test-script: packaging/smoke_test_windows.py package-name: torch_tensorrt name: Build torch-tensorrt whl package - uses: pytorch/test-infra/.github/workflows/build_wheels_windows.yml@main + uses: ./.github/workflows/build_wheels_windows.yml with: repository: ${{ matrix.repository }} ref: "" diff --git a/.github/workflows/build-test-windows_rtx.yml b/.github/workflows/build-test-windows_rtx.yml new file mode 100644 index 0000000000..31078c39b7 --- /dev/null +++ b/.github/workflows/build-test-windows_rtx.yml @@ -0,0 +1,341 @@ +name: Build and test Windows wheels(RTX) + +on: + pull_request: + push: + branches: + - main + - nightly + - release/* + tags: + # NOTE: Binary build pipelines should only get triggered on release candidate builds + # Release candidate tags look like: v1.11.0-rc1 + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + workflow_dispatch: + +jobs: + generate-matrix: + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: wheel + os: windows + test-infra-repository: pytorch/test-infra + test-infra-ref: main + with-rocm: false + with-cpu: false + + filter-matrix: + needs: [generate-matrix] + outputs: + matrix: ${{ steps.generate.outputs.matrix }} + runs-on: ubuntu-latest + steps: + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - uses: actions/checkout@v4 + with: + repository: pytorch/tensorrt + - name: Generate matrix + id: generate + run: | + set -eou pipefail + MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} + MATRIX_BLOB="$(python3 .github/scripts/filter-matrix.py --matrix "${MATRIX_BLOB}")" + echo "${MATRIX_BLOB}" + echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" + + substitute-runner: + needs: filter-matrix + outputs: + matrix: ${{ steps.substitute.outputs.matrix }} + runs-on: ubuntu-latest + steps: + - name: Substitute runner + id: substitute + run: | + echo matrix="$(echo '${{ needs.filter-matrix.outputs.matrix }}' | sed -e 's/windows.g4dn.xlarge/windows.g5.4xlarge.nvidia.gpu/g')" >> ${GITHUB_OUTPUT} + + build: + needs: substitute-runner + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + pre-script: packaging/pre_build_script_windows.sh + env-script: packaging/vc_env_helper.bat + smoke-test-script: packaging/smoke_test_windows.py + package-name: torch_tensorrt + name: Build torch-tensorrt-rtx whl package + uses: ./.github/workflows/build_wheels_windows.yml + with: + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + env-script: ${{ matrix.env-script }} + smoke-test-script: ${{ matrix.smoke-test-script }} + package-name: ${{ matrix.package-name }} + trigger-event: ${{ github.event_name }} + wheel-build-params: "--use-rtx" + use-rtx: true + timeout: 120 + + tests-py-torchscript-fe: + name: Test torchscript frontend [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-torchscript-fe + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/modules + python hub.py + popd + pushd . + cd tests/py/ts + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ + popd + + tests-py-dynamo-converters: + name: Test dynamo converters [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-converters + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/py + cd dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ + popd + + tests-py-dynamo-fe: + name: Test dynamo frontend [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-fe + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/py + cd dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/ + popd + + tests-py-dynamo-serde: + name: Test dynamo export serde [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-serde + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/py + cd dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/reexport_test_results.xml --ir dynamo models/test_reexport.py + popd + + tests-py-torch-compile-be: + name: Test torch compile backend [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-torch-compile-be + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/py + cd dynamo + python -m pytest -ra -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_complete_be_e2e_test_results.xml --ir torch_compile models/test_models.py + ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py + popd + + tests-py-dynamo-core: + name: Test dynamo core [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-core + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/py + cd dynamo + ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/ + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ + ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ + popd + + tests-py-dynamo-cudagraphs: + name: Test dynamo cudagraphs [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-cudagraphs + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/py + cd dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_cpp_test_results.xml runtime/test_002_cudagraphs_cpp.py + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_py_test_results.xml runtime/test_002_cudagraphs_py.py + popd + + tests-py-core: + name: Test core [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-core + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/py/core + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . + popd + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-tensorrt-rtx-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} + cancel-in-progress: true diff --git a/.github/workflows/build_wheels_linux_aarch64.yml b/.github/workflows/build_wheels_linux.yml similarity index 86% rename from .github/workflows/build_wheels_linux_aarch64.yml rename to .github/workflows/build_wheels_linux.yml index e2bfeb1540..ef0805cf73 100644 --- a/.github/workflows/build_wheels_linux_aarch64.yml +++ b/.github/workflows/build_wheels_linux.yml @@ -1,4 +1,6 @@ -name: Build Linux Wheels For aarch64 +# This workflow is used to build the wheels for the Linux platform +# it should be periodically synced with https://github.com/pytorch/test-infra/blob/main/.github/workflows/build_wheels_linux.yml +name: Build Linux Wheels on: workflow_call: @@ -88,11 +90,6 @@ on: required: false default: "python -m build --wheel" type: string - is-jetpack: - description: Set to true if the build is for jetpack - required: false - default: false - type: boolean pip-install-torch-extra-args: # NOTE: Why does this exist? # Well setuptools / python packaging doesn't actually allow you to specify dependencies @@ -110,11 +107,22 @@ on: description: 'Timeout for the job (in minutes)' default: 120 type: number + use-rtx: + description: "Set to true if use TensorRT-RTX" + default: false + type: boolean + required: false + is-jetpack: + description: Set to true if the build is for jetpack + required: false + default: false + type: boolean secrets: PYPI_API_TOKEN: description: An optional token to upload to pypi required: false + permissions: id-token: write contents: read @@ -133,7 +141,8 @@ jobs: UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }} ARCH: ${{ inputs.architecture }} BUILD_TARGET: ${{ inputs.build-target }} - name: build-wheel-${{ matrix.python_version }}-${{ matrix.desired_cuda }}-${{ matrix.gpu_arch_type }} + USE_RTX: ${{ inputs.use-rtx }} + name: build-wheel-${{ matrix.python_version }}-${{ matrix.desired_cuda }}-${{ matrix.gpu_arch_type }}-${{ inputs.architecture }}-${{ inputs.use-rtx }}-${{ inputs.is-jetpack }} runs-on: ${{ matrix.validation_runner }} environment: ${{(inputs.trigger-event == 'schedule' || (inputs.trigger-event == 'push' && (startsWith(github.event.ref, 'refs/heads/nightly') || startsWith(github.event.ref, 'refs/tags/v')))) && 'pytorchbot-env' || ''}} container: @@ -251,7 +260,6 @@ jobs: working-directory: ${{ inputs.repository }} shell: bash -l {0} run: | - #set -euxo pipefail set -x source "${BUILD_ENV_FILE}" export PYTORCH_VERSION="$(${CONDA_RUN} pip show torch | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')" @@ -261,28 +269,37 @@ jobs: BUILD_VERSION="${BUILD_VERSION}+${CU_VERSION}" fi echo "BUILD_VERSION=$BUILD_VERSION" - if [[ ${{ inputs.is-jetpack }} == false ]]; then - ${CONDA_RUN} python setup.py bdist_wheel + echo "USE_RTX=$USE_RTX" + echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" + if [[ ${{ inputs.use-rtx }} == true ]]; then + echo "Building tensorrt-rtx wheel" + ${CONDA_RUN} python setup.py bdist_wheel --use-rtx else - ${CONDA_RUN} python setup.py bdist_wheel --jetpack --plat-name=linux_tegra_aarch64 + if [[ ${{ inputs.is-jetpack }} == true ]]; then + echo "Building tensorrt wheel for jetpack" + ${CONDA_RUN} python setup.py bdist_wheel --jetpack --plat-name=linux_tegra_aarch64 + else + echo "Building standard tensorrt wheel" + ${CONDA_RUN} python setup.py bdist_wheel + fi fi - name: Repair Manylinux_2_28 Wheel shell: bash -l {0} env: PACKAGE_NAME: ${{ inputs.package-name }} SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} + if: ${{ inputs.architecture == 'x86_64' }} run: | set -euxo pipefail source "${BUILD_ENV_FILE}" - # for pkg in ${{ inputs.repository }}/dist/*-linux_*.whl; do - # # if the glob didn't match anything - # if [[ ! -e $pkg ]]; then - # continue - # fi - # abs_pkg=$(realpath $pkg) - # ./test-infra/.github/scripts/repair_manylinux_2_28.sh $abs_pkg - # done - echo "Repair Manylinux_2_28 Wheel is not supported for aarch64" + for pkg in ${{ inputs.repository }}/dist/*-linux_*.whl; do + # if the glob didn't match anything + if [[ ! -e $pkg ]]; then + continue + fi + abs_pkg=$(realpath $pkg) + ./test-infra/.github/scripts/repair_manylinux_2_28.sh $abs_pkg + done - name: Run Post-Script if: ${{ inputs.post-script != '' }} uses: ./test-infra/.github/actions/run-script-with-cache @@ -291,7 +308,6 @@ jobs: script: ${{ inputs.post-script }} - name: Smoke Test shell: bash -l {0} - if: ${{ inputs.is-jetpack == false }} env: PACKAGE_NAME: ${{ inputs.package-name }} SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} @@ -336,8 +352,10 @@ jobs: upload: needs: build uses: pytorch/test-infra/.github/workflows/_binary_upload.yml@main - # for jetpack builds, only upload to pytorch index for nightly builds - if: ${{ inputs.is-jetpack == false || (github.event_name == 'push' && startsWith(github.event.ref, 'refs/heads/nightly')) }} + # if it is not the jetpack build or tensorrt-rtx build, upload to pytorch index, + # if it is the jetpack build only upload to pytorch_index for nightly builds + # for tensorrt-rtx build, do not upload to pytorch indexat all + if: ${{ (inputs.is-jetpack == false && inputs.use-rtx == false) || (inputs.is-jetpack == true && github.event_name == 'push' && startsWith(github.event.ref, 'refs/heads/nightly')) }} with: repository: ${{ inputs.repository }} ref: ${{ inputs.ref }} @@ -351,5 +369,5 @@ jobs: PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }} concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{inputs.use-rtx}}-${{inputs.architecture}}-${{inputs.is-jetpack}}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }} cancel-in-progress: true diff --git a/.github/workflows/build_wheels_windows.yml b/.github/workflows/build_wheels_windows.yml new file mode 100644 index 0000000000..7af9dead8f --- /dev/null +++ b/.github/workflows/build_wheels_windows.yml @@ -0,0 +1,394 @@ +# This workflow is used to build the wheels for the Windows platform +# it should be periodically synced with https://github.com/pytorch/test-infra/blob/main/.github/workflows/build_wheels_windows.yml +name: Build Windows Wheels + +on: + workflow_call: + inputs: + repository: + description: 'Repository to checkout, defaults to ""' + default: "" + type: string + ref: + description: 'Reference to checkout, defaults to "nightly"' + default: "nightly" + type: string + test-infra-repository: + description: "Test infra repository to use" + default: "pytorch/test-infra" + type: string + test-infra-ref: + description: "Test infra reference to use" + default: "" + type: string + build-matrix: + description: "Build matrix to utilize" + default: "" + type: string + pre-script: + description: "Pre script to run prior to build" + default: "" + type: string + env-script: + description: "Script to setup environment variables for the build" + default: "" + type: string + wheel-build-params: + description: "Additional parameters for bdist_wheel" + default: "" + type: string + post-script: + description: "Post script to run prior to build" + default: "" + type: string + smoke-test-script: + description: "Script for Smoke Test for a specific domain" + default: "" + type: string + package-name: + description: "Name of the actual python package that is imported" + default: "" + type: string + build-platform: + description: Platform to build wheels, choose from 'python-build-package' or 'setup-py' + required: false + type: string + default: 'setup-py' + build-command: + description: The build command to use if build-platform is python-build-package + required: false + default: "python -m build --wheel" + type: string + trigger-event: + description: "Trigger Event in caller that determines whether or not to upload" + default: "" + type: string + cache-path: + description: "The path(s) on the runner to cache or restore. The path is relative to repository." + default: "" + type: string + cache-key: + description: "The key created when saving a cache and the key used to search for a cache." + default: "" + type: string + submodules: + description: "Works as stated in actions/checkout, but the default value is recursive" + required: false + type: string + default: recursive + timeout: + description: 'Timeout for the job (in minutes)' + default: 60 + type: number + use-rtx: + description: "Set to true if use TensorRT-RTX" + default: false + type: boolean + required: false + architecture: + description: 'CPU architecture to build for' + default: "x64" + type: string + +permissions: + id-token: write + contents: read + +jobs: + build: + strategy: + fail-fast: false + matrix: ${{ fromJSON(inputs.build-matrix) }} + env: + PYTHON_VERSION: ${{ matrix.python_version }} + PACKAGE_TYPE: wheel + REPOSITORY: ${{ inputs.repository }} + REF: ${{ inputs.ref }} + CU_VERSION: ${{ matrix.desired_cuda }} + UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }} + USE_RTX: ${{ inputs.use-rtx }} + name: build-${{ matrix.build_name }} + runs-on: ${{ matrix.validation_runner }} + defaults: + run: + shell: bash -l {0} + # If a build is taking longer than 60 minutes on these runners we need + # to have a conversation + timeout-minutes: ${{ inputs.timeout }} + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + # Support the use case where we need to checkout someone's fork + repository: ${{ inputs.test-infra-repository }} + ref: ${{ inputs.test-infra-ref }} + path: test-infra + - uses: ./test-infra/.github/actions/setup-ssh + name: Setup SSH + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + activate-with-label: false + instructions: "SSH with rdesktop using ssh -L 3389:localhost:3389 %%username%%@%%hostname%%" + - name: Add Conda scripts to GitHub path + if: inputs.architecture == 'x64' + run: | + echo "C:/Jenkins/Miniconda3/Scripts" >> $GITHUB_PATH + - name: Setup Git for Windows' minimal SDK + env: + DEPENDENCIES_DIR: c:\temp\dependencies\ + if: inputs.architecture == 'arm64' + uses: git-for-windows/setup-git-for-windows-sdk@v1 + with: + architecture: aarch64 + path: "${{env.DEPENDENCIES_DIR}}\\git" + - uses: ./test-infra/.github/actions/set-channel + - name: Set PYTORCH_VERSION + if: env.CHANNEL == 'test' + run: | + # When building RC, set the version to be the current candidate version, + # otherwise, leave it alone so nightly will pick up the latest + echo "PYTORCH_VERSION=${{ matrix.stable_version }}" >> "${GITHUB_ENV}" + - uses: ./test-infra/.github/actions/setup-binary-builds + if: inputs.architecture == 'x64' + with: + repository: ${{ inputs.repository }} + ref: ${{ inputs.ref }} + submodules: ${{ inputs.submodules }} + setup-miniconda: false + python-version: ${{ env.PYTHON_VERSION }} + cuda-version: ${{ env.CU_VERSION }} + arch: ${{ inputs.architecture }} + - name: Install XPU support package + if: ${{ matrix.gpu_arch_type == 'xpu' }} + env: + XPU_VERSION: '2025.1' + run: | + cmd //c .\\test-infra\\.github\\scripts\\install_xpu.bat + - name: Checkout Target Repository (${{ env.REPOSITORY }}) + if: inputs.architecture == 'arm64' + uses: actions/checkout@v4 + with: + repository: ${{ env.REPOSITORY }} + ref: ${{ env.REF }} + path: ${{ env.REPOSITORY }} + submodules: recursive + - name: Bootstrap python + if: inputs.architecture == 'arm64' + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python_version }} + architecture: arm64 + - name: Install torch dependency + if: inputs.architecture == 'x64' + run: | + source "${BUILD_ENV_FILE}" + # shellcheck disable=SC2086 + ${CONDA_RUN} ${PIP_INSTALL_TORCH} + - name: Run Pre-Script with Caching + if: ${{ inputs.pre-script != '' && inputs.architecture == 'x64' }} + uses: ./test-infra/.github/actions/run-script-with-cache + with: + cache-path: ${{ inputs.cache-path }} + cache-key: ${{ inputs.cache-key }} + repository: ${{ inputs.repository }} + script: ${{ inputs.pre-script }} + is_windows: 'enabled' + - name: Run Pre-Script Arm64 + if: ${{ inputs.pre-script != '' && inputs.architecture == 'arm64' }} + env: + DOWNLOADS_DIR: c:\temp\downloads\ + DEPENDENCIES_DIR: c:\temp\dependencies\ + SCRIPTS_DIR: test-infra\\.github\\scripts\\winarm64 + SRC_DIR: ${{ inputs.repository }} + PRE_SCRIPT: ${{ inputs.pre-script }} + shell: cmd + run: | + set VS_PATH=C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat + set GIT_BASH=%DEPENDENCIES_DIR%\git\usr\bin\bash.exe + + cd %SRC_DIR% + + call "%VS_PATH%" arm64 + "%GIT_BASH%" -c "bash --noprofile --norc %PRE_SCRIPT%" + - name: Install certificates for Arm64 runner + if: ${{ inputs.architecture == 'arm64' }} + working-directory: ${{ inputs.repository }} + shell: cmd + run: | + call .\.venv\Scripts\activate.bat + + pip install --upgrade certifi==2025.04.26 + for /f "delims=" %%A in ('python -m certifi') do set CERT_PATH=%%A + echo Using cert bundle at: %CERT_PATH% + + set SSL_CERT_FILE=%CERT_PATH% + set REQUESTS_CA_BUNDLE=%CERT_PATH% + + echo SSL_CERT_FILE=%CERT_PATH% >> %GITHUB_ENV% + echo REQUESTS_CA_BUNDLE=%CERT_PATH% >> %GITHUB_ENV% + - name: Build clean + if: inputs.architecture == 'x64' + working-directory: ${{ inputs.repository }} + env: + ENV_SCRIPT: ${{ inputs.env-script }} + run: | + source "${BUILD_ENV_FILE}" + if [[ -z "${ENV_SCRIPT}" ]]; then + ${CONDA_RUN} python setup.py clean + else + if [[ ! -f ${ENV_SCRIPT} ]]; then + echo "::error::Specified env-script file (${ENV_SCRIPT}) not found" + exit 1 + else + ${CONDA_RUN} ${ENV_SCRIPT} python setup.py clean + fi + fi + - name: Set PYTORCH_VERSION on x64 + if: inputs.architecture == 'x64' + working-directory: ${{ inputs.repository }} + run: | + source "${BUILD_ENV_FILE}" + if [[ "$CU_VERSION" == "cpu" ]]; then + # CUDA and CPU are ABI compatible on the CPU-only parts, so strip + # in this case + export PYTORCH_VERSION="$(${CONDA_RUN} pip show torch | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')" + else + export PYTORCH_VERSION="$(${CONDA_RUN} pip show torch | grep ^Version: | sed 's/Version: *//')" + fi + - name: Build the wheel (python-build-package) X64 + if: ${{ inputs.build-platform == 'python-build-package' && inputs.architecture == 'x64' }} + working-directory: ${{ inputs.repository }} + env: + ENV_SCRIPT: ${{ inputs.env-script }} + BUILD_PARAMS: ${{ inputs.wheel-build-params }} + run: | + source "${BUILD_ENV_FILE}" + ${CONDA_RUN} python -m pip install build==1.2.2 + echo "Successfully installed Python build package" + if [[ -z "${ENV_SCRIPT}" ]]; then + ${CONDA_RUN} ${{ inputs.build-command }} + else + ${CONDA_RUN} ${ENV_SCRIPT} ${{ inputs.build-command }} + fi + - name: Build the wheel (setup-py) X64 + if: ${{ inputs.build-platform == 'setup-py' && inputs.architecture == 'x64' }} + working-directory: ${{ inputs.repository }} + env: + ENV_SCRIPT: ${{ inputs.env-script }} + BUILD_PARAMS: ${{ inputs.wheel-build-params }} + run: | + source "${BUILD_ENV_FILE}" + if [[ -z "${ENV_SCRIPT}" ]]; then + ${CONDA_RUN} python setup.py bdist_wheel + else + ${CONDA_RUN} ${ENV_SCRIPT} python setup.py bdist_wheel ${BUILD_PARAMS} + fi + - name: Build the wheel (bdist_wheel) Arm64 + if: inputs.architecture == 'arm64' + env: + SRC_DIR: ${{ inputs.repository }} + DEPENDENCIES_DIR: c:\temp\dependencies\ + shell: cmd + run: | + set CONDA_PREFIX=%DEPENDENCIES_DIR% + set PATH=%PATH%;%DEPENDENCIES_DIR%\Library\bin + set DISTUTILS_USE_SDK=1 + set VS_PATH=C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat + + call "%VS_PATH%" arm64 + cd %SRC_DIR% + call .venv\Scripts\activate.bat + + pip install --upgrade setuptools==72.1.0 + python setup.py bdist_wheel + - name: Run post-script + working-directory: ${{ inputs.repository }} + env: + POST_SCRIPT: ${{ inputs.post-script }} + ENV_SCRIPT: ${{ inputs.env-script }} + if: ${{ inputs.post-script != '' && inputs.architecture == 'x64'}} + run: | + set -euxo pipefail + source "${BUILD_ENV_FILE}" + ${CONDA_RUN} ${ENV_SCRIPT} ${POST_SCRIPT} + - name: Smoke Test X64 + if: inputs.architecture == 'x64' + env: + ENV_SCRIPT: ${{ inputs.env-script }} + PACKAGE_NAME: ${{ inputs.package-name }} + USE_RTX: ${{ inputs.use-rtx }} + SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} + run: | + source "${BUILD_ENV_FILE}" + WHEEL_NAME=$(ls "${{ inputs.repository }}/dist/") + echo "$WHEEL_NAME" + ${CONDA_RUN} pip install "${{ inputs.repository }}/dist/$WHEEL_NAME" + if [[ $USE_RTX == true ]]; then + export FORCE_TENSORRT_RTX=1 + # TODO: lan to remove this once we have a better way to handle the LD_LIBRARY_PATH + # the LD_LIBRARY_PATH set in the pre_build_script_windows.sh will not be available in the smoke test, have to set it here again + export LD_LIBRARY_PATH=${{ inputs.repository }}/TensorRT-RTX-1.0.0.21/lib:$LD_LIBRARY_PATH + fi + if [[ ! -f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT} ]]; then + echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found" + ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)" + else + echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} found" + ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT}" + fi + - name: Smoke Test ARM64 + if: inputs.architecture == 'arm64' + env: + PACKAGE_NAME: ${{ inputs.package-name }} + SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} + SRC_DIR: ${{ inputs.repository }} + run: | + cd $SRC_DIR + source .venv/Scripts/activate + whl=$(find dist -name "${{env.PACKAGE_NAME}}-*.whl" | head -n 1) + pip install $whl + + if [[ ! -f ${SMOKE_TEST_SCRIPT} ]]; then + echo "${SMOKE_TEST_SCRIPT} not found" + python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)" + else + echo "${SMOKE_TEST_SCRIPT} found" + python "$SMOKE_TEST_SCRIPT" + fi + # NB: Only upload to GitHub after passing smoke tests + - name: Get Artifact name + if: inputs.architecture == 'arm64' + env: + REPOSITORY: ${{ inputs.repository }} + REF: ${{ inputs.ref }} + PYTHON_VERSION: ${{ matrix.python_version }} + CU_VERSION: ${{ env.CU_VERSION }} + ARCH: ${{ inputs.architecture }} + run: | + echo "ARTIFACT_NAME=${REPOSITORY//\//_}_${REF//\//_}_${PYTHON_VERSION}_${CU_VERSION}_${ARCH}" >> "${GITHUB_ENV}" + - name: Upload wheel to GitHub + continue-on-error: true + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: ${{ env.ARTIFACT_NAME }} + path: ${{ inputs.repository }}/dist/ + - uses: ./test-infra/.github/actions/teardown-windows + if: inputs.architecture == 'x64' + name: Teardown Windows + + upload: + needs: build + uses: pytorch/test-infra/.github/workflows/_binary_upload.yml@main + # for tensorrt-rtx build, do not upload to pytorch indexat at all + if: ${{ inputs.use-rtx == false }} + with: + repository: ${{ inputs.repository }} + ref: ${{ inputs.ref }} + test-infra-repository: ${{ inputs.test-infra-repository }} + test-infra-ref: ${{ inputs.test-infra-ref }} + build-matrix: ${{ inputs.build-matrix }} + trigger-event: ${{ inputs.trigger-event }} + architecture: ${{ inputs.architecture }} + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true \ No newline at end of file diff --git a/.github/workflows/linux-test.yml b/.github/workflows/linux-test.yml index 15016ecd36..9883db653d 100644 --- a/.github/workflows/linux-test.yml +++ b/.github/workflows/linux-test.yml @@ -53,6 +53,11 @@ on: description: 'Name to give artifacts uploaded from ${RUNNER_ARTIFACT_DIR}' default: '' type: string + use-rtx: + description: "Whether to use TensorRT-RTX" + default: false + type: boolean + required: false jobs: test: @@ -68,6 +73,7 @@ jobs: SCRIPT: ${{ inputs.script }} RUNNER_TEST_RESULTS_DIR: /tmp/test_results ARCH: ${{ inputs.architecture }} + USE_RTX: ${{ inputs.use-rtx }} DOWNLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_${{ inputs.architecture }} name: ${{ inputs.job-name }}-${{ matrix.tensorrt.version }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }} runs-on: ${{ matrix.validation_runner }} @@ -135,6 +141,7 @@ jobs: working-directory: ${{ inputs.repository }} env: ALL_SECRETS: ${{ toJSON(secrets) }} + USE_RTX: ${{ inputs.use-rtx }} run: | set -euxo pipefail # shellcheck disable=SC2086 @@ -203,5 +210,5 @@ jobs: s3-prefix: ${{ env.REPOSITORY }}/${{ github.event.pull_request.number }} concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{inputs.use-rtx}}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} cancel-in-progress: true diff --git a/.github/workflows/windows-test.yml b/.github/workflows/windows-test.yml index 8dc1b107d3..dcd4351fb4 100644 --- a/.github/workflows/windows-test.yml +++ b/.github/workflows/windows-test.yml @@ -43,6 +43,10 @@ on: description: 'CPU architecture to build for' default: "x64" type: string + use-rtx: + description: "Whether to use TensorRT-RTX" + default: false + type: boolean jobs: test: strategy: @@ -56,6 +60,7 @@ jobs: CU_VERSION: ${{ matrix.desired_cuda }} SCRIPT: ${{ inputs.script }} PYTHONUTF8: 1 + USE_RTX: ${{ inputs.use-rtx }} DOWNLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_win_amd64 name: ${{ inputs.job-name }}-${{ matrix.tensorrt.version }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }} runs-on: ${{ matrix.validation_runner }} @@ -147,5 +152,5 @@ jobs: uses: ./test-infra/.github/actions/teardown-windows concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{inputs.use-rtx}}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} cancel-in-progress: true diff --git a/BUILD.bazel b/BUILD.bazel index 950839a40e..11a96d6ae3 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -7,6 +7,16 @@ config_setting( ], ) +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + pkg_tar( name = "include_core", package_dir = "include/torch_tensorrt", @@ -52,6 +62,7 @@ pkg_tar( pkg_tar( name = "lib", srcs = select({ + ":rtx_win": ["//cpp/lib:torchtrt.dll"], ":windows": ["//cpp/lib:torchtrt.dll"], "//conditions:default": [ "//cpp/lib:libtorchtrt.so", @@ -66,6 +77,7 @@ pkg_tar( pkg_tar( name = "lib_rt", srcs = select({ + ":rtx_win": ["//cpp/lib:torchtrt_runtime.dll"], ":windows": ["//cpp/lib:torchtrt_runtime.dll"], "//conditions:default": [ "//cpp/lib:libtorchtrt_runtime.so", @@ -98,6 +110,7 @@ pkg_tar( ":include_core", ":lib", ] + select({ + ":rtx_win": [], ":windows": [], "//conditions:default": [":bin"], }), diff --git a/MODULE.bazel b/MODULE.bazel index 1b66e2c900..767800e591 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -109,6 +109,15 @@ http_archive( ], ) +http_archive( + name = "tensorrt_rtx", + build_file = "@//third_party/tensorrt_rtx/archive:BUILD", + strip_prefix = "TensorRT-RTX-1.0.0.21", + urls = [ + "https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz", + ], +) + http_archive( name = "tensorrt_sbsa", build_file = "@//third_party/tensorrt/archive:BUILD", @@ -136,6 +145,14 @@ http_archive( ], ) +http_archive( + name = "tensorrt_rtx_win", + build_file = "@//third_party/tensorrt_rtx/archive:BUILD", + strip_prefix = "TensorRT-RTX-1.0.0.21", + urls = [ + "https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip", + ], +) #################################################################################### # Locally installed dependencies (use in cases of custom dependencies or aarch64) #################################################################################### diff --git a/core/BUILD b/core/BUILD index 6f5cfad30f..c6744c66c1 100644 --- a/core/BUILD +++ b/core/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -59,14 +80,17 @@ cc_library( "//core/runtime", "//core/util/logging", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/conversion/BUILD b/core/conversion/BUILD index ff87c5a4b8..480481e6bd 100644 --- a/core/conversion/BUILD +++ b/core/conversion/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -54,14 +75,17 @@ cc_library( "//core/ir", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/conversion/conversion.cpp b/core/conversion/conversion.cpp index f8a26e8d77..a3b50430cc 100644 --- a/core/conversion/conversion.cpp +++ b/core/conversion/conversion.cpp @@ -204,7 +204,7 @@ void AddInputs(ConversionCtx* ctx, c10::ArrayRef input "Optimization profile is invalid, please check the input range provided (conversion.AddInputs)"); ctx->cfg->addOptimizationProfile(profile); -#if NV_TENSORRT_MAJOR > 7 || (NV_TENSORRT_MAJOR == 7 && NV_TENSORRT_MINOR >= 1) +#ifndef TRT_MAJOR_RTX && (NV_TENSORRT_MAJOR > 7 || (NV_TENSORRT_MAJOR == 7 && NV_TENSORRT_MINOR >= 1)) if (ctx->enabled_precisions.find(nvinfer1::DataType::kINT8) != ctx->enabled_precisions.end()) { ctx->cfg->setCalibrationProfile(profile); } diff --git a/core/conversion/conversionctx/BUILD b/core/conversion/conversionctx/BUILD index b6820fc757..d0ad2e7bd1 100644 --- a/core/conversion/conversionctx/BUILD +++ b/core/conversion/conversionctx/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -49,14 +70,17 @@ cc_library( "//core/ir", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/conversion/conversionctx/ConversionCtx.cpp b/core/conversion/conversionctx/ConversionCtx.cpp index 2eb363706f..625ef1b669 100644 --- a/core/conversion/conversionctx/ConversionCtx.cpp +++ b/core/conversion/conversionctx/ConversionCtx.cpp @@ -31,8 +31,8 @@ std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) { if (s.device.device_type == nvinfer1::DeviceType::kDLA) { os << "\n DLACore: " << s.device.dla_core; } - os << "\n Engine Capability: " << s.capability \ - << "\n Calibrator Created: " << (s.calibrator != nullptr); + os << "\n Engine Capability: " << s.capability; + // << "\n Calibrator Created: " << (s.calibrator != nullptr); return os; } // clang-format on @@ -59,11 +59,16 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings) for (auto p = settings.enabled_precisions.begin(); p != settings.enabled_precisions.end(); ++p) { switch (*p) { case nvinfer1::DataType::kHALF: +// tensorrt_rtx is strong typed, cannot set fp16 by builder config, only do this for tensorrt build +#ifndef TRT_MAJOR_RTX TORCHTRT_CHECK( builder->platformHasFastFp16(), "Requested inference in FP16 but platform does not support FP16"); cfg->setFlag(nvinfer1::BuilderFlag::kFP16); break; +#endif case nvinfer1::DataType::kINT8: +// tensorrt_rtx is strong typed, cannot set int8 by builder config, only do this for tensorrt build +#ifndef TRT_MAJOR_RTX TORCHTRT_CHECK( builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8"); cfg->setFlag(nvinfer1::BuilderFlag::kINT8); @@ -74,6 +79,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings) cfg->setInt8Calibrator(settings.calibrator); } break; +#endif case nvinfer1::DataType::kFLOAT: break; case nvinfer1::DataType::kINT32: @@ -89,7 +95,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings) if (settings.disable_tf32) { cfg->clearFlag(nvinfer1::BuilderFlag::kTF32); } -#if NV_TENSORRT_MAJOR > 7 +#if defined(TRT_MAJOR_RTX) || (NV_TENSORRT_MAJOR > 7) if (settings.sparse_weights) { cfg->setFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS); } @@ -163,7 +169,7 @@ void ConversionCtx::RecordNewITensor(const torch::jit::Value* value, nvinfer1::I } std::string ConversionCtx::SerializeEngine() { -#if NV_TENSORRT_MAJOR > 7 +#if defined(TRT_MAJOR_RTX) || (NV_TENSORRT_MAJOR > 7) auto serialized_network = make_trt(builder->buildSerializedNetwork(*net, *cfg)); if (!serialized_network) { TORCHTRT_THROW_ERROR("Building serialized network failed in TensorRT"); diff --git a/core/conversion/conversionctx/ConversionCtx.h b/core/conversion/conversionctx/ConversionCtx.h index 8587885eca..df5c2a646d 100644 --- a/core/conversion/conversionctx/ConversionCtx.h +++ b/core/conversion/conversionctx/ConversionCtx.h @@ -26,7 +26,9 @@ struct BuilderSettings { bool allow_shape_tensors = false; ir::Device device; nvinfer1::EngineCapability capability = TRT_ENGINE_CAPABILITY_STANDARD; +#ifndef TRT_MAJOR_RTX nvinfer1::IInt8Calibrator* calibrator = nullptr; +#endif uint64_t num_avg_timing_iters = 1; uint64_t workspace_size = 0; uint64_t dla_sram_size = DLA_SRAM_SIZE; diff --git a/core/conversion/converters/BUILD b/core/conversion/converters/BUILD index 456b8ee7d4..84864cea10 100644 --- a/core/conversion/converters/BUILD +++ b/core/conversion/converters/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -49,14 +70,17 @@ cc_library( "//core/conversion/conversionctx", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, @@ -75,14 +99,17 @@ cc_library( "//core/conversion/conversionctx", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, @@ -93,7 +120,6 @@ cc_library( srcs = [ "NodeConverterRegistry.cpp", "impl/activation.cpp", - "impl/batch_norm.cpp", "impl/bitwise.cpp", "impl/cast.cpp", "impl/chunk.cpp", @@ -106,14 +132,11 @@ cc_library( "impl/element_wise.cpp", "impl/expand.cpp", "impl/internal_ops.cpp", - "impl/interpolate.cpp", "impl/layer_norm.cpp", "impl/linear.cpp", "impl/lstm_cell.cpp", "impl/matrix_multiply.cpp", "impl/max.cpp", - "impl/normalize.cpp", - "impl/pooling.cpp", "impl/quantization.cpp", "impl/reduce.cpp", "impl/reflection_pad.cpp", @@ -126,7 +149,17 @@ cc_library( "impl/topk.cpp", "impl/unary.cpp", "impl/unsqueeze.cpp", - ], + ] + select({ + ":rtx_win": [], + # exclude plugins from rtx build + ":rtx_x86_64": [], + "//conditions:default": [ + "impl/batch_norm.cpp", + "impl/interpolate.cpp", + "impl/normalize.cpp", + "impl/pooling.cpp", + ], + }), hdrs = [ "converters.h", ], @@ -138,14 +171,17 @@ cc_library( "//core/plugins:torch_tensorrt_plugins", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/conversion/converters/impl/quantization.cpp b/core/conversion/converters/impl/quantization.cpp index addf629e6b..6a8c2e2f73 100644 --- a/core/conversion/converters/impl/quantization.cpp +++ b/core/conversion/converters/impl/quantization.cpp @@ -9,15 +9,14 @@ namespace converters { namespace impl { namespace { -#if NV_TENSORRT_MAJOR > 7 +#if defined(TRT_MAJOR_RTX) || (NV_TENSORRT_MAJOR > 7) // clang-format off bool add_qdq(ConversionCtx *ctx, const torch::jit::Node* n, nvinfer1::ITensor* input, nvinfer1::ITensor* scale, std::string& opName) { - nvinfer1::IQuantizeLayer* quantize_layer = ctx->net->addQuantize(*input, *scale); + nvinfer1::IQuantizeLayer* quantize_layer = ctx->net->addQuantize(*input, *scale, nvinfer1::DataType::kINT8); TORCHTRT_CHECK(quantize_layer, "Unable to create QuantizeLayer from node: " << *n); quantize_layer->setAxis(0); - - nvinfer1::IDequantizeLayer* dequantize_layer = ctx->net->addDequantize(*quantize_layer->getOutput(0), *scale); + nvinfer1::IDequantizeLayer* dequantize_layer = ctx->net->addDequantize(*quantize_layer->getOutput(0), *scale, input->getType()); TORCHTRT_CHECK(dequantize_layer, "Unable to create DequantizeLayer from node: " << *n); dequantize_layer->setAxis(0); @@ -54,12 +53,12 @@ auto quantization_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns auto scale = args[1].ITensorOrFreeze(ctx); int64_t axis = args[3].unwrapToScalar().to(); // Add and configure a QuantizeLayer. - nvinfer1::IQuantizeLayer* quantize_layer = ctx->net->addQuantize(*input, *scale); + nvinfer1::IQuantizeLayer* quantize_layer = ctx->net->addQuantize(*input, *scale, nvinfer1::DataType::kINT8); // Set a channel axis which represents output channels quantize_layer->setAxis(axis); // Add and configure a DequantizeLayer. - nvinfer1::IDequantizeLayer* dequantize_layer = ctx->net->addDequantize(*quantize_layer->getOutput(0), *scale); + nvinfer1::IDequantizeLayer* dequantize_layer = ctx->net->addDequantize(*quantize_layer->getOutput(0), *scale, input->getType()); dequantize_layer->setAxis(axis); auto qdq_out = ctx->AssociateValueAndTensor(n->outputs()[0], dequantize_layer->getOutput(0)); diff --git a/core/conversion/evaluators/BUILD b/core/conversion/evaluators/BUILD index d3adad10cd..e9fc358582 100644 --- a/core/conversion/evaluators/BUILD +++ b/core/conversion/evaluators/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -55,14 +76,17 @@ cc_library( "//core/conversion/var", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/conversion/tensorcontainer/BUILD b/core/conversion/tensorcontainer/BUILD index 951a0b886e..c6f56b70c8 100644 --- a/core/conversion/tensorcontainer/BUILD +++ b/core/conversion/tensorcontainer/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -48,14 +69,17 @@ cc_library( deps = [ "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/conversion/var/BUILD b/core/conversion/var/BUILD index 770d3c2120..ce58ca70f3 100644 --- a/core/conversion/var/BUILD +++ b/core/conversion/var/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -51,14 +72,17 @@ cc_library( "//core/conversion/tensorcontainer", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/ir/BUILD b/core/ir/BUILD index fce3fbe51f..5dfdeded90 100644 --- a/core/ir/BUILD +++ b/core/ir/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -51,14 +72,17 @@ cc_library( deps = [ "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/lowering/BUILD b/core/lowering/BUILD index 27af435927..6084198c74 100644 --- a/core/lowering/BUILD +++ b/core/lowering/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -53,14 +74,17 @@ cc_library( "//core/lowering/passes", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/lowering/passes/BUILD b/core/lowering/passes/BUILD index 845abdb62a..bd8462eed1 100644 --- a/core/lowering/passes/BUILD +++ b/core/lowering/passes/BUILD @@ -30,6 +30,16 @@ config_setting( }, ) +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + config_setting( name = "windows", constraint_values = [ @@ -76,9 +86,10 @@ cc_library( deps = [ "//core/util:prelude", ] + select({ + ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], ":use_torch_whl": ["@torch_whl//:libtorch"], ":windows": ["@libtorch_win//:libtorch"], - ":jetpack": ["@torch_l4t//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/partitioning/BUILD b/core/partitioning/BUILD index 378752cdfd..bbbb89af37 100644 --- a/core/partitioning/BUILD +++ b/core/partitioning/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -56,14 +77,17 @@ cc_library( "//core/partitioning/segmentedblock", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/partitioning/partitioningctx/BUILD b/core/partitioning/partitioningctx/BUILD index bd21aba7ff..bae63241a6 100644 --- a/core/partitioning/partitioningctx/BUILD +++ b/core/partitioning/partitioningctx/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -52,14 +73,17 @@ cc_library( "//core/partitioning/segmentedblock", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/partitioning/partitioninginfo/BUILD b/core/partitioning/partitioninginfo/BUILD index daebcd615f..04515abb10 100644 --- a/core/partitioning/partitioninginfo/BUILD +++ b/core/partitioning/partitioninginfo/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -51,14 +72,17 @@ cc_library( "//core/lowering", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/partitioning/segmentedblock/BUILD b/core/partitioning/segmentedblock/BUILD index 83e45eaf14..73916bb6bd 100644 --- a/core/partitioning/segmentedblock/BUILD +++ b/core/partitioning/segmentedblock/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -51,14 +72,17 @@ cc_library( "//core/lowering", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/plugins/BUILD b/core/plugins/BUILD index cebce31941..00503552f2 100644 --- a/core/plugins/BUILD +++ b/core/plugins/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -39,16 +60,24 @@ config_setting( cc_library( name = "torch_tensorrt_plugins", - srcs = [ - "impl/interpolate_plugin.cpp", - "impl/normalize_plugin.cpp", - "register_plugins.cpp", - ], - hdrs = [ - "impl/interpolate_plugin.h", - "impl/normalize_plugin.h", - "plugins.h", - ], + srcs = select({ + ":rtx_win": [], + ":rtx_x86_64": [], + "//conditions:default": [ + "impl/interpolate_plugin.cpp", + "impl/normalize_plugin.cpp", + "register_plugins.cpp", + ], + }), + hdrs = select({ + ":rtx_win": [], + ":rtx_x86_64": [], + "//conditions:default": [ + "impl/interpolate_plugin.h", + "impl/normalize_plugin.h", + "plugins.h", + ], + }), copts = [ "-pthread", ], @@ -58,26 +87,29 @@ cc_library( deps = [ "//core/util:prelude", ] + select({ - ":windows": [ - "@tensorrt_win//:nvinfer", - "@tensorrt_win//:nvinferplugin", + ":jetpack": [ + "@tensorrt_l4t//:nvinfer", + "@tensorrt_l4t//:nvinferplugin", ], + ":rtx_win": [], + ":rtx_x86_64": [], ":sbsa": [ "@tensorrt_sbsa//:nvinfer", "@tensorrt_sbsa//:nvinferplugin", ], - ":jetpack": [ - "@tensorrt_l4t//:nvinfer", - "@tensorrt_l4t//:nvinferplugin", + ":windows": [ + "@tensorrt_win//:nvinfer", + "@tensorrt_win//:nvinferplugin", ], "//conditions:default": [ "@tensorrt//:nvinfer", "@tensorrt//:nvinferplugin", ], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/runtime/BUILD b/core/runtime/BUILD index 72c670bff1..a573cfed78 100644 --- a/core/runtime/BUILD +++ b/core/runtime/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -63,14 +84,17 @@ cc_library( "//core/plugins:torch_tensorrt_plugins", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/util/BUILD b/core/util/BUILD index 4f522704ee..0ed97a5eda 100644 --- a/core/util/BUILD +++ b/core/util/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -60,9 +81,10 @@ cc_library( deps = [ ":macros", ] + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), ) @@ -95,9 +117,10 @@ cc_library( "build_info.h", ], deps = select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), ) @@ -114,14 +137,17 @@ cc_library( ":macros", "//core/util/logging", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/util/logging/BUILD b/core/util/logging/BUILD index f0cc067af9..1ac834b021 100644 --- a/core/util/logging/BUILD +++ b/core/util/logging/BUILD @@ -6,17 +6,34 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", }, ) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = {"//toolchains/dep_collection:compute_libs": "rtx"}, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = {"//toolchains/dep_collection:compute_libs": "rtx"}, +) + config_setting( name = "sbsa", constraint_values = [ "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +43,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -35,6 +52,7 @@ config_setting( constraint_values = [ "@platforms//os:windows", ], + flag_values = {"//toolchains/dep_collection:compute_libs": "default"}, ) cc_library( @@ -46,14 +64,17 @@ cc_library( "TorchTRTLogger.h", ], deps = select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/util/trt_util.h b/core/util/trt_util.h index f3df533d8b..a68e00e14d 100644 --- a/core/util/trt_util.h +++ b/core/util/trt_util.h @@ -8,7 +8,7 @@ namespace nvinfer1 { -#if NV_TENSORRT_MAJOR < 8 +#if !defined(TRT_MAJOR_RTX) && (NV_TENSORRT_MAJOR < 8) #define TRT_ENGINE_CAPABILITY_STANDARD nvinfer1::EngineCapability::kDEFAULT #define TRT_ENGINE_CAPABILITY_SAFETY nvinfer1::EngineCapability::kSAFE_GPU diff --git a/cpp/BUILD b/cpp/BUILD index e5cb1558e9..2b5877aa4a 100644 --- a/cpp/BUILD +++ b/cpp/BUILD @@ -2,21 +2,52 @@ load("@rules_cc//cc:defs.bzl", "cc_library") package(default_visibility = ["//visibility:public"]) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + cc_library( name = "torch_tensorrt", srcs = [ "src/compile_spec.cpp", "src/logging.cpp", - "src/ptq.cpp", "src/torch_tensorrt.cpp", "src/types.cpp", - ], + ] + select({ + ":rtx_win": [], + ":rtx_x86_64": [], + "//conditions:default": [ + "src/ptq.cpp", + ], + }), hdrs = [ "include/torch_tensorrt/logging.h", "include/torch_tensorrt/macros.h", - "include/torch_tensorrt/ptq.h", "include/torch_tensorrt/torch_tensorrt.h", - ], + ] + select({ + ":rtx_win": [], + ":rtx_x86_64": [], + "//conditions:default": [ + "include/torch_tensorrt/ptq.h", + ], + }), linkstatic = True, strip_include_prefix = "include/", deps = [ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 0c0e5a43f0..690dca2749 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -4,7 +4,6 @@ add_library(${lib_name} OBJECT) set(CXX_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/src/compile_spec.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/logging.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/src/ptq.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/torch_tensorrt.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/types.cpp" ) @@ -12,7 +11,6 @@ set(CXX_SRCS set(HEADER_FILES "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/logging.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/macros.h" - "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/ptq.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/torch_tensorrt.h" ) diff --git a/cpp/bin/torchtrtc/BUILD b/cpp/bin/torchtrtc/BUILD index d858d4de93..2c87eddae2 100644 --- a/cpp/bin/torchtrtc/BUILD +++ b/cpp/bin/torchtrtc/BUILD @@ -5,7 +5,7 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", }, ) @@ -15,9 +15,10 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) + config_setting( name = "windows", constraint_values = [ @@ -25,6 +26,16 @@ config_setting( ], ) +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + cc_binary( name = "torchtrtc", srcs = [ @@ -44,17 +55,21 @@ cc_binary( "//cpp:torch_tensorrt", "//third_party/args", ] + select({ - ":windows": [ + ":jetpack": [ + "@torch_l4t//:caffe2", + "@torch_l4t//:libtorch", + ], + ":rtx_win": [ "@libtorch_win//:caffe2", - "@libtorch_win//:libtorch" + "@libtorch_win//:libtorch", ], ":use_torch_whl": [ "@torch_whl//:caffe2", - "@torch_whl//:libtorch" + "@torch_whl//:libtorch", ], - ":jetpack": [ - "@torch_l4t//:caffe2", - "@torch_l4t//:libtorch" + ":windows": [ + "@libtorch_win//:caffe2", + "@libtorch_win//:libtorch", ], "//conditions:default": [ "@libtorch", diff --git a/cpp/bin/torchtrtc/fileio.h b/cpp/bin/torchtrtc/fileio.h index ed52d566a1..a27c0a69e1 100644 --- a/cpp/bin/torchtrtc/fileio.h +++ b/cpp/bin/torchtrtc/fileio.h @@ -23,7 +23,9 @@ #include "torch/torch.h" #include "torch_tensorrt/logging.h" +#ifndef TRT_MAJOR_RTX #include "torch_tensorrt/ptq.h" +#endif #include "torch_tensorrt/torch_tensorrt.h" namespace torchtrtc { diff --git a/cpp/bin/torchtrtc/main.cpp b/cpp/bin/torchtrtc/main.cpp index c36cfdd0fc..b93d977c95 100644 --- a/cpp/bin/torchtrtc/main.cpp +++ b/cpp/bin/torchtrtc/main.cpp @@ -7,7 +7,9 @@ #include "torch/script.h" #include "torch_tensorrt/logging.h" +#ifndef TRT_MAJOR_RTX #include "torch_tensorrt/ptq.h" +#endif #include "torch_tensorrt/torch_tensorrt.h" #include "accuracy.h" @@ -334,8 +336,12 @@ int main(int argc, char** argv) { if (calibration_cache_file) { calibration_cache_file_path = torchtrtc::fileio::resolve_path(args::get(calibration_cache_file)); } - +#ifndef TRT_MAJOR_RTX auto calibrator = torchtrt::ptq::make_int8_cache_calibrator(calibration_cache_file_path); +#else + // rtx build has no calibrator + auto calibrator = nullptr; +#endif compile_settings.require_full_compilation = require_full_compilation; @@ -368,7 +374,9 @@ int main(int argc, char** argv) { } else if (dtype == torchtrt::DataType::kChar) { compile_settings.enabled_precisions.insert(torch::kI8); if (calibration_cache_file) { +#ifndef TRT_MAJOR_RTX compile_settings.ptq_calibrator = calibrator; +#endif } else { torchtrt::logging::log( torchtrt::logging::Level::kINFO, diff --git a/cpp/bin/torchtrtc/parser_util.h b/cpp/bin/torchtrtc/parser_util.h index 9ed5f6d06b..6605ec011a 100644 --- a/cpp/bin/torchtrtc/parser_util.h +++ b/cpp/bin/torchtrtc/parser_util.h @@ -9,7 +9,9 @@ #include "torch/torch.h" #include "torch_tensorrt/logging.h" +#ifndef TRT_MAJOR_RTX #include "torch_tensorrt/ptq.h" +#endif #include "torch_tensorrt/torch_tensorrt.h" namespace torchtrtc { diff --git a/cpp/include/torch_tensorrt/torch_tensorrt.h b/cpp/include/torch_tensorrt/torch_tensorrt.h index adac75d984..4068fa6b80 100644 --- a/cpp/include/torch_tensorrt/torch_tensorrt.h +++ b/cpp/include/torch_tensorrt/torch_tensorrt.h @@ -832,12 +832,12 @@ struct CompileSpec { * host RAM used by DLA to store weights and metadata for execution */ uint64_t dla_global_dram_size = 536870912; - +#ifndef TRT_MAJOR_RTX /** * Calibration dataloaders for each input for post training quantizatiom */ nvinfer1::IInt8Calibrator* ptq_calibrator = nullptr; - +#endif /** * Require the full module be compiled to TensorRT instead of potentially running unsupported operations in PyTorch */ diff --git a/cpp/src/compile_spec.cpp b/cpp/src/compile_spec.cpp index 68a25b3912..8dba4a76b8 100644 --- a/cpp/src/compile_spec.cpp +++ b/cpp/src/compile_spec.cpp @@ -152,7 +152,7 @@ torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external, bool internal.convert_info.engine_settings.dla_global_dram_size = external.dla_global_dram_size; internal.partitioning_info.cast_int8_inputs = true; - +#ifndef TRT_MAJOR_RTX if (internal.convert_info.engine_settings.enabled_precisions.find(nvinfer1::DataType::kINT8) != internal.convert_info.engine_settings.enabled_precisions.end()) { internal.partitioning_info.cast_int8_inputs = false; @@ -166,6 +166,7 @@ torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external, bool } else { internal.convert_info.engine_settings.calibrator = nullptr; } +#endif return internal; } diff --git a/dev_dep_versions.yml b/dev_dep_versions.yml index c9a738feb6..113fe23de6 100644 --- a/dev_dep_versions.yml +++ b/dev_dep_versions.yml @@ -1,2 +1,3 @@ __cuda_version__: "12.8" __tensorrt_version__: "10.12.0" +__tensorrt_rtx_version__: "1.0.0" diff --git a/docsrc/getting_started/tensorrt_rtx.rst b/docsrc/getting_started/tensorrt_rtx.rst new file mode 100644 index 0000000000..8edf80699c --- /dev/null +++ b/docsrc/getting_started/tensorrt_rtx.rst @@ -0,0 +1,65 @@ +.. _Torch-TensorRT_in_RTX: + +Torch-TensorRT in RTX +############################# + +Overview +******** + +TensorRT-RTX +=========== +TensorRT for RTX builds on the proven performance of the NVIDIA TensorRT inference library, and simplifies the deployment of AI models on NVIDIA RTX GPUs across desktops, laptops, and workstations. + +TensorRT for RTX is a drop-in replacement for NVIDIA TensorRT in applications targeting NVIDIA RTX GPUs from Turing through Blackwell generations. It introduces a Just-In-Time (JIT) optimizer in the runtime that compiles improved inference engines directly on the end-user’s RTX-accelerated PC in under 30 seconds. This eliminates the need for lengthy pre-compilation steps and enables rapid engine generation, improved application portability, and cutting-edge inference performance. + +For detailed information about TensorRT-RTX, refer to: +* `TensorRT-RTX Documentation `_ + +Currenlty, Torch-TensorRT only supports TensorRT-RTX for the experiment purpose. +Torch-TensorRT by default uses TensorRT during the build and run. + +In order to use TensorRT-RTX, you need to build the wheel with ``--use-rtx`` flag. +And then set the ``FORCE_TENSORRT_RTX=1`` environment variable during run. + + + + +Prerequisites +************* + +System Preparation +================== +1. **Install TensorRT-RTX**: + TensorRT-RTX can be downloaded from https://developer.nvidia.com/tensorrt-rtx. + .. code-block:: sh + # if TensorRT-RTX is downloaded in /usr/local/tensorrt-rtx + export LD_LIBRARY_PATH=/usr/local/tensorrt-rtx/lib:$LD_LIBRARY_PATH + + +Build Torch-TensorRT with TensorRT-RTX +===================================== + +.. code-block:: sh + # if you have previously build with Standard TensorRT, make sure to clean the build environment + python setup.py clean + # build wheel with TensorRT-RTX + python setup.py bdist_wheel --use-rtx + + # install the wheel + cd dist + python -m pip install torch-tensorrt-*.whl + + # make sure the tensorrt_rtx.so file is linked to the tensorrt_rtx.so file in the TensorRT-RTX installation directory + trt_install_path=$(python -m pip show torch-tensorrt | grep "Location" | awk '{print $2}')/torch_tensorrt + + # check if the libtensorrt_rtx.so.1 is linked + ldd $trt_install_path/lib/libtorchtrt.so + + +Quick Start +=========== + +.. code-block:: py + # you have to set FORCE_TENSORRT_RTX=1 to use TensorRT-RTX + FORCE_TENSORRT_RTX=1 python examples/dynamo/torch_compile_resnet_example.py + diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh index 1f1a2120a9..6294632c59 100755 --- a/packaging/pre_build_script.sh +++ b/packaging/pre_build_script.sh @@ -75,3 +75,10 @@ fi cat MODULE.bazel export CI_BUILD=1 + +if [[ ${USE_RTX} == true ]]; then + cat pyproject_rtx.toml.temp > pyproject.toml + source .github/scripts/install-tensorrt-rtx.sh + install_wheel_or_not=true + install_tensorrt_rtx ${install_wheel_or_not} +fi \ No newline at end of file diff --git a/packaging/pre_build_script_windows.sh b/packaging/pre_build_script_windows.sh index b5b62ebf05..c4d2b37322 100644 --- a/packaging/pre_build_script_windows.sh +++ b/packaging/pre_build_script_windows.sh @@ -36,3 +36,10 @@ fi cat MODULE.bazel echo "RELEASE=1" >> ${GITHUB_ENV} + +if [[ ${USE_RTX} == true ]]; then + cat pyproject_rtx.toml.temp > pyproject.toml + source .github/scripts/install-tensorrt-rtx.sh + install_wheel_or_not=true + install_tensorrt_rtx ${install_wheel_or_not} +fi \ No newline at end of file diff --git a/packaging/smoke_test_windows.py b/packaging/smoke_test_windows.py index c7880cd862..31598663f9 100644 --- a/packaging/smoke_test_windows.py +++ b/packaging/smoke_test_windows.py @@ -1,9 +1,12 @@ import subprocess -import tensorrt # noqa: F401 import torch +import torch_tensorrt +from torch_tensorrt._utils import is_tensorrt_rtx print(f"Torch CUDA version: {torch.version.cuda}") +print(f"Torch TensorRT version: {torch_tensorrt.__version__}") +print(f"Is TensorRT RTX: {is_tensorrt_rtx()}") result = subprocess.run( ["systeminfo"], diff --git a/py/torch_tensorrt/__init__.py b/py/torch_tensorrt/__init__.py index 6d79f9b4f3..1b2a498961 100644 --- a/py/torch_tensorrt/__init__.py +++ b/py/torch_tensorrt/__init__.py @@ -1,12 +1,13 @@ import ctypes +import logging import os import platform import sys from typing import Dict, List +import torch from torch_tensorrt._version import ( # noqa: F401 __cuda_version__, - __tensorrt_version__, __version__, ) @@ -17,72 +18,28 @@ "Python 2 has reached end-of-life and is not supported by Torch-TensorRT" ) +import logging -def _parse_semver(version: str) -> Dict[str, str]: - split = version.split(".") - if len(split) < 3: - split.append("") - - return {"major": split[0], "minor": split[1], "patch": split[2]} - - -def _find_lib(name: str, paths: List[str]) -> str: - for path in paths: - libpath = os.path.join(path, name) - if os.path.isfile(libpath): - return libpath +_LOGGER = logging.getLogger(__name__) - raise FileNotFoundError(f"Could not find {name}\n Search paths: {paths}") +import torch +tensorrt_package_name = "" try: - import tensorrt # noqa: F401 -except ImportError: - cuda_version = _parse_semver(__cuda_version__) - tensorrt_version = _parse_semver(__tensorrt_version__) - - CUDA_MAJOR = cuda_version["major"] - TENSORRT_MAJOR = tensorrt_version["major"] - - if sys.platform.startswith("win"): - WIN_LIBS = [ - "nvinfer.dll", - "nvinfer_plugin.dll", - ] - - WIN_PATHS = os.environ["PATH"].split(os.path.pathsep) - - for lib in WIN_LIBS: - ctypes.CDLL(_find_lib(lib, WIN_PATHS)) - - elif sys.platform.startswith("linux"): - LINUX_PATHS = ["/usr/local/cuda-12.8/lib64", "/usr/lib", "/usr/lib64"] - if "LD_LIBRARY_PATH" in os.environ: - LINUX_PATHS += os.environ["LD_LIBRARY_PATH"].split(os.path.pathsep) - - if platform.uname().processor == "x86_64": - LINUX_PATHS += [ - "/usr/lib/x86_64-linux-gnu", - ] + # note: trt_alias must be imported before any import tensorrt - elif platform.uname().processor == "aarch64": - LINUX_PATHS += ["/usr/lib/aarch64-linux-gnu"] + from . import trt_alias # noqa: F401 - LINUX_LIBS = [ - f"libnvinfer.so.{TENSORRT_MAJOR}", - f"libnvinfer_plugin.so.{TENSORRT_MAJOR}", - ] + tensorrt_package_name = trt_alias.package_name + _LOGGER.info(f"You are using {trt_alias.package_name=} ") - for lib in LINUX_LIBS: - ctypes.CDLL(_find_lib(lib, LINUX_PATHS)) - -import logging - -import torch -from torch_tensorrt._features import ENABLED_FEATURES, _enabled_features_str - -_LOGGER = logging.getLogger(__name__) -_LOGGER.debug(_enabled_features_str()) +except Exception as e: + print(f"import error when try to import trt_alias, got error {e}") + print( + f"make sure tensorrt lib is in the LD_LIBRARY_PATH: {os.environ.get('LD_LIBRARY_PATH')}" + ) + raise Exception(f"import error when try to import trt_alias, got error {e}") def _register_with_torch() -> None: @@ -111,6 +68,12 @@ def _register_with_torch() -> None: torch.ops.load_library(linked_file_runtime_full_path) +# note: trt_alias must be imported before enabled features, because enabled features will check tensorrt.plugin availability +from torch_tensorrt._features import ENABLED_FEATURES, _enabled_features_str + +_LOGGER.debug(_enabled_features_str()) + + _register_with_torch() from torch_tensorrt._Device import Device # noqa: F401 diff --git a/py/torch_tensorrt/_utils.py b/py/torch_tensorrt/_utils.py index 9c76257dee..3a17e7e267 100644 --- a/py/torch_tensorrt/_utils.py +++ b/py/torch_tensorrt/_utils.py @@ -1,6 +1,7 @@ import sys from typing import Any +import tensorrt as trt import torch @@ -24,3 +25,34 @@ def check_cross_compile_trt_win_lib() -> bool: target_lib = ".*libnvinfer_builder_resource_win.so.*" return any(re.match(target_lib, lib) for lib in loaded_libs) return False + + +def is_tensorrt_rtx() -> bool: + if trt._package_name == "tensorrt_rtx": + return True + return False + + +def is_tensorrt_version_supported(min_version: str) -> bool: + """ + Check if the installed TensorRT version supports the specified minimum version. + Args: + min_version (str): Minimum required TensorRT version + Returns: + bool: True if TensorRT version is >= min_version, False otherwise + Example: + >>> if is_tensorrt_version_supported("10.8.0"): + ... # Use FP4 features + ... pass + """ + try: + if is_tensorrt_rtx(): + return True + from importlib import metadata + + from packaging.version import Version + + return bool(Version(metadata.version("tensorrt")) >= Version(min_version)) + except (ImportError, ValueError): + # If tensorrt is not installed or version cannot be determined + return False diff --git a/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp b/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp index bae61881da..68f2d46ad9 100644 --- a/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp +++ b/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp @@ -67,7 +67,9 @@ void RegisterTRTCompileSpec() { .def("_set_precisions", &torch_tensorrt::pyapi::CompileSpec::setPrecisions) .def("_set_device", &torch_tensorrt::pyapi::CompileSpec::setDeviceIntrusive) .def("_set_torch_fallback", &torch_tensorrt::pyapi::CompileSpec::setTorchFallbackIntrusive) +#ifndef TRT_MAJOR_RTX .def("_set_ptq_calibrator", &torch_tensorrt::pyapi::CompileSpec::setPTQCalibratorViaHandle) +#endif .def("__str__", &torch_tensorrt::pyapi::CompileSpec::stringify); ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, sparse_weights); diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.cpp b/py/torch_tensorrt/csrc/tensorrt_classes.cpp index bd3aa6b305..28c6addafe 100644 --- a/py/torch_tensorrt/csrc/tensorrt_classes.cpp +++ b/py/torch_tensorrt/csrc/tensorrt_classes.cpp @@ -342,7 +342,7 @@ core::CompileSpec CompileSpec::toInternalCompileSpec(bool converting_to_trt_engi } info.partitioning_info.cast_int8_inputs = true; - +#ifndef TRT_MAJOR_RTX if (ptq_calibrator) { info.convert_info.engine_settings.calibrator = ptq_calibrator; info.partitioning_info.cast_int8_inputs = false; @@ -354,6 +354,7 @@ core::CompileSpec CompileSpec::toInternalCompileSpec(bool converting_to_trt_engi info.lower_info.disable_cse = true; } } +#endif info.convert_info.engine_settings.sparse_weights = sparse_weights; info.convert_info.engine_settings.disable_tf32 = disable_tf32; info.convert_info.engine_settings.refit = refit; diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.h b/py/torch_tensorrt/csrc/tensorrt_classes.h index 89c5c8661e..0b932080f4 100644 --- a/py/torch_tensorrt/csrc/tensorrt_classes.h +++ b/py/torch_tensorrt/csrc/tensorrt_classes.h @@ -139,11 +139,11 @@ struct CompileSpec : torch::CustomClassHolder { enabled_precisions.insert(static_cast(p)); } } - +#ifndef TRT_MAJOR_RTX int64_t getPTQCalibratorHandle() { return (int64_t)ptq_calibrator; } - +#endif void setDeviceIntrusive(const c10::intrusive_ptr& d) { device = *d; } @@ -152,9 +152,11 @@ struct CompileSpec : torch::CustomClassHolder { torch_fallback = *fb; } +#ifndef TRT_MAJOR_RTX void setPTQCalibratorViaHandle(int64_t handle) { ptq_calibrator = (nvinfer1::IInt8Calibrator*)handle; } +#endif ADD_FIELD_GET_SET(disable_tf32, bool); ADD_FIELD_GET_SET(sparse_weights, bool); @@ -170,11 +172,15 @@ struct CompileSpec : torch::CustomClassHolder { ADD_FIELD_GET_SET(allow_shape_tensors, bool); ADD_FIELD_GET_SET(device, Device); ADD_FIELD_GET_SET(torch_fallback, TorchFallback); +#ifndef TRT_MAJOR_RTX ADD_FIELD_GET_SET(ptq_calibrator, nvinfer1::IInt8Calibrator*); +#endif std::vector inputs; InputSignature input_signature; +#ifndef TRT_MAJOR_RTX nvinfer1::IInt8Calibrator* ptq_calibrator = nullptr; +#endif std::set enabled_precisions = {}; bool sparse_weights = false; bool disable_tf32 = false; diff --git a/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp b/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp index e32d102f8b..043ddacccb 100644 --- a/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp +++ b/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp @@ -18,6 +18,7 @@ namespace py = pybind11; namespace torch_tensorrt { namespace pyapi { +#ifndef TRT_MAJOR_RTX template class pyCalibratorTrampoline : public Derived { public: @@ -146,6 +147,7 @@ class pyIInt8LegacyCalibrator : public pyCalibratorTrampoline(m, "CalibrationAlgo", py::module_local(), "Type of calibration algorithm") .value("LEGACY_CALIBRATION", nvinfer1::CalibrationAlgoType::kLEGACY_CALIBRATION) .value("ENTROPY_CALIBRATION", nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION) @@ -319,6 +322,7 @@ PYBIND11_MODULE(_C, m) { .def(py::init_alias<>()) // Always initialize trampoline class. .def("get_batch_size", &nvinfer1::IInt8MinMaxCalibrator::getBatchSize, "Get batch size") .def("get_algorithm", &nvinfer1::IInt8MinMaxCalibrator::getAlgorithm, "Get algorithm"); +#endif py::class_(m, "Device") .def(py::init<>()) @@ -362,11 +366,13 @@ PYBIND11_MODULE(_C, m) { py::class_(ts_sub_mod, "CompileSpec") .def(py::init<>()) .def("__str__", &torch_tensorrt::pyapi::CompileSpec::stringify) +#ifndef TRT_MAJOR_RTX .def("_get_calibrator_handle", &CompileSpec::getPTQCalibratorHandle, "[Internal] gets a handle from a calibrator") +#endif .def_readwrite("inputs", &CompileSpec::inputs) .def_readwrite("input_signature", &CompileSpec::input_signature) .def_readwrite("enabled_precisions", &CompileSpec::enabled_precisions) - .def_readwrite("ptq_calibrator", &CompileSpec::ptq_calibrator) + // .def_readwrite("ptq_calibrator", &CompileSpec::ptq_calibrator) .def_readwrite("refit", &CompileSpec::refit) .def_readwrite("sparse_weights", &CompileSpec::sparse_weights) .def_readwrite("disable_tf32", &CompileSpec::disable_tf32) diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py index b8d4994fca..749e6c5dbe 100644 --- a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py +++ b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py @@ -28,6 +28,7 @@ from torch_tensorrt._enums import dtype from torch_tensorrt._features import needs_refit from torch_tensorrt._Input import Input +from torch_tensorrt._utils import is_tensorrt_rtx, is_tensorrt_version_supported from torch_tensorrt.dynamo import _defaults from torch_tensorrt.dynamo._engine_cache import BaseEngineCache from torch_tensorrt.dynamo._settings import CompilationSettings, settings_are_compatible @@ -51,8 +52,6 @@ from torch_tensorrt.fx.observer import Observer from torch_tensorrt.logging import TRT_LOGGER -from packaging import version - _LOGGER: logging.Logger = logging.getLogger(__name__) TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[Callable[[torch.fx.GraphModule], None]] = ( @@ -90,11 +89,19 @@ def __init__( self.builder = trt.Builder(self.logger) self._debugger_config = _debugger_config flag = 0 - if compilation_settings.use_explicit_typing: - STRONGLY_TYPED = 1 << (int)( - trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED - ) - flag |= STRONGLY_TYPED + # rtx build, strongly typed is enabled by default, can not set it by builder config + if is_tensorrt_rtx(): + if not compilation_settings.use_explicit_typing: + warnings.warn( + "Strongly typed is enabled by default in rtx build, setting use_explicit_typing to True" + ) + compilation_settings.use_explicit_typing = True + else: + if compilation_settings.use_explicit_typing: + STRONGLY_TYPED = 1 << (int)( + trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED + ) + flag |= STRONGLY_TYPED self.ctx = ConversionContext( self.builder.create_network(flag), compilation_settings @@ -217,14 +224,14 @@ def _populate_trt_builder_config( trt.MemoryPoolType.WORKSPACE, self.compilation_settings.workspace_size ) - if version.parse(trt.__version__) >= version.parse("8.2"): + if is_tensorrt_version_supported("8.2"): builder_config.profiling_verbosity = ( trt.ProfilingVerbosity.DETAILED if self._debugger_config and self._debugger_config.save_engine_profile else trt.ProfilingVerbosity.LAYER_NAMES_ONLY ) - if version.parse(trt.__version__) >= version.parse("8.6"): + if is_tensorrt_version_supported("8.6"): if self.compilation_settings.max_aux_streams is not None: _LOGGER.info( f"Setting max aux streams to {self.compilation_settings.max_aux_streams}" @@ -277,6 +284,7 @@ def _populate_trt_builder_config( trt.MemoryPoolType.DLA_GLOBAL_DRAM, self.compilation_settings.dla_global_dram_size, ) + if not self.compilation_settings.use_explicit_typing: if dtype.float16 in self.compilation_settings.enabled_precisions: builder_config.set_flag(trt.BuilderFlag.FP16) @@ -336,7 +344,7 @@ def _populate_trt_builder_config( if self.compilation_settings.enable_weight_streaming: builder_config.set_flag(trt.BuilderFlag.WEIGHT_STREAMING) - if version.parse(trt.__version__) >= version.parse("10.8"): + if is_tensorrt_version_supported("10.8"): TilingOptimizationLevel = { "none": trt.TilingOptimizationLevel.NONE, "fast": trt.TilingOptimizationLevel.FAST, diff --git a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py index fe9a01b06c..ac7883fb30 100644 --- a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py +++ b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py @@ -7,6 +7,7 @@ import numpy as np import torch from torch.fx.node import Argument, Node, Target +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo._settings import CompilationSettings from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl @@ -3561,25 +3562,29 @@ def aten_ops_full( ) -@dynamo_tensorrt_converter( - torch.ops.aten.nonzero.default, - supports_dynamic_shapes=True, - requires_output_allocator=True, -) -def aten_ops_nonzero( - ctx: ConversionContext, - target: Target, - args: Tuple[Argument, ...], - kwargs: Dict[str, Argument], - name: str, -) -> Union[TRTTensor, Sequence[TRTTensor]]: - return impl.unary.nonzero( - ctx, - target, - SourceIR.ATEN, - name, - args[0], +# currently nonzero is not supported for tensorrt_rtx +# TODO: lan to remove this once rtx team has fixed the bug +if not is_tensorrt_rtx(): + + @dynamo_tensorrt_converter( + torch.ops.aten.nonzero.default, + supports_dynamic_shapes=True, + requires_output_allocator=True, ) + def aten_ops_nonzero( + ctx: ConversionContext, + target: Target, + args: Tuple[Argument, ...], + kwargs: Dict[str, Argument], + name: str, + ) -> Union[TRTTensor, Sequence[TRTTensor]]: + return impl.unary.nonzero( + ctx, + target, + SourceIR.ATEN, + name, + args[0], + ) @dynamo_tensorrt_converter(torch.ops.aten.linear.default, supports_dynamic_shapes=True) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py index f9b47542a8..2156ffb26f 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py @@ -5,6 +5,7 @@ import tensorrt as trt import torch from torch.fx.node import Target +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -50,7 +51,9 @@ def batch_norm( # We perform constant folding for batch norm when the weight, bias, running_mean, and running_var are all tensors. # Batch norm operation can be fused into a single layer, which is more efficient than the original implementation. # In this way, the batch norm layer will be fused with the Convolution layer and get a performance boost. - if any( + # TODO: lanl: to remove this once we have solved the batchnorm constant folding issue in RTX + # https://github.com/pytorch/TensorRT/issues/3699 + if is_tensorrt_rtx() or any( [ isinstance(weight, trt.ITensor), isinstance(bias, trt.ITensor), diff --git a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py index 2aeedb144e..f3505c7ff6 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py @@ -5,6 +5,7 @@ import torch from torch.fx.experimental.proxy_tensor import unset_fake_temporarily from torch.fx.node import Target +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -74,6 +75,17 @@ def quantize( dtype = trt.DataType.FP8 max_bound = 448 + if ( + dtype == trt.DataType.INT8 + and ".input_quantizer" in name + and is_tensorrt_rtx() + ): + # RTX does not support int8 activation quantization + # TODO: lan to remove this once rtx team has added the support for int8 activation quantization + raise NotImplementedError( + "TensorRT-RTX does not support int8 activation quantization, only support int8 weight quantization" + ) + axis = None # int8 weight quantization is per-channel quantization(it can have one or multiple amax values) if dtype == trt.DataType.INT8 and amax.numel() > 1: diff --git a/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py b/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py index 35f21198d4..efe4ccc6f4 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py @@ -2,6 +2,7 @@ from typing import List, Optional, Sequence, cast from torch.fx.node import Target +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.dynamo.conversion.converter_utils import ( @@ -22,19 +23,18 @@ def unsqueeze( input: TRTTensor, dim: int, ) -> TRTTensor: - from importlib import metadata - - from packaging.version import Version - - if Version(metadata.version("tensorrt")) < Version("10.7.0"): + # tensorrt version < 10.7.0, use the old unsqueeze implementation + if is_tensorrt_version_supported("10.7.0"): + # use the new unsqueeze implementation + axes = get_trt_tensor(ctx, dim, f"{name}_axes") + layer = ctx.net.add_unsqueeze(input, axes) + set_layer_name(layer, target, name, source_ir) + return layer.get_output(0) + else: logger.warning( - f"IUnsqueezeLayer is supported starting from TensorRT 10.7.0, using the old unsqueeze implementation in the current TensorRT version: {metadata.version('tensorrt')}" + "IUnsqueezeLayer is supported starting from TensorRT 10.7.0, using the old unsqueeze implementation in the current TensorRT version" ) return unsqueeze_old(ctx, target, source_ir, name, input, dim) - axes = get_trt_tensor(ctx, dim, f"{name}_axes") - layer = ctx.net.add_unsqueeze(input, axes) - set_layer_name(layer, target, name, source_ir) - return layer.get_output(0) # old implementation for jetson due to IUnsqueezeLayer was not supported prior to 10.7.0 diff --git a/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py b/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py index 8e18a3ae32..d18a5674e0 100644 --- a/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py +++ b/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py @@ -270,6 +270,7 @@ def setup_engine(self) -> None: if self.settings.enable_weight_streaming: self.set_default_device_memory_budget() self.context = self.engine.create_execution_context() + assert self.context is not None, "Failed to create execution context" assert self.engine.num_io_tensors == ( len(self.input_names) + len(self.output_names) ) @@ -430,7 +431,7 @@ def create_output_allocator(self) -> None: def forward(self, *inputs: torch.Tensor) -> torch.Tensor | Tuple[torch.Tensor, ...]: def run_standard_execution() -> torch.Tensor | Tuple[torch.Tensor, ...]: - shape_changed = self.validate_input_shapes(inputs) + shape_changed = self.validate_input_shapes(contiguous_inputs) ( need_cudagraphs_record, can_use_pre_allocated_outputs, diff --git a/py/torch_tensorrt/dynamo/utils.py b/py/torch_tensorrt/dynamo/utils.py index 0703fd1cb9..7a8e6466cd 100644 --- a/py/torch_tensorrt/dynamo/utils.py +++ b/py/torch_tensorrt/dynamo/utils.py @@ -17,6 +17,7 @@ from torch_tensorrt._enums import dtype from torch_tensorrt._features import ENABLED_FEATURES from torch_tensorrt._Input import Input +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.dynamo import _defaults from torch_tensorrt.dynamo._defaults import default_device from torch_tensorrt.dynamo._engine_cache import BaseEngineCache @@ -76,7 +77,7 @@ class Frameworks(Enum): }, } -if trt.__version__ >= "7.0": +if is_tensorrt_version_supported("7.0"): DataTypeEquivalence[trt.bool] = { Frameworks.NUMPY: np.bool_, Frameworks.TORCH: torch.bool, diff --git a/py/torch_tensorrt/fx/converters/acc_ops_converters.py b/py/torch_tensorrt/fx/converters/acc_ops_converters.py index f998ddb27a..bf2680f12a 100644 --- a/py/torch_tensorrt/fx/converters/acc_ops_converters.py +++ b/py/torch_tensorrt/fx/converters/acc_ops_converters.py @@ -12,6 +12,7 @@ import torch from torch.fx.immutable_collections import immutable_list from torch.fx.node import Argument, Target +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.converters.impl import activation, convolution from torch_tensorrt.fx.passes.lower_basic_pass import ( trt_transposed_linear, @@ -207,7 +208,7 @@ def acc_ops_conv_transposend( return layer.get_output(0) -@tensorrt_converter(acc_ops.pad, enabled=trt.__version__ < "8.2") +@tensorrt_converter(acc_ops.pad, enabled=(not is_tensorrt_version_supported("8.2"))) def acc_ops_pad_with_padding_layer( network: TRTNetwork, target: Target, @@ -257,7 +258,10 @@ def acc_ops_pad_with_padding_layer( return layer.get_output(0) -@tensorrt_converter(acc_ops.pad, enabled=trt.__version__ >= "8.2") +@tensorrt_converter( + acc_ops.pad, + enabled=is_tensorrt_version_supported("8.2"), +) def acc_ops_pad_with_slice_layer( network: TRTNetwork, target: Target, @@ -880,7 +884,10 @@ def acc_ops_sign( ) -> Union[TRTTensor, Sequence[TRTTensor]]: input_val = kwargs["input"] - if trt.__version__ >= "8.2" and not network.has_implicit_batch_dimension: + if ( + is_tensorrt_version_supported("8.2") + and not network.has_implicit_batch_dimension + ): input_val = kwargs["input"] operation_type = trt.UnaryOperation.SIGN return add_unary_layer(network, input_val, operation_type, target, name) diff --git a/py/torch_tensorrt/fx/fx2trt.py b/py/torch_tensorrt/fx/fx2trt.py index 6a29932b1b..f241a936d6 100644 --- a/py/torch_tensorrt/fx/fx2trt.py +++ b/py/torch_tensorrt/fx/fx2trt.py @@ -13,6 +13,7 @@ from torch._ops import OpOverload from torch.fx.node import _get_qualified_name from torch.fx.passes.shape_prop import TensorMetadata +from torch_tensorrt._utils import is_tensorrt_version_supported from .converter_registry import CONVERTERS from .input_tensor_spec import InputTensorSpec @@ -213,7 +214,10 @@ def run( builder_config.max_workspace_size = max_workspace_size # Speed up TRT build time in the test environment - if trt.__version__ >= "8.6" and os.environ.get("TRT_TEST_ENV", "0") == "1": + if ( + is_tensorrt_version_supported("8.6") + and os.environ.get("TRT_TEST_ENV", "0") == "1" + ): _LOGGER.info("Set TRT optimization level to 0") builder_config.builder_optimization_level = 0 @@ -225,7 +229,7 @@ def run( cache = builder_config.create_timing_cache(b"") builder_config.set_timing_cache(cache, False) - if trt.__version__ >= "8.2": + if is_tensorrt_version_supported("8.2"): builder_config.profiling_verbosity = ( profiling_verbosity if profiling_verbosity diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py index 7f32b749c5..217b92f19c 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py @@ -3,9 +3,9 @@ import tensorrt as trt import torch.fx import torch.nn as nn - import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from torch.testing._internal.common_utils import run_tests +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec @@ -15,7 +15,7 @@ """ ) @unittest.skipIf( - trt.__version__ < "8.0", + not is_tensorrt_version_supported("8.0"), "Explicit quantization only supported in TensorRT 8.0 and later", ) class TestDequantizeConverter(AccTestCase): diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py index c82eee79ee..f5b6005782 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py @@ -3,10 +3,10 @@ import tensorrt as trt import torch import torch.nn as nn - import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from parameterized import parameterized from torch.testing._internal.common_utils import run_tests +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase # from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec @@ -80,7 +80,7 @@ def forward(self, x): ] ) @unittest.skipIf( - trt.__version__ < "8.2", + not is_tensorrt_version_supported("8.2"), "Padding 3d only supported in TensorRT 8.2 and later", ) def test_pad_3d(self, _, pad): diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py index c7b050c4ac..3c2708bf91 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py @@ -3,9 +3,9 @@ import tensorrt as trt import torch.fx import torch.nn as nn - import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from torch.testing._internal.common_utils import run_tests +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec @@ -15,7 +15,7 @@ """ ) @unittest.skipIf( - trt.__version__ < "8.0", + not is_tensorrt_version_supported("8.0"), "Explicit quantization only supported in TensorRT 8.0 and later", ) class TestQuantizePerTensorConverter(AccTestCase): diff --git a/py/torch_tensorrt/fx/test/converters/aten_op/test_reshape_aten.py b/py/torch_tensorrt/fx/test/converters/aten_op/test_reshape_aten.py index 538e575d6e..2942945523 100644 --- a/py/torch_tensorrt/fx/test/converters/aten_op/test_reshape_aten.py +++ b/py/torch_tensorrt/fx/test/converters/aten_op/test_reshape_aten.py @@ -4,6 +4,7 @@ import torch from parameterized import parameterized from torch.testing._internal.common_utils import run_tests +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.tools.common_fx2trt import DispatchTestCase, InputTensorSpec @@ -15,7 +16,7 @@ class TestReshapeConverter(DispatchTestCase): ] ) @unittest.skipIf( - trt.__version__ < "8.5", + not is_tensorrt_version_supported("8.5"), "Shape tensor supported well in TensorRT 8.5 and later", ) def test_reshape(self, target_shape): @@ -42,7 +43,7 @@ def forward(self, x): ] ) @unittest.skipIf( - trt.__version__ < "8.5", + not is_tensorrt_version_supported("8.5"), "Shape tensor supported well in TensorRT 8.5 and later", ) def test_reshape_with_dynamic_shape(self, target_shape): @@ -68,7 +69,7 @@ def forward(self, x): ) @unittest.skipIf( - trt.__version__ < "8.5", + not is_tensorrt_version_supported("8.5"), "Shape tensor supported well in TensorRT 8.5 and later", ) def test_reshape_with_dynamic_shape_size(self): diff --git a/py/torch_tensorrt/fx/tools/common_fx2trt.py b/py/torch_tensorrt/fx/tools/common_fx2trt.py index 2ddd832c2a..66f343a55b 100644 --- a/py/torch_tensorrt/fx/tools/common_fx2trt.py +++ b/py/torch_tensorrt/fx/tools/common_fx2trt.py @@ -13,6 +13,7 @@ from torch.fx.passes import shape_prop from torch.fx.passes.infra.pass_base import PassResult from torch.testing._internal.common_utils import TestCase +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx import InputTensorSpec, TRTInterpreter, TRTModule from torch_tensorrt.fx.passes.lower_basic_pass_aten import ( compose_bmm, @@ -258,7 +259,7 @@ def run_test( pass_tracer = chain_passes(*apply_passes) mod = pass_tracer(mod, inputs) - if trt.__version__ >= "8.6": + if is_tensorrt_version_supported("8.6"): test_implicit_batch_dim = False if test_implicit_batch_dim: interp = TRTInterpreter(mod, InputTensorSpec.from_tensors(inputs)) diff --git a/py/torch_tensorrt/fx/utils.py b/py/torch_tensorrt/fx/utils.py index 5bef21b6be..da5cdc0d7f 100644 --- a/py/torch_tensorrt/fx/utils.py +++ b/py/torch_tensorrt/fx/utils.py @@ -8,7 +8,7 @@ import torch from functorch import make_fx from functorch.experimental import functionalize -from torch_tensorrt._utils import sanitized_torch_version +from torch_tensorrt._utils import is_tensorrt_version_supported, sanitized_torch_version from torch_tensorrt.fx.passes.lower_basic_pass import ( replace_op_with_indices, run_const_fold, @@ -60,7 +60,7 @@ class Frameworks(Enum): }, } -if trt.__version__ >= "7.0": +if is_tensorrt_version_supported("7.0"): DataTypeEquivalence[trt.bool] = { Frameworks.NUMPY: np.bool_, Frameworks.TORCH: torch.bool, @@ -105,7 +105,11 @@ def unified_dtype_converter( trt_major_version = int(trt.__version__.split(".")[0]) if dtype in (np.int8, torch.int8, trt.int8): return DataTypeEquivalence[trt.int8][to] - elif trt_major_version >= 7 and dtype in (np.bool_, torch.bool, trt.bool): + elif is_tensorrt_version_supported("7.0") and dtype in ( + np.bool_, + torch.bool, + trt.bool, + ): return DataTypeEquivalence[trt.bool][to] elif dtype in (np.int32, torch.int32, trt.int32): return DataTypeEquivalence[trt.int32][to] diff --git a/py/torch_tensorrt/trt_alias.py b/py/torch_tensorrt/trt_alias.py new file mode 100644 index 0000000000..4a80d1c12e --- /dev/null +++ b/py/torch_tensorrt/trt_alias.py @@ -0,0 +1,159 @@ +import ctypes +import importlib +import importlib.util +import os +import platform +import sys +from types import ModuleType +from typing import Any, Dict, List + +package_imported = False +package_name = "" + + +def _parse_semver(version: str) -> Dict[str, str]: + split = version.split(".") + if len(split) < 3: + split.append("") + + return {"major": split[0], "minor": split[1], "patch": split[2]} + + +def _find_lib(name: str, paths: List[str]) -> str: + for path in paths: + libpath = os.path.join(path, name) + if os.path.isfile(libpath): + return libpath + + raise FileNotFoundError(f"Could not find {name}\n Search paths: {paths}") + + +# TensorRTProxyModule is a proxy module that allows us to register the tensorrt or tensorrt-rtx package +# since tensorrt-rtx is the drop-in replacement for tensorrt, we can use the same interface to use tensorrt-rtx +class TensorRTProxyModule(ModuleType): + def __init__(self, target_module: ModuleType) -> None: + spec = importlib.util.spec_from_loader("tensorrt", loader=None) + self.__spec__ = spec + self.__package__ = target_module.__package__ + self.__path__ = target_module.__path__ + self.__file__ = target_module.__file__ + self.__loader__ = target_module.__loader__ + self.__version__ = target_module.__version__ + self._target_module = target_module + self._nested_module = None + self._package_name: str = "" + + # For RTX: tensorrt.tensorrt -> tensorrt_rtx.tensorrt_rtx + # For standard: tensorrt.tensorrt -> tensorrt.tensorrt (no change) + if hasattr(target_module, "tensorrt_rtx"): + self._nested_module = target_module.tensorrt_rtx + elif hasattr(target_module, "tensorrt"): + self._nested_module = target_module.tensorrt + + # Set up the nested module structure + if self._nested_module: + self.tensorrt = self._nested_module + + # __getattr__ is used to get the attribute from the target module + def __getattr__(self, name: str) -> Any: + # First try to get from the target module + try: + return getattr(self._target_module, name) + except AttributeError: + print(f"AttributeError: {name}") + # For nested modules like tensorrt.tensorrt + if name == "tensorrt" and self._nested_module: + return self._nested_module + raise + + def __dir__(self) -> list[str]: + return dir(self._target_module) + + +def alias_tensorrt() -> None: + global package_imported + global package_name + # tensorrt package has been imported, no need to alias again + if package_imported: + return + + # in order not to break or change the existing behavior, we only build and run with tensorrt by default, tensorrt-rtx is for experiment only + # if we want to test with tensorrt-rtx, we have to build the wheel with --use-rtx and test with FORCE_TENSORRT_RTX=1 + # eg: FORCE_TENSORRT_RTX=1 python test.py + # in future, we can do dynamic linking either to tensorrt or tensorrt-rtx based on the gpu type + use_rtx = False + if os.environ.get("FORCE_TENSORRT_RTX", "0") == "1": + use_rtx = True + package_name = "tensorrt_rtx" if use_rtx else "tensorrt" + # Import the appropriate package + try: + target_module = importlib.import_module(package_name) + proxy = TensorRTProxyModule(target_module) + proxy._package_name = package_name + sys.modules["tensorrt"] = proxy + package_imported = True + except ImportError as e: + # Fallback to standard tensorrt if RTX version not available + print(f"import error when try to import {package_name=} got error {e}") + print( + f"make sure tensorrt lib is in the LD_LIBRARY_PATH: {os.environ.get('LD_LIBRARY_PATH')}" + ) + if use_rtx: + from torch_tensorrt._version import __tensorrt_rtx_version__ + + tensorrt_version = _parse_semver(__tensorrt_rtx_version__) + tensorrt_major = tensorrt_version["major"] + tensorrt_minor = tensorrt_version["minor"] + tensorrt_lib = { + "win": [ + f"tensorrt_rtx_{tensorrt_major}_{tensorrt_minor}.dll", + ], + "linux": [ + f"libtensorrt_rtx.so.{tensorrt_major}", + ], + } + else: + from torch_tensorrt._version import __tensorrt_version__ + + tensorrt_version = _parse_semver(__tensorrt_version__) + tensorrt_major = tensorrt_version["major"] + tensorrt_minor = tensorrt_version["minor"] + tensorrt_lib = { + "win": [ + f"nvinfer_{tensorrt_major}.dll", + f"nvinfer_plugin_{tensorrt_major}.dll", + ], + "linux": [ + f"libnvinfer.so.{tensorrt_major}", + f"libnvinfer_plugin.so.{tensorrt_major}", + ], + } + + from torch_tensorrt import __cuda_version__ + + if sys.platform.startswith("win"): + WIN_LIBS = tensorrt_lib["win"] + WIN_PATHS = os.environ["PATH"].split(os.path.pathsep) + for lib in WIN_LIBS: + ctypes.CDLL(_find_lib(lib, WIN_PATHS)) + + elif sys.platform.startswith("linux"): + LINUX_PATHS = [ + f"/usr/local/cuda-{__cuda_version__}/lib64", + "/usr/lib", + "/usr/lib64", + ] + if "LD_LIBRARY_PATH" in os.environ: + LINUX_PATHS += os.environ["LD_LIBRARY_PATH"].split(os.path.pathsep) + if platform.uname().processor == "x86_64": + LINUX_PATHS += [ + "/usr/lib/x86_64-linux-gnu", + ] + elif platform.uname().processor == "aarch64": + LINUX_PATHS += ["/usr/lib/aarch64-linux-gnu"] + LINUX_LIBS = tensorrt_lib["linux"] + for lib in LINUX_LIBS: + ctypes.CDLL(_find_lib(lib, LINUX_PATHS)) + + +alias_tensorrt() diff --git a/pyproject_rtx.toml.temp b/pyproject_rtx.toml.temp new file mode 100644 index 0000000000..9feb0ce550 --- /dev/null +++ b/pyproject_rtx.toml.temp @@ -0,0 +1,358 @@ +[build-system] +requires = [ + "setuptools>=77.0.0", + "packaging>=23.1", + "wheel>=0.40.0", + "ninja>=1.11.0", + "pyyaml>=6.0", + "cffi>=1.15.1", + "torch>=2.9.0.dev,<2.10.0", + "pybind11==2.6.2", +] +build-backend = "setuptools.build_meta" + +[project] +name = "torch_tensorrt_rtx" +authors = [{ name = "NVIDIA Corporation", email = "narens@nvidia.com" }] +description = "Torch-TensorRT-RTX is a package which allows users to automatically compile PyTorch and TorchScript modules to TensorRT while remaining in PyTorch" +license = { file = "LICENSE" } +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: GPU :: NVIDIA CUDA", + "License :: OSI Approved :: BSD License", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "Operating System :: POSIX :: Linux", + "Programming Language :: C++", + "Programming Language :: Python", + "Programming Language :: Python :: Implementation :: CPython", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development", + "Topic :: Software Development :: Libraries", +] +readme = { file = "README.md", content-type = "text/markdown" } +requires-python = ">=3.9" +keywords = [ + "pytorch", + "torch", + "tensorrt", + "tensorrt_rtx", + "trt", + "ai", + "artificial intelligence", + "ml", + "machine learning", + "dl", + "deep learning", + "compiler", + "dynamo", + "torchscript", + "inference", +] +dependencies = [ + "torch>=2.9.0.dev,<2.10.0", + # currently tensorrt_rtx wheel is not publicly accessible, it is only included inside the rtx tar ball + # hence the tensorrt_rtx wheel version is fixed since the version the rtx tar ball downloaded is fixed + "tensorrt_rtx==1.0.0.21", + "packaging>=23", + "numpy", + "typing-extensions>=4.7.0", + "dllist", +] + +dynamic = ["version"] + +[dependency-groups] +dev = [ + "pre-commit>=2.20.0", + "black>=22.6.0", + "clang-format==14.0.6", + "typos", + "mypy", + "isort", + "ruff", + "pyyaml", +] + +debug = [ + "pydot >= 4.0.0", + "tabulate >= 0.8.10", + "graphviz >= 0.20.3" +] + +test = [ + "pytest", + "pytest-xdist", + "parameterized>=0.2.0", + "expecttest==0.1.6", + "timm>=1.0.3", + "transformers>=4.49.0", +] + +[project.optional-dependencies] +torchvision = [ + "torchvision>=0.23.0.dev,<0.24.0", +] +quantization = ["nvidia-modelopt[all]>=0.27.1"] + +[project.urls] +Homepage = "https://pytorch.org/tensorrt" +Documentation = "https://pytorch.org/tensorrt" +Repository = "https://github.com/pytorch/tensorrt.git" +Changelog = "https://github.com/pytorch/tensorrt/releases" + +[tool.setuptools] +package-dir = { "" = "py" } +include-package-data = false + +[tool.uv] +package = true +environments = ["sys_platform == 'linux'", "sys_platform == 'windows'"] +prerelease = "if-necessary-or-explicit" +index-strategy = "unsafe-best-match" + +[tool.uv.sources] +torch = [ + { index = "pytorch-nightly-cu129", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" }, +] +torchvision = [ + { index = "pytorch-nightly-cu129", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" }, +] + +[[tool.uv.index]] +name = "pytorch-nightly-cu129" +url = "https://download.pytorch.org/whl/nightly/cu129" +explicit = false + +[[tool.uv.index]] +name = "nvidia" +url = "https://pypi.nvidia.com" +explicit = false + +[tool.ruff] +# NOTE: Synchoronize the ignores with .flake8 +lint.ignore = [ + # these ignores are from flake8-bugbear; please fix! + "B007", + "B008", + "B017", + "B018", # Useless expression + "B019", + "B020", + "B023", + "B024", + "B026", + "B028", # No explicit `stacklevel` keyword argument found + "B904", + "B905", + "E402", + "C408", # C408 ignored because we like the dict keyword argument syntax + "E501", # E501 is not flexible enough, we're using B950 instead + "E721", + "E731", # Assign lambda expression + "E741", + "EXE001", + "F405", + "F821", + "F841", + # these ignores are from flake8-logging-format; please fix! + "G101", + "G201", + "G202", + "G003", + "G004", + # these ignores are from RUFF perf; please fix! + "PERF203", + "PERF4", + "SIM102", + "SIM103", + "SIM112", # flake8-simplify code styles + "SIM105", # these ignores are from flake8-simplify. please fix or ignore with commented reason + "SIM108", + "SIM110", + "SIM114", # Combine `if` branches using logical `or` operator + "SIM115", + "SIM116", # Disable Use a dictionary instead of consecutive `if` statements + "SIM117", + "SIM118", +] +#line-length = 120 +lint.select = [ + "B", + "C4", + "G", + "E", + "F", + "SIM1", + "W", + # Not included in flake8 + "PERF", + "PLE", + "TRY302", +] + +# Allow unused variables when underscore-prefixed. +lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +# Allow autofix for all enabled rules (when `--fix`) is provided. +lint.fixable = [ + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "I", + "N", + "Q", + "S", + "T", + "W", + "ANN", + "ARG", + "BLE", + "COM", + "DJ", + "DTZ", + "EM", + "ERA", + "EXE", + "FBT", + "ICN", + "INP", + "ISC", + "NPY", + "PD", + "PGH", + "PIE", + "PL", + "PT", + "PTH", + "PYI", + "RET", + "RSE", + "RUF", + "SIM", + "SLF", + "TCH", + "TID", + "TRY", + "UP", + "YTT", +] +lint.unfixable = [] +target-version = "py311" + +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".mypy_cache", + ".nox", + ".pants.d", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "venv", + "env", + "py/torch_tensorrt/fx", + ".github", + "examples", + "tests", + "tools", + "docs", + "docsrc", + "tests", + "setup.py", + "noxfile.py", + "__init__.py", +] + +[tool.ruff.lint.mccabe] +# Unlike Flake8, default to a complexity level of 10. +max-complexity = 10 + +[tool.isort] +profile = "black" +py_version = 311 +skip = ["py/torch_tensorrt/fx"] + +[tool.black] +#line-length = 120 +target-version = ["py39", "py310", "py311", "py312", "py313"] +force-exclude = """ +elu_converter/setup.py +""" + +[tool.mypy] +strict = true +ignore_missing_imports = true +show_error_codes = true +disable_error_code = "attr-defined" +no_implicit_optional = true +exclude = [ + "^py/torch_tensorrt/fx", + "py/torch_tensorrt/fx", + "torch_tensorrt/fx", + "py/torch_tensorrt/_C.so", + "examples", + "docs", + "docsrc", + "tests", + "setup.py", + "noxfile.py", +] +python_version = "3.11" + +follow_imports = "skip" + +[[tool.mypy.overrides]] +module = "torch_tensorrt.dynamo.conversion.aten_converters" +disable_error_code = "arg-type" + +[[tool.mypy.overrides]] +module = "torch_tensorrt.dynamo.lowering._decompositions" +disallow_untyped_calls = false + +[[tool.mypy.overrides]] +module = "torch_tensorrt.fx.*" +ignore_errors = true +follow_imports = "skip" + +[tool.typos] +files.extend-exclude = [ + "docs/**/*", + "*/fx/*", + "docsrc/_rendered_examples/", + "core/*", + "!core/runtime/", + "third_party/", + "CHANGELOG.md", + "*.ipynb", + "cpp/", + "py/torch_tensorrt/fx/", +] + +[tool.typos.default] +extend-ignore-identifiers-re = [ + "^([A-z]|[a-z])*Nd*", + "^([A-z]|[a-z])*nd*", + "active*([A-z]|[a-z]|[0-9])*,", +] + +[tool.typos.default.extend-words] +arange = "arange" diff --git a/setup.py b/setup.py index f829602f1a..cea08fb028 100644 --- a/setup.py +++ b/setup.py @@ -28,6 +28,7 @@ __version__: str = "0.0.0" __cuda_version__: str = "0.0" __tensorrt_version__: str = "0.0" +__tensorrt_rtx_version__: str = "0.0" LEGACY_BASE_VERSION_SUFFIX_PATTERN = re.compile("a0$") @@ -63,6 +64,7 @@ def get_base_version() -> str: def load_dep_info(): global __cuda_version__ global __tensorrt_version__ + global __tensorrt_rtx_version__ with open("dev_dep_versions.yml", "r") as stream: versions = yaml.safe_load(stream) if (gpu_arch_version := os.environ.get("CU_VERSION")) is not None: @@ -72,6 +74,7 @@ def load_dep_info(): else: __cuda_version__ = versions["__cuda_version__"] __tensorrt_version__ = versions["__tensorrt_version__"] + __tensorrt_rtx_version__ = versions["__tensorrt_rtx_version__"] load_dep_info() @@ -86,6 +89,11 @@ def load_dep_info(): LEGACY = False RELEASE = False CI_BUILD = False +USE_RTX = False + +if "--use-rtx" in sys.argv: + USE_RTX = True + sys.argv.remove("--use-rtx") if "--fx-only" in sys.argv: PY_ONLY = True @@ -115,6 +123,10 @@ def load_dep_info(): if py_only_env_var == "1": PY_ONLY = True +if (use_rtx_env_var := os.environ.get("FORCE_TENSORRT_RTX")) is not None: + if use_rtx_env_var == "1": + USE_RTX = True + if (release_env_var := os.environ.get("RELEASE")) is not None: if release_env_var == "1": RELEASE = True @@ -210,6 +222,10 @@ def build_libtorchtrt_cxx11_abi( else: cmd.append("--config=linux") + if USE_RTX: + cmd.append("--config=rtx") + print("TensorRT RTX build") + if IS_JETPACK: cmd.append("--config=jetpack") print("Jetpack build") @@ -240,6 +256,7 @@ def gen_version_file(): f.write('__version__ = "' + __version__ + '"\n') f.write('__cuda_version__ = "' + __cuda_version__ + '"\n') f.write('__tensorrt_version__ = "' + __tensorrt_version__ + '"\n') + f.write('__tensorrt_rtx_version__ = "' + __tensorrt_rtx_version__ + '"\n') def copy_libtorchtrt(multilinux=False, rt_only=False): @@ -487,6 +504,15 @@ def run(self): .split("/BUILD.bazel")[0] ) + tensorrt_rtx_external_dir = ( + lambda: subprocess.check_output( + [BAZEL_EXE, "query", "@tensorrt_rtx//:nvinfer", "--output", "location"] + ) + .decode("ascii") + .strip() + .split("/BUILD.bazel")[0] + ) + tensorrt_sbsa_external_dir = ( lambda: subprocess.check_output( [BAZEL_EXE, "query", "@tensorrt_sbsa//:nvinfer", "--output", "location"] @@ -510,16 +536,35 @@ def run(self): elif IS_JETPACK: tensorrt_linux_external_dir = tensorrt_jetpack_external_dir else: - tensorrt_linux_external_dir = tensorrt_x86_64_external_dir + if USE_RTX: + tensorrt_linux_external_dir = tensorrt_rtx_external_dir + else: + tensorrt_linux_external_dir = tensorrt_x86_64_external_dir - tensorrt_windows_external_dir = ( - lambda: subprocess.check_output( - [BAZEL_EXE, "query", "@tensorrt_win//:nvinfer", "--output", "location"] + if USE_RTX: + tensorrt_windows_external_dir = ( + lambda: subprocess.check_output( + [ + BAZEL_EXE, + "query", + "@tensorrt_rtx_win//:nvinfer", + "--output", + "location", + ] + ) + .decode("ascii") + .strip() + .split("/BUILD.bazel")[0] + ) + else: + tensorrt_windows_external_dir = ( + lambda: subprocess.check_output( + [BAZEL_EXE, "query", "@tensorrt_win//:nvinfer", "--output", "location"] + ) + .decode("ascii") + .strip() + .split("/BUILD.bazel")[0] ) - .decode("ascii") - .strip() - .split("/BUILD.bazel")[0] - ) ext_modules += [ CUDAExtension( diff --git a/tests/py/core/test_classes.py b/tests/py/core/test_classes.py index 5f6df00ad8..8d59461d7c 100644 --- a/tests/py/core/test_classes.py +++ b/tests/py/core/test_classes.py @@ -2,9 +2,7 @@ import unittest from typing import Dict -import tensorrt as trt import torch -import torch_tensorrt import torch_tensorrt as torchtrt from torch_tensorrt.dynamo.runtime._TorchTensorRTModule import TorchTensorRTModule @@ -58,7 +56,7 @@ def test_from_torch(self): @unittest.skipIf( - not torch_tensorrt.ENABLED_FEATURES.torch_tensorrt_runtime, + not torchtrt.ENABLED_FEATURES.torch_tensorrt_runtime, "Torch-TensorRT runtime is not available", ) class TestPlatform(unittest.TestCase): diff --git a/tests/py/dynamo/conversion/test_nonzero_aten.py b/tests/py/dynamo/conversion/test_nonzero_aten.py index f2c5123575..69a8024077 100644 --- a/tests/py/dynamo/conversion/test_nonzero_aten.py +++ b/tests/py/dynamo/conversion/test_nonzero_aten.py @@ -1,8 +1,11 @@ +import unittest + import torch import torch.nn as nn from parameterized import parameterized from torch.testing._internal.common_utils import run_tests from torch_tensorrt import Input +from torch_tensorrt._utils import is_tensorrt_rtx from .harness import DispatchTestCase @@ -17,6 +20,10 @@ class TestNonZeroConverter(DispatchTestCase): ((2, 3, 4, 5), torch.float), ] ) + @unittest.skipIf( + is_tensorrt_rtx(), + "nonzero is not supported for tensorrt_rtx", + ) def test_nonzero_dds(self, input_shape, dtype): class NonZero(nn.Module): # This is a DDS network @@ -39,6 +46,10 @@ def forward(self, input): ((2, 3, 4, 5), torch.float), ] ) + @unittest.skipIf( + is_tensorrt_rtx(), + "nonzero is not supported for tensorrt_rtx", + ) def test_nonzero_non_dds(self, input_shape, dtype): class NonZero(nn.Module): # This is a static network @@ -78,6 +89,10 @@ def forward(self, input): ), ] ) + @unittest.skipIf( + is_tensorrt_rtx(), + "nonzero is not supported for tensorrt_rtx", + ) def test_nonzero_dynamic_shape_dds(self, _, min_shape, opt_shape, max_shape, dtype): class NonZero(nn.Module): def forward(self, input): @@ -119,6 +134,10 @@ def forward(self, input): ), ] ) + @unittest.skipIf( + is_tensorrt_rtx(), + "nonzero is not supported for tensorrt_rtx", + ) def test_nonzero_dynamic_shape_non_dds( self, _, min_shape, opt_shape, max_shape, dtype ): diff --git a/tests/py/dynamo/models/test_models_export.py b/tests/py/dynamo/models/test_models_export.py index 66f7a1821b..583ee9a18e 100644 --- a/tests/py/dynamo/models/test_models_export.py +++ b/tests/py/dynamo/models/test_models_export.py @@ -7,6 +7,7 @@ import pytest import torch import torch_tensorrt as torchtrt +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo.utils import COSINE_THRESHOLD, cosine_similarity from packaging.version import Version @@ -410,6 +411,9 @@ def calibrate_loop(model): input_tensor = torch.randn(1, 10).cuda().to(dtype) model = SimpleNetwork().eval().cuda().to(dtype) quant_cfg = mtq.INT8_DEFAULT_CFG + # RTX does not support INT8 default quantization(weights+activations), only support INT8 weights only quantization + if is_tensorrt_rtx(): + quant_cfg["quant_cfg"]["*input_quantizer"] = {"enable": False} mtq.quantize(model, quant_cfg, forward_loop=calibrate_loop) # model has INT8 qdq nodes at this point output_pyt = model(input_tensor) @@ -463,6 +467,9 @@ def calibrate_loop(model): model = SimpleNetwork().eval().cuda().to(dtype) quant_cfg = mtq.INT8_DEFAULT_CFG + # RTX does not support INT8 default quantization(weights+activations), only support INT8 weights only quantization + if torchtrt.tensorrt_package_name == "tensorrt_rtx": + quant_cfg["quant_cfg"]["*input_quantizer"] = {"enable": False} mtq.quantize(model, quant_cfg, forward_loop=calibrate_loop) # model has INT8 qdq nodes at this point diff --git a/tests/py/dynamo/runtime/test_000_compilation_settings.py b/tests/py/dynamo/runtime/test_000_compilation_settings.py index 1f7a74fefd..500ca9ef71 100644 --- a/tests/py/dynamo/runtime/test_000_compilation_settings.py +++ b/tests/py/dynamo/runtime/test_000_compilation_settings.py @@ -5,6 +5,7 @@ import torch import torch_tensorrt from torch.testing._internal.common_utils import TestCase, run_tests +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.dynamo.utils import is_tegra_platform from packaging.version import Version @@ -61,7 +62,7 @@ def forward(self, x): torch._dynamo.reset() @unittest.skipIf( - is_tegra_platform() and Version(metadata.version("tensorrt")) > Version("10.8"), + is_tegra_platform() and is_tensorrt_version_supported("10.8"), "DLA is not supported on Jetson platform starting TRT 10.8", ) def test_dla_args(self): diff --git a/tests/util/BUILD b/tests/util/BUILD index 3b89c9073e..fb33140b7e 100644 --- a/tests/util/BUILD +++ b/tests/util/BUILD @@ -5,7 +5,7 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", }, ) @@ -22,7 +22,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -32,7 +32,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -58,22 +58,24 @@ cc_library( deps = [ "@googletest//:gtest_main", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": [ - "@libtorch_win//:caffe2", - "@libtorch_win//:libtorch", + ":jetpack": [ + "@torch_l4t//:caffe2", + "@torch_l4t//:libtorch", ], ":use_torch_whl": [ "@torch_whl//:caffe2", "@torch_whl//:libtorch", ], - ":jetpack": [ - "@torch_l4t//:libtorch", - "@torch_l4t//:caffe2", + ":windows": [ + "@libtorch_win//:caffe2", + "@libtorch_win//:libtorch", ], "//conditions:default": [ "@libtorch", diff --git a/third_party/tensorrt_rtx/archive/BUILD b/third_party/tensorrt_rtx/archive/BUILD new file mode 100644 index 0000000000..ec6ebbe985 --- /dev/null +++ b/third_party/tensorrt_rtx/archive/BUILD @@ -0,0 +1,68 @@ +load("@rules_cc//cc:defs.bzl", "cc_import", "cc_library") + +package(default_visibility = ["//visibility:public"]) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = {"@//toolchains/dep_collection:compute_libs": "rtx"}, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = {"@//toolchains/dep_collection:compute_libs": "rtx"}, +) + +cc_library( + name = "nvinfer_headers", + hdrs = glob( + [ + "include/NvInfer*.h", + ], + allow_empty = True, + # exclude = [ + # "include/NvInferPlugin.h", + # "include/NvInferPluginUtils.h", + # ], + ), + includes = ["include/"], + visibility = ["//visibility:private"], +) + +cc_import( + name = "nvinfer_lib", + shared_library = select({ + ":rtx_win": "lib/tensorrt_rtx_1_0.dll", + ":rtx_x86_64": "lib/libtensorrt_rtx.so", + }), + visibility = ["//visibility:private"], +) + +cc_import( + name = "nvinfer_static_lib", + static_library = select({ + ":rtx_win": "lib/tensorrt_rtx_1_0.lib", + }), + visibility = ["//visibility:private"], +) + +cc_library( + name = "nvinfer", + visibility = ["//visibility:public"], + deps = [ + "nvinfer_headers", + "nvinfer_lib", + ] + select({ + ":rtx_win": [ + "nvinfer_static_lib", + "@cuda_win//:cudart", + ], + ":rtx_x86_64": ["@cuda//:cudart"], + }), +) diff --git a/third_party/tensorrt_rtx/local/BUILD b/third_party/tensorrt_rtx/local/BUILD new file mode 100644 index 0000000000..fe844170fb --- /dev/null +++ b/third_party/tensorrt_rtx/local/BUILD @@ -0,0 +1,80 @@ +load("@rules_cc//cc:defs.bzl", "cc_import", "cc_library") + +package(default_visibility = ["//visibility:public"]) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = {"@//toolchains/dep_collection:compute_libs": "rtx"}, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = {"@//toolchains/dep_collection:compute_libs": "rtx"}, +) + +cc_library( + name = "nvinfer_headers", + hdrs = select({ + ":rtx_win": glob( + [ + "include/NvInfer*.h", + ], + allow_empty = True, + # exclude = [ + # "include/NvInferPlugin.h", + # "include/NvInferPluginUtils.h", + # ], + ), + ":rtx_x86_64": glob( + [ + "include/NvInfer*.h", + ], + allow_empty = True, + # exclude = [ + # "include/NvInferPlugin.h", + # "include/NvInferPluginUtils.h", + # ], + ), + }), + includes = ["include/"], + visibility = ["//visibility:private"], +) + +cc_import( + name = "nvinfer_static_lib", + static_library = select({ + ":rtx_win": "lib/tensorrt_rtx_1_0.lib", + }), + visibility = ["//visibility:private"], +) + +cc_import( + name = "nvinfer_lib", + shared_library = select({ + ":rtx_win": "lib/tensorrt_rtx_1_0.dll", + ":rtx_x86_64": "lib/libtensorrt_rtx.so", + }), + visibility = ["//visibility:private"], +) + +cc_library( + name = "nvinfer", + visibility = ["//visibility:public"], + deps = [ + "nvinfer_headers", + "nvinfer_lib", + ] + select({ + ":rtx_win": [ + "nvinfer_static_lib", + "@cuda_win//:cudart", + ], + "//conditions:default": ["@cuda//:cudart"], + }), +) diff --git a/toolchains/ci_workspaces/MODULE.bazel.tmpl b/toolchains/ci_workspaces/MODULE.bazel.tmpl index 4f29a469da..11483872ed 100644 --- a/toolchains/ci_workspaces/MODULE.bazel.tmpl +++ b/toolchains/ci_workspaces/MODULE.bazel.tmpl @@ -81,6 +81,15 @@ http_archive( ], ) +http_archive( + name = "tensorrt_rtx", + build_file = "@//third_party/tensorrt_rtx/archive:BUILD", + strip_prefix = "TensorRT-RTX-1.0.0.21", + urls = [ + "https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz", + ], +) + http_archive( name = "tensorrt_sbsa", build_file = "@//third_party/tensorrt/archive:BUILD", @@ -108,6 +117,14 @@ http_archive( ], ) +http_archive( + name = "tensorrt_rtx_win", + build_file = "@//third_party/tensorrt_rtx/archive:BUILD", + strip_prefix = "TensorRT-RTX-1.0.0.21", + urls = [ + "https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip", + ], +) #################################################################################### # Locally installed dependencies (use in cases of custom dependencies or aarch64) diff --git a/toolchains/dep_collection/defs.bzl b/toolchains/dep_collection/defs.bzl index 6eaa710261..873ef7ec42 100644 --- a/toolchains/dep_collection/defs.bzl +++ b/toolchains/dep_collection/defs.bzl @@ -1,7 +1,7 @@ # buildifier: disable=module-docstring DependencyCollectionInfo = provider(doc = "", fields = ["type"]) -collection_types = ["default", "jetpack"] +collection_types = ["default", "jetpack", "rtx"] def _impl(ctx): _type = ctx.build_setting_value diff --git a/tools/debug/engine_visualization/__init__.py b/tools/debug/engine_visualization/__init__.py new file mode 100644 index 0000000000..e69de29bb2 From 7d34b492baa624f1e768313f434d9a149918c51e Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Wed, 6 Aug 2025 12:17:39 -0700 Subject: [PATCH 02/30] LD_LIBRARY_PATH fix for windows smoke test --- .github/workflows/build_wheels_windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_wheels_windows.yml b/.github/workflows/build_wheels_windows.yml index 7af9dead8f..0aa6570f09 100644 --- a/.github/workflows/build_wheels_windows.yml +++ b/.github/workflows/build_wheels_windows.yml @@ -326,7 +326,7 @@ jobs: export FORCE_TENSORRT_RTX=1 # TODO: lan to remove this once we have a better way to handle the LD_LIBRARY_PATH # the LD_LIBRARY_PATH set in the pre_build_script_windows.sh will not be available in the smoke test, have to set it here again - export LD_LIBRARY_PATH=${{ inputs.repository }}/TensorRT-RTX-1.0.0.21/lib:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=${PWD}/${{ inputs.repository }}/TensorRT-RTX-1.0.0.21/lib:$LD_LIBRARY_PATH fi if [[ ! -f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT} ]]; then echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found" From 184c84c35c1b13f154e61aa6484350d48e166039 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Wed, 6 Aug 2025 14:31:45 -0700 Subject: [PATCH 03/30] resolve comments --- .github/scripts/install-tensorrt-rtx.sh | 2 +- .github/scripts/install-torch-tensorrt.sh | 8 +----- .../workflows/build-test-linux-x86_64_rtx.yml | 18 ++++++------- .github/workflows/build-test-windows_rtx.yml | 16 ++++++------ .github/workflows/build_wheels_linux.yml | 4 +-- .github/workflows/build_wheels_windows.yml | 25 +++++++++---------- .github/workflows/linux-test.yml | 4 +-- .github/workflows/windows-test.yml | 2 +- docsrc/getting_started/tensorrt_rtx.rst | 6 ++--- packaging/pre_build_script.sh | 2 +- packaging/pre_build_script_windows.sh | 2 +- .../{trt_alias.py => _TensorRTProxyModule.py} | 6 ++--- py/torch_tensorrt/__init__.py | 16 ++++++------ setup.py | 14 +++++------ 14 files changed, 60 insertions(+), 65 deletions(-) rename py/torch_tensorrt/{trt_alias.py => _TensorRTProxyModule.py} (97%) diff --git a/.github/scripts/install-tensorrt-rtx.sh b/.github/scripts/install-tensorrt-rtx.sh index bb44681607..ef5c2945f3 100644 --- a/.github/scripts/install-tensorrt-rtx.sh +++ b/.github/scripts/install-tensorrt-rtx.sh @@ -1,6 +1,6 @@ install_tensorrt_rtx() { - if [[ ${USE_RTX} == true ]]; then + if [[ ${USE_TRT_RTX} == true ]]; then install_wheel_or_not=${1:-false} echo "It is the tensorrt-rtx build, install tensorrt-rtx with install_wheel_or_not:${install_wheel_or_not}" PLATFORM=$(python -c "import sys; print(sys.platform)") diff --git a/.github/scripts/install-torch-tensorrt.sh b/.github/scripts/install-torch-tensorrt.sh index 49a367b832..7d0b7a5947 100755 --- a/.github/scripts/install-torch-tensorrt.sh +++ b/.github/scripts/install-torch-tensorrt.sh @@ -21,7 +21,7 @@ pip uninstall -y torch torchvision pip install --force-reinstall --pre ${TORCHVISION} --index-url ${INDEX_URL} pip install --force-reinstall --pre ${TORCH} --index-url ${INDEX_URL} -if [[ ${USE_RTX} == true ]]; then +if [[ ${USE_TRT_RTX} == true ]]; then source .github/scripts/install-tensorrt-rtx.sh # tensorrt-rtx is not publicly available, so we need to install the wheel from the tar ball install_wheel_or_not=true @@ -35,10 +35,4 @@ else pip install /opt/torch-tensorrt-builds/torch_tensorrt*.whl fi -if [[ ${USE_RTX} == true ]]; then - # currently tensorrt is installed automatically by install torch-tensorrt since it is a dependency of torch-tensorrt in pyproject.toml - # so we need to uninstall it to avoid conflict - pip uninstall -y tensorrt tensorrt_cu12 tensorrt_cu12_bindings tensorrt_cu12_libs -fi - echo -e "Running test script"; diff --git a/.github/workflows/build-test-linux-x86_64_rtx.yml b/.github/workflows/build-test-linux-x86_64_rtx.yml index ab7c1ec9f2..44f6cb5580 100644 --- a/.github/workflows/build-test-linux-x86_64_rtx.yml +++ b/.github/workflows/build-test-linux-x86_64_rtx.yml @@ -1,4 +1,4 @@ -name: Build and test Linux x86_64 wheels(RTX) +name: RTX - Build and test Linux x86_64 wheels on: pull_request: @@ -104,7 +104,7 @@ jobs: export USE_HOST_DEPS=1 export CI_BUILD=1 export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/modules python hub.py @@ -144,7 +144,7 @@ jobs: export CI_BUILD=1 pushd . cd tests/py/dynamo - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin.py python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin_with_attrs.py @@ -177,7 +177,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -210,7 +210,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -244,7 +244,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -279,7 +279,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -314,7 +314,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -349,7 +349,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py/core python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . diff --git a/.github/workflows/build-test-windows_rtx.yml b/.github/workflows/build-test-windows_rtx.yml index 31078c39b7..b271487395 100644 --- a/.github/workflows/build-test-windows_rtx.yml +++ b/.github/workflows/build-test-windows_rtx.yml @@ -1,4 +1,4 @@ -name: Build and test Windows wheels(RTX) +name: RTX - Build and test Windows wheels on: pull_request: @@ -110,7 +110,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/modules python hub.py @@ -174,7 +174,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -204,7 +204,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -235,7 +235,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -267,7 +267,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -299,7 +299,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -330,7 +330,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py/core python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . diff --git a/.github/workflows/build_wheels_linux.yml b/.github/workflows/build_wheels_linux.yml index ef0805cf73..c2d88da347 100644 --- a/.github/workflows/build_wheels_linux.yml +++ b/.github/workflows/build_wheels_linux.yml @@ -141,7 +141,7 @@ jobs: UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }} ARCH: ${{ inputs.architecture }} BUILD_TARGET: ${{ inputs.build-target }} - USE_RTX: ${{ inputs.use-rtx }} + USE_TRT_RTX: ${{ inputs.use-rtx }} name: build-wheel-${{ matrix.python_version }}-${{ matrix.desired_cuda }}-${{ matrix.gpu_arch_type }}-${{ inputs.architecture }}-${{ inputs.use-rtx }}-${{ inputs.is-jetpack }} runs-on: ${{ matrix.validation_runner }} environment: ${{(inputs.trigger-event == 'schedule' || (inputs.trigger-event == 'push' && (startsWith(github.event.ref, 'refs/heads/nightly') || startsWith(github.event.ref, 'refs/tags/v')))) && 'pytorchbot-env' || ''}} @@ -269,7 +269,7 @@ jobs: BUILD_VERSION="${BUILD_VERSION}+${CU_VERSION}" fi echo "BUILD_VERSION=$BUILD_VERSION" - echo "USE_RTX=$USE_RTX" + echo "USE_TRT_RTX=$USE_TRT_RTX" echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" if [[ ${{ inputs.use-rtx }} == true ]]; then echo "Building tensorrt-rtx wheel" diff --git a/.github/workflows/build_wheels_windows.yml b/.github/workflows/build_wheels_windows.yml index 0aa6570f09..5c9e391cd6 100644 --- a/.github/workflows/build_wheels_windows.yml +++ b/.github/workflows/build_wheels_windows.yml @@ -106,7 +106,7 @@ jobs: REF: ${{ inputs.ref }} CU_VERSION: ${{ matrix.desired_cuda }} UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }} - USE_RTX: ${{ inputs.use-rtx }} + USE_TRT_RTX: ${{ inputs.use-rtx }} name: build-${{ matrix.build_name }} runs-on: ${{ matrix.validation_runner }} defaults: @@ -315,25 +315,24 @@ jobs: env: ENV_SCRIPT: ${{ inputs.env-script }} PACKAGE_NAME: ${{ inputs.package-name }} - USE_RTX: ${{ inputs.use-rtx }} + USE_TRT_RTX: ${{ inputs.use-rtx }} SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} run: | source "${BUILD_ENV_FILE}" WHEEL_NAME=$(ls "${{ inputs.repository }}/dist/") echo "$WHEEL_NAME" ${CONDA_RUN} pip install "${{ inputs.repository }}/dist/$WHEEL_NAME" - if [[ $USE_RTX == true ]]; then - export FORCE_TENSORRT_RTX=1 - # TODO: lan to remove this once we have a better way to handle the LD_LIBRARY_PATH - # the LD_LIBRARY_PATH set in the pre_build_script_windows.sh will not be available in the smoke test, have to set it here again - export LD_LIBRARY_PATH=${PWD}/${{ inputs.repository }}/TensorRT-RTX-1.0.0.21/lib:$LD_LIBRARY_PATH - fi - if [[ ! -f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT} ]]; then - echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found" - ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)" + if [[ $USE_TRT_RTX == true ]]; then + # TODO: lan to remove this once we have a better way to do a smoke test + echo "Smoke test for TensorRT-RTX is not skipped for now" else - echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} found" - ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT}" + if [[ ! -f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT} ]]; then + echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found" + ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)" + else + echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} found" + ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT}" + fi fi - name: Smoke Test ARM64 if: inputs.architecture == 'arm64' diff --git a/.github/workflows/linux-test.yml b/.github/workflows/linux-test.yml index 9883db653d..ae2b5216d5 100644 --- a/.github/workflows/linux-test.yml +++ b/.github/workflows/linux-test.yml @@ -73,7 +73,7 @@ jobs: SCRIPT: ${{ inputs.script }} RUNNER_TEST_RESULTS_DIR: /tmp/test_results ARCH: ${{ inputs.architecture }} - USE_RTX: ${{ inputs.use-rtx }} + USE_TRT_RTX: ${{ inputs.use-rtx }} DOWNLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_${{ inputs.architecture }} name: ${{ inputs.job-name }}-${{ matrix.tensorrt.version }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }} runs-on: ${{ matrix.validation_runner }} @@ -141,7 +141,7 @@ jobs: working-directory: ${{ inputs.repository }} env: ALL_SECRETS: ${{ toJSON(secrets) }} - USE_RTX: ${{ inputs.use-rtx }} + USE_TRT_RTX: ${{ inputs.use-rtx }} run: | set -euxo pipefail # shellcheck disable=SC2086 diff --git a/.github/workflows/windows-test.yml b/.github/workflows/windows-test.yml index dcd4351fb4..5e3b5c9918 100644 --- a/.github/workflows/windows-test.yml +++ b/.github/workflows/windows-test.yml @@ -60,7 +60,7 @@ jobs: CU_VERSION: ${{ matrix.desired_cuda }} SCRIPT: ${{ inputs.script }} PYTHONUTF8: 1 - USE_RTX: ${{ inputs.use-rtx }} + USE_TRT_RTX: ${{ inputs.use-rtx }} DOWNLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_win_amd64 name: ${{ inputs.job-name }}-${{ matrix.tensorrt.version }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }} runs-on: ${{ matrix.validation_runner }} diff --git a/docsrc/getting_started/tensorrt_rtx.rst b/docsrc/getting_started/tensorrt_rtx.rst index 8edf80699c..42c9e916d8 100644 --- a/docsrc/getting_started/tensorrt_rtx.rst +++ b/docsrc/getting_started/tensorrt_rtx.rst @@ -19,7 +19,7 @@ Currenlty, Torch-TensorRT only supports TensorRT-RTX for the experiment purpose. Torch-TensorRT by default uses TensorRT during the build and run. In order to use TensorRT-RTX, you need to build the wheel with ``--use-rtx`` flag. -And then set the ``FORCE_TENSORRT_RTX=1`` environment variable during run. +And then set the ``USE_TRT_RTX=1`` environment variable during run. @@ -60,6 +60,6 @@ Quick Start =========== .. code-block:: py - # you have to set FORCE_TENSORRT_RTX=1 to use TensorRT-RTX - FORCE_TENSORRT_RTX=1 python examples/dynamo/torch_compile_resnet_example.py + # you have to set USE_TRT_RTX=1 to use TensorRT-RTX + USE_TRT_RTX=1 python examples/dynamo/torch_compile_resnet_example.py diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh index 6294632c59..d4ab7a6241 100755 --- a/packaging/pre_build_script.sh +++ b/packaging/pre_build_script.sh @@ -76,7 +76,7 @@ fi cat MODULE.bazel export CI_BUILD=1 -if [[ ${USE_RTX} == true ]]; then +if [[ ${USE_TRT_RTX} == true ]]; then cat pyproject_rtx.toml.temp > pyproject.toml source .github/scripts/install-tensorrt-rtx.sh install_wheel_or_not=true diff --git a/packaging/pre_build_script_windows.sh b/packaging/pre_build_script_windows.sh index c4d2b37322..5cc04011b3 100644 --- a/packaging/pre_build_script_windows.sh +++ b/packaging/pre_build_script_windows.sh @@ -37,7 +37,7 @@ fi cat MODULE.bazel echo "RELEASE=1" >> ${GITHUB_ENV} -if [[ ${USE_RTX} == true ]]; then +if [[ ${USE_TRT_RTX} == true ]]; then cat pyproject_rtx.toml.temp > pyproject.toml source .github/scripts/install-tensorrt-rtx.sh install_wheel_or_not=true diff --git a/py/torch_tensorrt/trt_alias.py b/py/torch_tensorrt/_TensorRTProxyModule.py similarity index 97% rename from py/torch_tensorrt/trt_alias.py rename to py/torch_tensorrt/_TensorRTProxyModule.py index 4a80d1c12e..832e8c8932 100644 --- a/py/torch_tensorrt/trt_alias.py +++ b/py/torch_tensorrt/_TensorRTProxyModule.py @@ -78,11 +78,11 @@ def alias_tensorrt() -> None: return # in order not to break or change the existing behavior, we only build and run with tensorrt by default, tensorrt-rtx is for experiment only - # if we want to test with tensorrt-rtx, we have to build the wheel with --use-rtx and test with FORCE_TENSORRT_RTX=1 - # eg: FORCE_TENSORRT_RTX=1 python test.py + # if we want to test with tensorrt-rtx, we have to build the wheel with --use-rtx and test with USE_TRT_RTX=1 + # eg: USE_TRT_RTX=1 python test.py # in future, we can do dynamic linking either to tensorrt or tensorrt-rtx based on the gpu type use_rtx = False - if os.environ.get("FORCE_TENSORRT_RTX", "0") == "1": + if os.environ.get("USE_TRT_RTX", "0") == "1": use_rtx = True package_name = "tensorrt_rtx" if use_rtx else "tensorrt" # Import the appropriate package diff --git a/py/torch_tensorrt/__init__.py b/py/torch_tensorrt/__init__.py index 1b2a498961..d127f42690 100644 --- a/py/torch_tensorrt/__init__.py +++ b/py/torch_tensorrt/__init__.py @@ -27,19 +27,21 @@ tensorrt_package_name = "" try: - # note: trt_alias must be imported before any import tensorrt + # note: _TensorRTProxyModule must be imported before any import tensorrt - from . import trt_alias # noqa: F401 + from . import _TensorRTProxyModule # noqa: F401 - tensorrt_package_name = trt_alias.package_name - _LOGGER.info(f"You are using {trt_alias.package_name=} ") + tensorrt_package_name = _TensorRTProxyModule.package_name + _LOGGER.info(f"You are using {_TensorRTProxyModule.package_name=} ") except Exception as e: - print(f"import error when try to import trt_alias, got error {e}") + print(f"import error when try to import _TensorRTProxyModule, got error {e}") print( f"make sure tensorrt lib is in the LD_LIBRARY_PATH: {os.environ.get('LD_LIBRARY_PATH')}" ) - raise Exception(f"import error when try to import trt_alias, got error {e}") + raise Exception( + f"import error when try to import _TensorRTProxyModule, got error {e}" + ) def _register_with_torch() -> None: @@ -68,7 +70,7 @@ def _register_with_torch() -> None: torch.ops.load_library(linked_file_runtime_full_path) -# note: trt_alias must be imported before enabled features, because enabled features will check tensorrt.plugin availability +# note: _TensorRTProxyModule must be imported before enabled features, because enabled features will check tensorrt.plugin availability from torch_tensorrt._features import ENABLED_FEATURES, _enabled_features_str _LOGGER.debug(_enabled_features_str()) diff --git a/setup.py b/setup.py index cea08fb028..a4bbfb2834 100644 --- a/setup.py +++ b/setup.py @@ -89,10 +89,10 @@ def load_dep_info(): LEGACY = False RELEASE = False CI_BUILD = False -USE_RTX = False +USE_TRT_RTX = False if "--use-rtx" in sys.argv: - USE_RTX = True + USE_TRT_RTX = True sys.argv.remove("--use-rtx") if "--fx-only" in sys.argv: @@ -123,9 +123,9 @@ def load_dep_info(): if py_only_env_var == "1": PY_ONLY = True -if (use_rtx_env_var := os.environ.get("FORCE_TENSORRT_RTX")) is not None: +if (use_rtx_env_var := os.environ.get("USE_TRT_RTX")) is not None: if use_rtx_env_var == "1": - USE_RTX = True + USE_TRT_RTX = True if (release_env_var := os.environ.get("RELEASE")) is not None: if release_env_var == "1": @@ -222,7 +222,7 @@ def build_libtorchtrt_cxx11_abi( else: cmd.append("--config=linux") - if USE_RTX: + if USE_TRT_RTX: cmd.append("--config=rtx") print("TensorRT RTX build") @@ -536,12 +536,12 @@ def run(self): elif IS_JETPACK: tensorrt_linux_external_dir = tensorrt_jetpack_external_dir else: - if USE_RTX: + if USE_TRT_RTX: tensorrt_linux_external_dir = tensorrt_rtx_external_dir else: tensorrt_linux_external_dir = tensorrt_x86_64_external_dir - if USE_RTX: + if USE_TRT_RTX: tensorrt_windows_external_dir = ( lambda: subprocess.check_output( [ From cf809977256a9088453cd23e5844715f4a75ca1b Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Wed, 6 Aug 2025 15:50:30 -0700 Subject: [PATCH 04/30] change the pyproject.toml to make dependencies dynamic --- .../workflows/build-test-linux-x86_64_rtx.yml | 8 - .github/workflows/build-test-windows_rtx.yml | 7 - docsrc/getting_started/tensorrt_rtx.rst | 19 +- packaging/pre_build_script.sh | 1 - packaging/pre_build_script_windows.sh | 1 - py/torch_tensorrt/_TensorRTProxyModule.py | 9 +- pyproject.toml | 29 +- pyproject_rtx.toml.temp | 358 --------------- setup.py | 55 ++- uv.lock | 422 +++--------------- 10 files changed, 137 insertions(+), 772 deletions(-) delete mode 100644 pyproject_rtx.toml.temp diff --git a/.github/workflows/build-test-linux-x86_64_rtx.yml b/.github/workflows/build-test-linux-x86_64_rtx.yml index 44f6cb5580..24d0169e2c 100644 --- a/.github/workflows/build-test-linux-x86_64_rtx.yml +++ b/.github/workflows/build-test-linux-x86_64_rtx.yml @@ -104,7 +104,6 @@ jobs: export USE_HOST_DEPS=1 export CI_BUILD=1 export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH - export USE_TRT_RTX=1 pushd . cd tests/modules python hub.py @@ -144,7 +143,6 @@ jobs: export CI_BUILD=1 pushd . cd tests/py/dynamo - export USE_TRT_RTX=1 python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin.py python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin_with_attrs.py @@ -177,7 +175,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -210,7 +207,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -244,7 +240,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -279,7 +274,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -314,7 +308,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -349,7 +342,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py/core python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . diff --git a/.github/workflows/build-test-windows_rtx.yml b/.github/workflows/build-test-windows_rtx.yml index b271487395..6321cd8a52 100644 --- a/.github/workflows/build-test-windows_rtx.yml +++ b/.github/workflows/build-test-windows_rtx.yml @@ -110,7 +110,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/modules python hub.py @@ -174,7 +173,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -204,7 +202,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -235,7 +232,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -267,7 +263,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -299,7 +294,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -330,7 +324,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py/core python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . diff --git a/docsrc/getting_started/tensorrt_rtx.rst b/docsrc/getting_started/tensorrt_rtx.rst index 42c9e916d8..5e617b459a 100644 --- a/docsrc/getting_started/tensorrt_rtx.rst +++ b/docsrc/getting_started/tensorrt_rtx.rst @@ -18,8 +18,8 @@ For detailed information about TensorRT-RTX, refer to: Currenlty, Torch-TensorRT only supports TensorRT-RTX for the experiment purpose. Torch-TensorRT by default uses TensorRT during the build and run. -In order to use TensorRT-RTX, you need to build the wheel with ``--use-rtx`` flag. -And then set the ``USE_TRT_RTX=1`` environment variable during run. +In order to use TensorRT-RTX, you need to build the wheel with ``--use-rtx`` flag or ``USE_TRT_RTX=true``. +And during the run, you need set the ``USE_TRT_RTX=true`` environment variable to invoke with TensorRT-RTX. @@ -34,6 +34,11 @@ System Preparation .. code-block:: sh # if TensorRT-RTX is downloaded in /usr/local/tensorrt-rtx export LD_LIBRARY_PATH=/usr/local/tensorrt-rtx/lib:$LD_LIBRARY_PATH + cd /usr/local/tensorrt-rtx/python + # install the tensorrt_rtx wheel + # currently tensorrt_rtx wheel is only available from tarball downloaded, not from pypi yet. + python -m pip install tensorrt_rtx-1.0.0.21-cp39-none-linux_x86_64.whl + Build Torch-TensorRT with TensorRT-RTX @@ -49,17 +54,19 @@ Build Torch-TensorRT with TensorRT-RTX cd dist python -m pip install torch-tensorrt-*.whl + # check that tensorrt related wheel is not installed, only tensorrt_rtx is there + python -m pip list | grep tensorrt + # make sure the tensorrt_rtx.so file is linked to the tensorrt_rtx.so file in the TensorRT-RTX installation directory trt_install_path=$(python -m pip show torch-tensorrt | grep "Location" | awk '{print $2}')/torch_tensorrt - # check if the libtensorrt_rtx.so.1 is linked + # check if the libtensorrt_rtx.so.1 is linked, and make sure there is no libnvinfer.so.10 linked ldd $trt_install_path/lib/libtorchtrt.so - Quick Start =========== .. code-block:: py - # you have to set USE_TRT_RTX=1 to use TensorRT-RTX - USE_TRT_RTX=1 python examples/dynamo/torch_compile_resnet_example.py + # you have to set USE_TRT_RTX=true to use TensorRT-RTX + USE_TRT_RTX=true python examples/dynamo/torch_compile_resnet_example.py diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh index d4ab7a6241..914c203997 100755 --- a/packaging/pre_build_script.sh +++ b/packaging/pre_build_script.sh @@ -77,7 +77,6 @@ cat MODULE.bazel export CI_BUILD=1 if [[ ${USE_TRT_RTX} == true ]]; then - cat pyproject_rtx.toml.temp > pyproject.toml source .github/scripts/install-tensorrt-rtx.sh install_wheel_or_not=true install_tensorrt_rtx ${install_wheel_or_not} diff --git a/packaging/pre_build_script_windows.sh b/packaging/pre_build_script_windows.sh index 5cc04011b3..4be0018f0d 100644 --- a/packaging/pre_build_script_windows.sh +++ b/packaging/pre_build_script_windows.sh @@ -38,7 +38,6 @@ cat MODULE.bazel echo "RELEASE=1" >> ${GITHUB_ENV} if [[ ${USE_TRT_RTX} == true ]]; then - cat pyproject_rtx.toml.temp > pyproject.toml source .github/scripts/install-tensorrt-rtx.sh install_wheel_or_not=true install_tensorrt_rtx ${install_wheel_or_not} diff --git a/py/torch_tensorrt/_TensorRTProxyModule.py b/py/torch_tensorrt/_TensorRTProxyModule.py index 832e8c8932..c5917a3ae0 100644 --- a/py/torch_tensorrt/_TensorRTProxyModule.py +++ b/py/torch_tensorrt/_TensorRTProxyModule.py @@ -78,12 +78,13 @@ def alias_tensorrt() -> None: return # in order not to break or change the existing behavior, we only build and run with tensorrt by default, tensorrt-rtx is for experiment only - # if we want to test with tensorrt-rtx, we have to build the wheel with --use-rtx and test with USE_TRT_RTX=1 - # eg: USE_TRT_RTX=1 python test.py + # if we want to test with tensorrt-rtx, we have to build the wheel with --use-rtx and test with USE_TRT_RTX=true + # eg: USE_TRT_RTX=true python test.py # in future, we can do dynamic linking either to tensorrt or tensorrt-rtx based on the gpu type use_rtx = False - if os.environ.get("USE_TRT_RTX", "0") == "1": - use_rtx = True + if (use_rtx_env_var := os.environ.get("USE_TRT_RTX")) is not None: + if use_rtx_env_var.lower() == "true": + use_rtx = True package_name = "tensorrt_rtx" if use_rtx else "tensorrt" # Import the appropriate package try: diff --git a/pyproject.toml b/pyproject.toml index d390e8b4a9..19cd737bc1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,30 +50,8 @@ keywords = [ "torchscript", "inference", ] -dependencies = [ - "torch>=2.9.0.dev,<2.10.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", - "torch>=2.7.0,<2.8.0; platform_machine == 'aarch64' and 'tegra' in platform_release", - - "tensorrt>=10.12.0,<10.13.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", - "tensorrt-cu12>=10.12.0,<10.13.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", - "tensorrt-cu12-bindings>=10.12.0,<10.13.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", - "tensorrt-cu12-libs>=10.12.0,<10.13.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", - - "tensorrt>=10.3.0,<10.4.0;platform_machine == 'aarch64' and 'tegra' in platform_release", - "tensorrt-cu12>=10.3.0,<10.4.0; platform_machine == 'aarch64' and 'tegra' in platform_release", - "tensorrt-cu12-bindings>=10.3.0,<10.4.0; platform_machine == 'aarch64' and 'tegra' in platform_release", - "tensorrt-cu12-libs>=10.3.0,<10.4.0; platform_machine == 'aarch64' and 'tegra' in platform_release", - - "packaging>=23", - "numpy; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", - "numpy<2.0.0; platform_machine == 'aarch64' and 'tegra' in platform_release", - - "typing-extensions>=4.7.0", - "dllist", -] - -dynamic = ["version"] +dynamic = ["version", "dependencies", "optional-dependencies"] [dependency-groups] dev = [ @@ -102,11 +80,6 @@ test = [ "transformers>=4.49.0", ] -[project.optional-dependencies] -torchvision = [ - "torchvision>=0.23.0.dev,<0.24.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", - "torchvision>=0.22.0,<0.23.0; platform_machine == 'aarch64' and 'tegra' in platform_release", -] quantization = ["nvidia-modelopt[all]>=0.27.1"] [project.urls] diff --git a/pyproject_rtx.toml.temp b/pyproject_rtx.toml.temp deleted file mode 100644 index 9feb0ce550..0000000000 --- a/pyproject_rtx.toml.temp +++ /dev/null @@ -1,358 +0,0 @@ -[build-system] -requires = [ - "setuptools>=77.0.0", - "packaging>=23.1", - "wheel>=0.40.0", - "ninja>=1.11.0", - "pyyaml>=6.0", - "cffi>=1.15.1", - "torch>=2.9.0.dev,<2.10.0", - "pybind11==2.6.2", -] -build-backend = "setuptools.build_meta" - -[project] -name = "torch_tensorrt_rtx" -authors = [{ name = "NVIDIA Corporation", email = "narens@nvidia.com" }] -description = "Torch-TensorRT-RTX is a package which allows users to automatically compile PyTorch and TorchScript modules to TensorRT while remaining in PyTorch" -license = { file = "LICENSE" } -classifiers = [ - "Development Status :: 5 - Production/Stable", - "Environment :: GPU :: NVIDIA CUDA", - "License :: OSI Approved :: BSD License", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "Operating System :: POSIX :: Linux", - "Programming Language :: C++", - "Programming Language :: Python", - "Programming Language :: Python :: Implementation :: CPython", - "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - "Topic :: Software Development", - "Topic :: Software Development :: Libraries", -] -readme = { file = "README.md", content-type = "text/markdown" } -requires-python = ">=3.9" -keywords = [ - "pytorch", - "torch", - "tensorrt", - "tensorrt_rtx", - "trt", - "ai", - "artificial intelligence", - "ml", - "machine learning", - "dl", - "deep learning", - "compiler", - "dynamo", - "torchscript", - "inference", -] -dependencies = [ - "torch>=2.9.0.dev,<2.10.0", - # currently tensorrt_rtx wheel is not publicly accessible, it is only included inside the rtx tar ball - # hence the tensorrt_rtx wheel version is fixed since the version the rtx tar ball downloaded is fixed - "tensorrt_rtx==1.0.0.21", - "packaging>=23", - "numpy", - "typing-extensions>=4.7.0", - "dllist", -] - -dynamic = ["version"] - -[dependency-groups] -dev = [ - "pre-commit>=2.20.0", - "black>=22.6.0", - "clang-format==14.0.6", - "typos", - "mypy", - "isort", - "ruff", - "pyyaml", -] - -debug = [ - "pydot >= 4.0.0", - "tabulate >= 0.8.10", - "graphviz >= 0.20.3" -] - -test = [ - "pytest", - "pytest-xdist", - "parameterized>=0.2.0", - "expecttest==0.1.6", - "timm>=1.0.3", - "transformers>=4.49.0", -] - -[project.optional-dependencies] -torchvision = [ - "torchvision>=0.23.0.dev,<0.24.0", -] -quantization = ["nvidia-modelopt[all]>=0.27.1"] - -[project.urls] -Homepage = "https://pytorch.org/tensorrt" -Documentation = "https://pytorch.org/tensorrt" -Repository = "https://github.com/pytorch/tensorrt.git" -Changelog = "https://github.com/pytorch/tensorrt/releases" - -[tool.setuptools] -package-dir = { "" = "py" } -include-package-data = false - -[tool.uv] -package = true -environments = ["sys_platform == 'linux'", "sys_platform == 'windows'"] -prerelease = "if-necessary-or-explicit" -index-strategy = "unsafe-best-match" - -[tool.uv.sources] -torch = [ - { index = "pytorch-nightly-cu129", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" }, -] -torchvision = [ - { index = "pytorch-nightly-cu129", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" }, -] - -[[tool.uv.index]] -name = "pytorch-nightly-cu129" -url = "https://download.pytorch.org/whl/nightly/cu129" -explicit = false - -[[tool.uv.index]] -name = "nvidia" -url = "https://pypi.nvidia.com" -explicit = false - -[tool.ruff] -# NOTE: Synchoronize the ignores with .flake8 -lint.ignore = [ - # these ignores are from flake8-bugbear; please fix! - "B007", - "B008", - "B017", - "B018", # Useless expression - "B019", - "B020", - "B023", - "B024", - "B026", - "B028", # No explicit `stacklevel` keyword argument found - "B904", - "B905", - "E402", - "C408", # C408 ignored because we like the dict keyword argument syntax - "E501", # E501 is not flexible enough, we're using B950 instead - "E721", - "E731", # Assign lambda expression - "E741", - "EXE001", - "F405", - "F821", - "F841", - # these ignores are from flake8-logging-format; please fix! - "G101", - "G201", - "G202", - "G003", - "G004", - # these ignores are from RUFF perf; please fix! - "PERF203", - "PERF4", - "SIM102", - "SIM103", - "SIM112", # flake8-simplify code styles - "SIM105", # these ignores are from flake8-simplify. please fix or ignore with commented reason - "SIM108", - "SIM110", - "SIM114", # Combine `if` branches using logical `or` operator - "SIM115", - "SIM116", # Disable Use a dictionary instead of consecutive `if` statements - "SIM117", - "SIM118", -] -#line-length = 120 -lint.select = [ - "B", - "C4", - "G", - "E", - "F", - "SIM1", - "W", - # Not included in flake8 - "PERF", - "PLE", - "TRY302", -] - -# Allow unused variables when underscore-prefixed. -lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" - -# Allow autofix for all enabled rules (when `--fix`) is provided. -lint.fixable = [ - "A", - "B", - "C", - "D", - "E", - "F", - "G", - "I", - "N", - "Q", - "S", - "T", - "W", - "ANN", - "ARG", - "BLE", - "COM", - "DJ", - "DTZ", - "EM", - "ERA", - "EXE", - "FBT", - "ICN", - "INP", - "ISC", - "NPY", - "PD", - "PGH", - "PIE", - "PL", - "PT", - "PTH", - "PYI", - "RET", - "RSE", - "RUF", - "SIM", - "SLF", - "TCH", - "TID", - "TRY", - "UP", - "YTT", -] -lint.unfixable = [] -target-version = "py311" - -# Exclude a variety of commonly ignored directories. -exclude = [ - ".bzr", - ".direnv", - ".eggs", - ".git", - ".git-rewrite", - ".hg", - ".mypy_cache", - ".nox", - ".pants.d", - ".pytype", - ".ruff_cache", - ".svn", - ".tox", - ".venv", - "__pypackages__", - "_build", - "buck-out", - "build", - "dist", - "node_modules", - "venv", - "env", - "py/torch_tensorrt/fx", - ".github", - "examples", - "tests", - "tools", - "docs", - "docsrc", - "tests", - "setup.py", - "noxfile.py", - "__init__.py", -] - -[tool.ruff.lint.mccabe] -# Unlike Flake8, default to a complexity level of 10. -max-complexity = 10 - -[tool.isort] -profile = "black" -py_version = 311 -skip = ["py/torch_tensorrt/fx"] - -[tool.black] -#line-length = 120 -target-version = ["py39", "py310", "py311", "py312", "py313"] -force-exclude = """ -elu_converter/setup.py -""" - -[tool.mypy] -strict = true -ignore_missing_imports = true -show_error_codes = true -disable_error_code = "attr-defined" -no_implicit_optional = true -exclude = [ - "^py/torch_tensorrt/fx", - "py/torch_tensorrt/fx", - "torch_tensorrt/fx", - "py/torch_tensorrt/_C.so", - "examples", - "docs", - "docsrc", - "tests", - "setup.py", - "noxfile.py", -] -python_version = "3.11" - -follow_imports = "skip" - -[[tool.mypy.overrides]] -module = "torch_tensorrt.dynamo.conversion.aten_converters" -disable_error_code = "arg-type" - -[[tool.mypy.overrides]] -module = "torch_tensorrt.dynamo.lowering._decompositions" -disallow_untyped_calls = false - -[[tool.mypy.overrides]] -module = "torch_tensorrt.fx.*" -ignore_errors = true -follow_imports = "skip" - -[tool.typos] -files.extend-exclude = [ - "docs/**/*", - "*/fx/*", - "docsrc/_rendered_examples/", - "core/*", - "!core/runtime/", - "third_party/", - "CHANGELOG.md", - "*.ipynb", - "cpp/", - "py/torch_tensorrt/fx/", -] - -[tool.typos.default] -extend-ignore-identifiers-re = [ - "^([A-z]|[a-z])*Nd*", - "^([A-z]|[a-z])*nd*", - "active*([A-z]|[a-z]|[0-9])*,", -] - -[tool.typos.default.extend-words] -arange = "arange" diff --git a/setup.py b/setup.py index a4bbfb2834..1195dc7954 100644 --- a/setup.py +++ b/setup.py @@ -124,7 +124,7 @@ def load_dep_info(): PY_ONLY = True if (use_rtx_env_var := os.environ.get("USE_TRT_RTX")) is not None: - if use_rtx_env_var == "1": + if use_rtx_env_var == "1" or use_rtx_env_var.lower() == "true": USE_TRT_RTX = True if (release_env_var := os.environ.get("RELEASE")) is not None: @@ -702,6 +702,58 @@ def run(self): with open(os.path.join(get_root_dir(), "README.md"), "r", encoding="utf-8") as fh: long_description = fh.read() + +def get_requirements(): + requirements = [ + "packaging>=23", + "typing-extensions>=4.7.0", + "dllist", + ] + + if IS_JETPACK: + requirements.extend( + [ + "torch>=2.8.0,<2.9.0", + "tensorrt>=10.3.0,<10.4.0", + "numpy<2.0.0", + ] + ) + elif IS_SBSA: + requirements.extend( + [ + "torch>=2.9.0.dev,<2.10.0", + "tensorrt>=10.12.0,<10.13.0", + "tensorrt-cu12>=10.12.0,<10.13.0", + "tensorrt-cu12-bindings>=10.12.0,<10.13.0", + "tensorrt-cu12-libs>=10.12.0,<10.13.0", + "numpy", + ] + ) + else: + requirements.extend( + [ + "torch>=2.9.0.dev,<2.10.0", + "numpy", + ] + ) + if USE_TRT_RTX: + requirements.extend( + [ + "tensorrt-rtx>=1.0.0.21", + ] + ) + else: + requirements.extend( + [ + "tensorrt>=10.12.0,<10.13.0", + "tensorrt-cu12>=10.12.0,<10.13.0", + "tensorrt-cu12-bindings>=10.12.0,<10.13.0", + "tensorrt-cu12-libs>=10.12.0,<10.13.0", + ] + ) + return requirements + + setup( name="torch_tensorrt", ext_modules=ext_modules, @@ -715,6 +767,7 @@ def run(self): "editable_wheel": EditableWheelCommand, }, zip_safe=False, + install_requires=get_requirements(), packages=packages, package_dir=package_dir, include_package_data=False, diff --git a/uv.lock b/uv.lock index 18b5f3d7ed..eb2c536573 100644 --- a/uv.lock +++ b/uv.lock @@ -48,8 +48,7 @@ dependencies = [ { name = "psutil", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "pyyaml", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "safetensors", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, - { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.9.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "torch", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/97/33/47bbd507e3a851d33d19ce7b2141c5ea3689bfae91ba168044d7db24b0e9/accelerate-1.7.0.tar.gz", hash = "sha256:e8a2a5503d6237b9eee73cc8d36cf543f9c2d8dd2c6713450b322f5e6d53a610", size = 376026, upload-time = "2025-05-15T10:00:52.117Z" } wheels = [ @@ -1343,11 +1342,9 @@ all = [ { name = "pynvml", marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, { name = "regex", marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, { name = "safetensors", marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, - { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.9.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'windows') or (python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "torch", marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, { name = "torchprofile", marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, - { name = "torchvision", version = "0.22.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torchvision", version = "0.23.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'windows') or (python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "torchvision", marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, { name = "transformers", marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, ] @@ -1406,11 +1403,9 @@ all = [ { name = "pulp", marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, { name = "regex", marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, { name = "safetensors", marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, - { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.9.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version >= '3.10' and python_full_version < '3.12' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and python_full_version < '3.12' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'windows')" }, + { name = "torch", marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, { name = "torchprofile", marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, - { name = "torchvision", version = "0.22.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torchvision", version = "0.23.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version >= '3.10' and python_full_version < '3.12' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and python_full_version < '3.12' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'windows')" }, + { name = "torchvision", marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, { name = "transformers", marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, ] @@ -1791,8 +1786,7 @@ dependencies = [ { name = "psutil", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "pyyaml", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "safetensors", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, - { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.9.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "torch", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "tqdm", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "transformers", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] @@ -2507,207 +2501,51 @@ wheels = [ [[package]] name = "tensorrt" -version = "10.3.0" +version = "10.12.0.36" source = { registry = "https://pypi.nvidia.com/" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] dependencies = [ - { name = "tensorrt-cu12", version = "10.3.0", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, + { name = "tensorrt-cu12", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] -sdist = { url = "https://pypi.nvidia.com/tensorrt/tensorrt-10.3.0.tar.gz", hash = "sha256:23b0dbeeada4ba1c72021d3ee0a2f172fb7cb60c72ad5e268b62822fab698d1e" } - -[[package]] -name = "tensorrt" -version = "10.11.0.33" -source = { registry = "https://pypi.nvidia.com/" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] -dependencies = [ - { name = "tensorrt-cu12", version = "10.11.0.33", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, -] -sdist = { url = "https://pypi.nvidia.com/tensorrt/tensorrt-10.11.0.33.tar.gz", hash = "sha256:a3d6048f86e11ea5202d473646194d3be866c0c8d578ac0b7eeb91d923f65d0b" } - -[[package]] -name = "tensorrt-cu12" -version = "10.3.0" -source = { registry = "https://pypi.nvidia.com/" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] -sdist = { url = "https://pypi.nvidia.com/tensorrt-cu12/tensorrt-cu12-10.3.0.tar.gz", hash = "sha256:14f0e60f40713a658f9634fffb1a5a665c35feb019be48b2f49e25ac12d2d084" } +sdist = { url = "https://pypi.nvidia.com/tensorrt/tensorrt-10.12.0.36.tar.gz", hash = "sha256:b246a830c26713e097b73151917e101cfb81aa0e7274c3c3b4c1f9f8b886be2e" } [[package]] name = "tensorrt-cu12" -version = "10.11.0.33" +version = "10.12.0.36" source = { registry = "https://pypi.nvidia.com/" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] dependencies = [ - { name = "tensorrt-cu12-bindings", version = "10.11.0.33", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "tensorrt-cu12-libs", version = "10.11.0.33", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "tensorrt-cu12-bindings", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "tensorrt-cu12-libs", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] -sdist = { url = "https://pypi.nvidia.com/tensorrt-cu12/tensorrt_cu12-10.11.0.33.tar.gz", hash = "sha256:7e29c8b16771c025320035ba9609c2a074767d9a8c05696a30c9d5c0fdfb37df" } +sdist = { url = "https://pypi.nvidia.com/tensorrt-cu12/tensorrt_cu12-10.12.0.36.tar.gz", hash = "sha256:aedeee0195c042592ac6b0536b19bc8cdbb1a548f35e09d24fbe78e1c76217c5" } [[package]] name = "tensorrt-cu12-bindings" -version = "10.3.0" +version = "10.12.0.36" source = { registry = "https://pypi.nvidia.com/" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] - -[[package]] -name = "tensorrt-cu12-bindings" -version = "10.11.0.33" -source = { registry = "https://pypi.nvidia.com/" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] wheels = [ - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp310-none-manylinux_2_28_x86_64.whl", hash = "sha256:a2d27745575be5d7f06caa9565230025b8e41a8915ee6a5dc735d41c3faf206d" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp310-none-manylinux_2_31_aarch64.whl", hash = "sha256:546c7ee976366dc9cb76ffefbde555dec4feddcfb508b4c99ee626447b8c72de" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp311-none-manylinux_2_28_x86_64.whl", hash = "sha256:e7b7a5b80174f8c4ddd8a63bc9fa97cad3320409eafad79428bc2b1e15884068" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp311-none-manylinux_2_31_aarch64.whl", hash = "sha256:492e3e91d7c1083bff1f7c15fdd8f5fb09a782dcfa6d1d0f8d9034b2e3b38cad" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp312-none-manylinux_2_28_x86_64.whl", hash = "sha256:a8f374f6d752ce4b0d4a8303d29c3ba9904eb29da0dc95b4db6b75c501997e4a" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp312-none-manylinux_2_31_aarch64.whl", hash = "sha256:6a3b768cea69b153ed0c2eb50130d150406d5c1498fdb0bf6c8a1be160137a6a" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp313-none-manylinux_2_28_x86_64.whl", hash = "sha256:1ceda290d1ed79b6107b0eb29eeb178f569d007c1506b72caae8248975d57662" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp313-none-manylinux_2_31_aarch64.whl", hash = "sha256:3c27e0d6e36a3b1f06e1dc8b735e34f04f5b8aac3e7d9b21762b8264496e825f" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp39-none-manylinux_2_28_x86_64.whl", hash = "sha256:9a801886f389b75f92e69fc6be40308392ec7746dbf4de4a2b76585d591960f0" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp39-none-manylinux_2_31_aarch64.whl", hash = "sha256:42e9b3cc2e3c6bcc0785c9c96b4dd25cd7043ff95e4fd09c8d35331f63ce9634" }, -] - -[[package]] -name = "tensorrt-cu12-libs" -version = "10.3.0" -source = { registry = "https://pypi.nvidia.com/" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] -dependencies = [ - { name = "nvidia-cuda-runtime-cu12", version = "12.8.90", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp310-none-manylinux_2_28_x86_64.whl", hash = "sha256:7ecdb6fc2555caed7d4fbbd8158ed7ced64e230c125484f62a5369c40dcc70e5" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp310-none-manylinux_2_31_aarch64.whl", hash = "sha256:d8548ab5976ca5c91279c68ee77f4c892e03460709cfa3fbd2a22aa8123cb731" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp311-none-manylinux_2_28_x86_64.whl", hash = "sha256:58cf45605bb330e86f8ad49bc8997ed68cfdf5b09da229534fb7f84aa3fe5bf4" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp311-none-manylinux_2_31_aarch64.whl", hash = "sha256:ae0866a89caaeada1c16776de85413a523f78f53b1fd83f1b903c39eed264d82" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp312-none-manylinux_2_28_x86_64.whl", hash = "sha256:fb3a2ce96c7472a46bbee2030ce6a54fd6a32deda401c1c67d9de057550e0171" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp312-none-manylinux_2_31_aarch64.whl", hash = "sha256:f5128b8b2a379e65c09745ba97df58abf3a418cbfd6508d37f76121d9bdd3bc8" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp313-none-manylinux_2_28_x86_64.whl", hash = "sha256:0eb8d3e41279b1d0d329b85372d5d720c8d2ff1228f6273142d717b44d75935b" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp313-none-manylinux_2_31_aarch64.whl", hash = "sha256:a850992cad842340e6fed41fe74f529064064ff61881d50ef5a2be1816526f9b" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp39-none-manylinux_2_28_x86_64.whl", hash = "sha256:986cb86202ef9541279b59d4e254743aff43bae1def87d14dd06e02369107c8b" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp39-none-manylinux_2_31_aarch64.whl", hash = "sha256:c5b86638ae5e3a2101755d469ac2ce831d4bdece1d20fa2bd546c05c554b5952" }, ] [[package]] name = "tensorrt-cu12-libs" -version = "10.11.0.33" +version = "10.12.0.36" source = { registry = "https://pypi.nvidia.com/" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] dependencies = [ - { name = "nvidia-cuda-runtime-cu12", version = "12.8.90", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "nvidia-cuda-runtime-cu12", version = "12.8.90", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'windows'" }, { name = "nvidia-cuda-runtime-cu12", version = "12.9.79", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, ] wheels = [ - { url = "https://pypi.nvidia.com/tensorrt-cu12-libs/tensorrt_cu12_libs-10.11.0.33-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:81ace8d3284fdbef0804c444a4d7555343ee079370e79c93cb328c7d9b08f968" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-libs/tensorrt_cu12_libs-10.11.0.33-py2.py3-none-manylinux_2_31_aarch64.whl", hash = "sha256:b6846dbc32d717a5031d9757f16293dd9e25de8a1c4aae8c00701d52351ef173" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-libs/tensorrt_cu12_libs-10.12.0.36-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:3910039e1d49de0edfdc8bf273e40ad4b85a9d57c7c383fe0e22f75417df9610" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-libs/tensorrt_cu12_libs-10.12.0.36-py2.py3-none-manylinux_2_31_aarch64.whl", hash = "sha256:1c117effa7318b65508457e9a11e67941859c8e5c346b59fd0090f66be28f2f4" }, ] [[package]] @@ -2718,10 +2556,8 @@ dependencies = [ { name = "huggingface-hub", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "pyyaml", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "safetensors", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, - { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.9.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "torchvision", version = "0.22.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torchvision", version = "0.23.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "torch", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "torchvision", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/bc/0c/66b0f9b4a4cb9ffdac7b52b17b37c7d3c4f75623b469e388b0c6d89b4e88/timm-1.0.15.tar.gz", hash = "sha256:756a3bc30c96565f056e608a9b559daed904617eaadb6be536f96874879b1055", size = 2230258, upload-time = "2025-02-23T05:05:55.959Z" } wheels = [ @@ -2776,73 +2612,17 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257, upload-time = "2024-11-27T22:38:35.385Z" }, ] -[[package]] -name = "torch" -version = "2.7.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] -dependencies = [ - { name = "filelock", marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "fsspec", marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "jinja2", marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "networkx", version = "3.2.1", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "networkx", version = "3.5", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version >= '3.11' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version >= '3.11' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "setuptools", marker = "(python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "sympy", marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/46/c2/3fb87940fa160d956ee94d644d37b99a24b9c05a4222bf34f94c71880e28/torch-2.7.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:c9afea41b11e1a1ab1b258a5c31afbd646d6319042bfe4f231b408034b51128b", size = 99158447, upload-time = "2025-04-23T14:35:10.557Z" }, - { url = "https://files.pythonhosted.org/packages/40/da/7378d16cc636697f2a94f791cb496939b60fb8580ddbbef22367db2c2274/torch-2.7.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2b7813e904757b125faf1a9a3154e1d50381d539ced34da1992f52440567c156", size = 99159397, upload-time = "2025-04-23T14:35:35.304Z" }, - { url = "https://files.pythonhosted.org/packages/aa/5e/ac759f4c0ab7c01feffa777bd68b43d2ac61560a9770eeac074b450f81d4/torch-2.7.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:36a6368c7ace41ad1c0f69f18056020b6a5ca47bedaca9a2f3b578f5a104c26c", size = 99013250, upload-time = "2025-04-23T14:35:15.589Z" }, - { url = "https://files.pythonhosted.org/packages/14/24/720ea9a66c29151b315ea6ba6f404650834af57a26b2a04af23ec246b2d5/torch-2.7.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:868ccdc11798535b5727509480cd1d86d74220cfdc42842c4617338c1109a205", size = 99015553, upload-time = "2025-04-23T14:34:41.075Z" }, - { url = "https://files.pythonhosted.org/packages/cb/b4/8df3f9fe6bdf59e56a0e538592c308d18638eb5f5dc4b08d02abb173c9f0/torch-2.7.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:2a885fc25afefb6e6eb18a7d1e8bfa01cc153e92271d980a49243b250d5ab6d9", size = 99091348, upload-time = "2025-04-23T14:33:48.975Z" }, - { url = "https://files.pythonhosted.org/packages/57/6a/36775d1b553a443ba1453e1bfeae903ef20d94c95ab31aa09225bf52fda1/torch-2.7.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:e362efaa5b3078e5f75c33efc05005b9b46de0d2e899519d5b4cad0e050ed0f7", size = 99197389, upload-time = "2025-04-23T14:32:33.083Z" }, -] - [[package]] name = "torch" version = "2.9.0.dev20250701+cu129" source = { registry = "https://download.pytorch.org/whl/nightly/cu129" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] dependencies = [ - { name = "filelock", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "fsspec", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "jinja2", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "networkx", version = "3.2.1", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'windows') or (python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version == '3.10.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.10.*' and platform_machine != 'aarch64' and sys_platform == 'windows') or (python_full_version == '3.10.*' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version == '3.10.*' and 'tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "networkx", version = "3.5", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version >= '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.11' and platform_machine != 'aarch64' and sys_platform == 'windows') or (python_full_version >= '3.11' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.11' and 'tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "filelock", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "fsspec", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "jinja2", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "networkx", version = "3.2.1", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version == '3.10.*' and sys_platform == 'linux') or (python_full_version == '3.10.*' and sys_platform == 'windows')" }, + { name = "networkx", version = "3.5", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version >= '3.11' and sys_platform == 'linux') or (python_full_version >= '3.11' and sys_platform == 'windows')" }, { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, @@ -2858,9 +2638,9 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "pytorch-triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "setuptools", marker = "(python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'windows') or (python_full_version >= '3.12' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.12' and 'tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "sympy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "typing-extensions", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "setuptools", marker = "(python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, + { name = "sympy", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "typing-extensions", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] wheels = [ { url = "https://download.pytorch.org/whl/nightly/cu129/torch-2.9.0.dev20250701%2Bcu129-cp310-cp310-manylinux_2_28_aarch64.whl" }, @@ -2885,29 +2665,14 @@ dependencies = [ { name = "numpy", version = "1.26.4", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows') or ('tegra' in platform_release and sys_platform == 'linux') or ('tegra' in platform_release and sys_platform == 'windows')" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows')" }, { name = "packaging", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, - { name = "tensorrt", version = "10.3.0", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "tensorrt", version = "10.11.0.33", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "tensorrt-cu12", version = "10.3.0", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "tensorrt-cu12", version = "10.11.0.33", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "tensorrt-cu12-bindings", version = "10.3.0", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "tensorrt-cu12-bindings", version = "10.11.0.33", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "tensorrt-cu12-libs", version = "10.3.0", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "tensorrt-cu12-libs", version = "10.11.0.33", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.9.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "tensorrt", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "tensorrt-cu12", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "tensorrt-cu12-bindings", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "tensorrt-cu12-libs", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "torch", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "typing-extensions", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] -[package.optional-dependencies] -quantization = [ - { name = "nvidia-modelopt", version = "0.29.0", source = { registry = "https://pypi.nvidia.com/" }, extra = ["all"], marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, - { name = "nvidia-modelopt", version = "0.31.0", source = { registry = "https://pypi.nvidia.com/" }, extra = ["all"], marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, -] -torchvision = [ - { name = "torchvision", version = "0.22.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torchvision", version = "0.23.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, -] - [package.dev-dependencies] debug = [ { name = "graphviz", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, @@ -2924,6 +2689,10 @@ dev = [ { name = "ruff", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "typos", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] +quantization = [ + { name = "nvidia-modelopt", version = "0.29.0", source = { registry = "https://pypi.nvidia.com/" }, extra = ["all"], marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, + { name = "nvidia-modelopt", version = "0.31.0", source = { registry = "https://pypi.nvidia.com/" }, extra = ["all"], marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, +] test = [ { name = "expecttest", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "parameterized", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, @@ -2936,25 +2705,16 @@ test = [ [package.metadata] requires-dist = [ { name = "dllist" }, - { name = "numpy", marker = "platform_machine != 'aarch64' or 'tegra' not in platform_release" }, - { name = "numpy", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release", specifier = "<2.0.0" }, - { name = "nvidia-modelopt", extras = ["all"], marker = "extra == 'quantization'", specifier = ">=0.27.1" }, + { name = "numpy" }, { name = "packaging", specifier = ">=23" }, - { name = "tensorrt", marker = "platform_machine != 'aarch64' or 'tegra' not in platform_release", specifier = ">=10.11.0,<10.12.0" }, - { name = "tensorrt", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release", specifier = ">=10.3.0,<10.4.0" }, - { name = "tensorrt-cu12", marker = "platform_machine != 'aarch64' or 'tegra' not in platform_release", specifier = ">=10.11.0,<10.12.0" }, - { name = "tensorrt-cu12", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release", specifier = ">=10.3.0,<10.4.0" }, - { name = "tensorrt-cu12-bindings", marker = "platform_machine != 'aarch64' or 'tegra' not in platform_release", specifier = ">=10.11.0,<10.12.0" }, - { name = "tensorrt-cu12-bindings", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release", specifier = ">=10.3.0,<10.4.0" }, - { name = "tensorrt-cu12-libs", marker = "platform_machine != 'aarch64' or 'tegra' not in platform_release", specifier = ">=10.11.0,<10.12.0" }, - { name = "tensorrt-cu12-libs", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release", specifier = ">=10.3.0,<10.4.0" }, + { name = "tensorrt", specifier = ">=10.12.0,<10.13.0" }, + { name = "tensorrt-cu12", specifier = ">=10.12.0,<10.13.0" }, + { name = "tensorrt-cu12-bindings", specifier = ">=10.12.0,<10.13.0" }, + { name = "tensorrt-cu12-libs", specifier = ">=10.12.0,<10.13.0" }, { name = "torch", marker = "platform_machine != 'aarch64' or 'tegra' not in platform_release", specifier = ">=2.9.0.dev0,<2.10.0", index = "https://download.pytorch.org/whl/nightly/cu129" }, - { name = "torch", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release", specifier = ">=2.7.0,<2.8.0" }, - { name = "torchvision", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release and extra == 'torchvision'", specifier = ">=0.22.0,<0.23.0" }, - { name = "torchvision", marker = "(platform_machine != 'aarch64' and extra == 'torchvision') or ('tegra' not in platform_release and extra == 'torchvision')", specifier = ">=0.23.0.dev0,<0.24.0", index = "https://download.pytorch.org/whl/nightly/cu129" }, + { name = "torch", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release", specifier = ">=2.9.0.dev0,<2.10.0" }, { name = "typing-extensions", specifier = ">=4.7.0" }, ] -provides-extras = ["torchvision", "quantization"] [package.metadata.requires-dev] debug = [ @@ -2972,6 +2732,7 @@ dev = [ { name = "ruff" }, { name = "typos" }, ] +quantization = [{ name = "nvidia-modelopt", extras = ["all"], specifier = ">=0.27.1" }] test = [ { name = "expecttest", specifier = "==0.1.6" }, { name = "parameterized", specifier = ">=0.2.0" }, @@ -2988,10 +2749,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", version = "1.26.4", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows') or ('tegra' in platform_release and sys_platform == 'linux') or ('tegra' in platform_release and sys_platform == 'windows')" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.9.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "torchvision", version = "0.22.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torchvision", version = "0.23.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "torch", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "torchvision", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/6f/36/574c0c46e818533b78b3c09505211162918188325ab4165ef11a3f295755/torchprofile-0.0.4.tar.gz", hash = "sha256:96b6da17d752a06b02977e078aea95614893b31d4117dd5dcd081f30ce65611b", size = 4557, upload-time = "2021-06-22T04:58:03.592Z" } wheels = [ @@ -3000,71 +2759,18 @@ wheels = [ [[package]] name = "torchvision" -version = "0.22.0" +version = "0.11.3" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] dependencies = [ - { name = "numpy", version = "1.26.4", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "pillow", marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/e5/ec4b52041cd8c440521b75864376605756bd2d112d6351ea6a1ab25008c1/torchvision-0.22.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:810ea4af3bc63cf39e834f91f4218ff5999271caaffe2456247df905002bd6c0", size = 2512604, upload-time = "2025-04-23T14:41:56.515Z" }, - { url = "https://files.pythonhosted.org/packages/7e/71/ce9a303b94e64fe25d534593522ffc76848c4e64c11e4cbe9f6b8d537210/torchvision-0.22.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6c5620e10ffe388eb6f4744962106ed7cf1508d26e6fdfa0c10522d3249aea24", size = 2514016, upload-time = "2025-04-23T14:41:48.566Z" }, - { url = "https://files.pythonhosted.org/packages/72/ef/21f8b6122e13ae045b8e49658029c695fd774cd21083b3fa5c3f9c5d3e35/torchvision-0.22.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:8f116bc82e0c076e70ba7776e611ed392b9666aa443662e687808b08993d26af", size = 2514571, upload-time = "2025-04-23T14:41:53.458Z" }, - { url = "https://files.pythonhosted.org/packages/77/77/88f64879483d66daf84f1d1c4d5c31ebb08e640411139042a258d5f7dbfe/torchvision-0.22.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:471c6dd75bb984c6ebe4f60322894a290bf3d4b195e769d80754f3689cd7f238", size = 2471592, upload-time = "2025-04-23T14:41:54.991Z" }, - { url = "https://files.pythonhosted.org/packages/6a/9a/2b59f5758ba7e3f23bc84e16947493bbce97392ec6d18efba7bdf0a3b10e/torchvision-0.22.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:753d3c84eeadd5979a33b3b73a25ecd0aa4af44d6b45ed2c70d44f5e0ac68312", size = 2476555, upload-time = "2025-04-23T14:41:38.357Z" }, - { url = "https://files.pythonhosted.org/packages/2c/40/ca84add0f8e548a5b083b271e832786cd397047a9c2e7fac76c0c1f3de04/torchvision-0.22.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:4095fac2b2e49a9c30f701e09ec1bdf3d11b1e48b006a76a9015a2ed8b39556e", size = 2512670, upload-time = "2025-04-23T14:41:33.739Z" }, -] - -[[package]] -name = "torchvision" -version = "0.23.0.dev20250701+cu129" -source = { registry = "https://download.pytorch.org/whl/nightly/cu129" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] -dependencies = [ - { name = "numpy", version = "1.26.4", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'windows') or (python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or (platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, + { name = "numpy", version = "1.26.4", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows') or ('tegra' in platform_release and sys_platform == 'linux') or ('tegra' in platform_release and sys_platform == 'windows')" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "pillow", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.9.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "pillow", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "torch", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] wheels = [ - { url = "https://download.pytorch.org/whl/nightly/cu129/torchvision-0.23.0.dev20250701%2Bcu129-cp310-cp310-manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/nightly/cu129/torchvision-0.23.0.dev20250701%2Bcu129-cp311-cp311-manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/nightly/cu129/torchvision-0.23.0.dev20250701%2Bcu129-cp312-cp312-manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/nightly/cu129/torchvision-0.23.0.dev20250701%2Bcu129-cp313-cp313-manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/nightly/cu129/torchvision-0.23.0.dev20250701%2Bcu129-cp313-cp313t-manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/nightly/cu129/torchvision-0.23.0.dev20250701%2Bcu129-cp39-cp39-manylinux_2_28_x86_64.whl" }, + { url = "https://files.pythonhosted.org/packages/48/20/380758a94be49d38798a6cfd25824f72ec1f230b00c0014efb15903777c6/torchvision-0.11.3-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:8bc8a7db80c97ca254be362ba883a202192e361ba2f6dff7ff5bb010d4bfc23a", size = 14675721, upload-time = "2022-01-27T20:36:45.882Z" }, + { url = "https://files.pythonhosted.org/packages/ac/b1/9702d02e233bec7ce231cc8be94489ee31084fb6d350703f0ed22086ebed/torchvision-0.11.3-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:eca0b0f7a0e462bdecf7926d89faae6dcd51da418ca0cf70e725981ed775a11b", size = 23199346, upload-time = "2022-01-27T20:36:38.508Z" }, + { url = "https://files.pythonhosted.org/packages/ac/d3/913e25d7775c74f76d174a82eba45bf68e384dc78373598f6c2b3a727fed/torchvision-0.11.3-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:25e72231be8ce03467a77806d9c3f5fd34b9cd23b9543d3e999bf57622377532", size = 14674764, upload-time = "2022-01-27T20:37:09.227Z" }, ] [[package]] From 7727fc8c2b1e024cb61fced877583d0889d63791 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Thu, 7 Aug 2025 14:24:46 -0700 Subject: [PATCH 05/30] add documentation --- docsrc/getting_started/tensorrt_rtx.rst | 137 ++++++++++++++++++------ 1 file changed, 103 insertions(+), 34 deletions(-) diff --git a/docsrc/getting_started/tensorrt_rtx.rst b/docsrc/getting_started/tensorrt_rtx.rst index 5e617b459a..07cf476f38 100644 --- a/docsrc/getting_started/tensorrt_rtx.rst +++ b/docsrc/getting_started/tensorrt_rtx.rst @@ -1,72 +1,141 @@ .. _Torch-TensorRT_in_RTX: Torch-TensorRT in RTX -############################# +===================== Overview -******** +-------- TensorRT-RTX -=========== +~~~~~~~~~~~~ + TensorRT for RTX builds on the proven performance of the NVIDIA TensorRT inference library, and simplifies the deployment of AI models on NVIDIA RTX GPUs across desktops, laptops, and workstations. TensorRT for RTX is a drop-in replacement for NVIDIA TensorRT in applications targeting NVIDIA RTX GPUs from Turing through Blackwell generations. It introduces a Just-In-Time (JIT) optimizer in the runtime that compiles improved inference engines directly on the end-user’s RTX-accelerated PC in under 30 seconds. This eliminates the need for lengthy pre-compilation steps and enables rapid engine generation, improved application portability, and cutting-edge inference performance. For detailed information about TensorRT-RTX, refer to: + * `TensorRT-RTX Documentation `_ -Currenlty, Torch-TensorRT only supports TensorRT-RTX for the experiment purpose. -Torch-TensorRT by default uses TensorRT during the build and run. +Currently, Torch-TensorRT only supports TensorRT-RTX for experimental purposes. +Torch-TensorRT by default uses standard TensorRT during the build and run. + +To use TensorRT-RTX: -In order to use TensorRT-RTX, you need to build the wheel with ``--use-rtx`` flag or ``USE_TRT_RTX=true``. -And during the run, you need set the ``USE_TRT_RTX=true`` environment variable to invoke with TensorRT-RTX. +- Build the wheel with the ``--use-rtx`` flag or set ``USE_TRT_RTX=true``. +- During runtime, set the ``USE_TRT_RTX=true`` environment variable to invoke TensorRT-RTX. +Prerequisites +------------- +Install Bazel +~~~~~~~~~~~~~ +Bazel is required to build the wheel with TensorRT-RTX. -Prerequisites -************* +**In Linux:** + +.. code-block:: sh + + curl -L https://github.com/bazelbuild/bazelisk/releases/download/v1.26.0/bazelisk-linux-amd64 \ + -o bazelisk \ + && mv bazelisk /usr/bin/bazel \ + && chmod +x /usr/bin/bazel + +**In Windows:** + +.. code-block:: sh -System Preparation -================== -1. **Install TensorRT-RTX**: - TensorRT-RTX can be downloaded from https://developer.nvidia.com/tensorrt-rtx. - .. code-block:: sh - # if TensorRT-RTX is downloaded in /usr/local/tensorrt-rtx - export LD_LIBRARY_PATH=/usr/local/tensorrt-rtx/lib:$LD_LIBRARY_PATH - cd /usr/local/tensorrt-rtx/python - # install the tensorrt_rtx wheel - # currently tensorrt_rtx wheel is only available from tarball downloaded, not from pypi yet. - python -m pip install tensorrt_rtx-1.0.0.21-cp39-none-linux_x86_64.whl + choco install bazelisk -y +Install TensorRT-RTX Tarball +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +TensorRT-RTX tarball can be downloaded from https://developer.nvidia.com/tensorrt-rtx. +Currently, Torch-TensorRT uses TensorRT-RTX version **1.0.0.21**. + +Once downloaded: + +**In Linux:** + +Make sure you add the lib path to the ``LD_LIBRARY_PATH`` environment variable. + +.. code-block:: sh + + # If TensorRT-RTX is downloaded in /your_local_download_path/TensorRT-RTX-1.0.0.21 + export LD_LIBRARY_PATH=/your_local_download_path/TensorRT-RTX-1.0.0.21/lib:$LD_LIBRARY_PATH + echo $LD_LIBRARY_PATH | grep TensorRT-RTX + +**In Windows:** + +Make sure you add the lib path to the Windows system variable ``PATH``. + +.. code-block:: sh + # If TensorRT-RTX is downloaded in C:\your_local_download_path\TensorRT-RTX-1.0.0.21 + set PATH="%PATH%;C:\your_local_download_path\TensorRT-RTX-1.0.0.21\lib" + echo %PATH% | findstr TensorRT-RTX + +Install TensorRT-RTX Wheel +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Currently, the `tensorrt_rtx` wheel is not published on PyPI. +You must install it manually from the downloaded tarball. + +.. code-block:: sh + + # If the tarball is downloaded in /your_local_download_path/TensorRT-RTX-1.0.0.21 + python -m pip install /your_local_download_path/TensorRT-RTX-1.0.0.21/python/tensorrt_rtx-1.0.0.21-cp39-none-linux_x86_64.whl Build Torch-TensorRT with TensorRT-RTX -===================================== +-------------------------------------- + +Build Locally with TensorRT-RTX +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: sh - # if you have previously build with Standard TensorRT, make sure to clean the build environment + + # If you have previously built with standard TensorRT, make sure to clean the build environment python setup.py clean - # build wheel with TensorRT-RTX + bazel clean --expunge + + # Build wheel with TensorRT-RTX python setup.py bdist_wheel --use-rtx - # install the wheel - cd dist - python -m pip install torch-tensorrt-*.whl + # Install the wheel + python -m pip install dist/torch-tensorrt-*.whl + +Quick Start +----------- + +.. code-block:: python - # check that tensorrt related wheel is not installed, only tensorrt_rtx is there + # You must set USE_TRT_RTX=true to use TensorRT-RTX + USE_TRT_RTX=true python examples/dynamo/torch_compile_resnet_example.py + +Troubleshooting +--------------- + +If you encounter load or link errors, check if `tensorrt_rtx` is linked correctly. +If not, clean up the environment and rebuild. + +**In Linux:** + +.. code-block:: sh + + # Ensure only tensorrt_rtx is installed (no standard tensorrt wheels) python -m pip list | grep tensorrt - # make sure the tensorrt_rtx.so file is linked to the tensorrt_rtx.so file in the TensorRT-RTX installation directory + # Check if libtorchtrt.so links to the correct tensorrt_rtx shared object trt_install_path=$(python -m pip show torch-tensorrt | grep "Location" | awk '{print $2}')/torch_tensorrt - # check if the libtensorrt_rtx.so.1 is linked, and make sure there is no libnvinfer.so.10 linked + # Verify libtensorrt_rtx.so.1 is linked, and libnvinfer.so.10 is NOT ldd $trt_install_path/lib/libtorchtrt.so -Quick Start -=========== +**In Windows:** -.. code-block:: py - # you have to set USE_TRT_RTX=true to use TensorRT-RTX - USE_TRT_RTX=true python examples/dynamo/torch_compile_resnet_example.py +.. code-block:: sh + # Check if tensorrt_rtx_1_0.dll is linked, and libnvinfer.dll is NOT + cd py/torch_tensorrt + dumpbin /DEPENDENTS torchtrt.dll From 8eee6af89a09e0175fbff551190b55bcd9a1a59d Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 11 Aug 2025 10:49:32 -0700 Subject: [PATCH 06/30] resolve merge conflict --- .github/scripts/install-tensorrt-rtx.sh | 4 +- .../build-test-linux-aarch64-jetpack.yml | 2 +- .../workflows/build-test-linux-aarch64.yml | 2 +- .github/workflows/build-test-linux-x86_64.yml | 2 +- .../workflows/build-test-linux-x86_64_rtx.yml | 2 +- .github/workflows/build-test-windows.yml | 2 +- .github/workflows/build-test-windows_rtx.yml | 2 +- .github/workflows/build_wheels_linux.yml | 3 +- .../conversionctx/ConversionCtx.cpp | 1 - cpp/BUILD | 16 +-- py/torch_tensorrt/csrc/tensorrt_classes.cpp | 2 +- .../fx/converters/acc_ops_converters.py | 13 +-- py/torch_tensorrt/fx/fx2trt.py | 8 +- .../test/converters/acc_op/test_dequantize.py | 3 +- .../fx/test/converters/acc_op/test_pad.py | 3 +- .../acc_op/test_quantize_per_tensor.py | 3 +- .../converters/aten_op/test_reshape_aten.py | 7 +- py/torch_tensorrt/fx/tools/common_fx2trt.py | 3 +- py/torch_tensorrt/fx/utils.py | 10 +- setup.py | 104 +++++++++++------- tests/py/ts/api/test_classes.py | 5 + 21 files changed, 99 insertions(+), 98 deletions(-) diff --git a/.github/scripts/install-tensorrt-rtx.sh b/.github/scripts/install-tensorrt-rtx.sh index ef5c2945f3..61a0ce2ae6 100644 --- a/.github/scripts/install-tensorrt-rtx.sh +++ b/.github/scripts/install-tensorrt-rtx.sh @@ -17,8 +17,8 @@ install_tensorrt_rtx() { curl -L https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip -o TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip unzip TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip rtx_lib_dir=${PWD}/TensorRT-RTX-1.0.0.21/lib - export LD_LIBRARY_PATH=${rtx_lib_dir}:$LD_LIBRARY_PATH - echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH" + export PATH=${rtx_lib_dir}:$PATH + echo "PATH: $PATH" if [[ ${install_wheel_or_not} == true ]]; then pip install TensorRT-RTX-1.0.0.21/python/tensorrt_rtx-1.0.0.21-${CPYTHON_TAG}-none-win_amd64.whl fi diff --git a/.github/workflows/build-test-linux-aarch64-jetpack.yml b/.github/workflows/build-test-linux-aarch64-jetpack.yml index a2e8a3a736..9523872acf 100644 --- a/.github/workflows/build-test-linux-aarch64-jetpack.yml +++ b/.github/workflows/build-test-linux-aarch64-jetpack.yml @@ -65,7 +65,7 @@ jobs: post-script: packaging/post_build_script.sh smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt - name: Build torch-tensorrt whl package for aarch64-jetpack + name: Build torch-tensorrt whl package for jetpack uses: ./.github/workflows/build_wheels_linux.yml with: repository: ${{ matrix.repository }} diff --git a/.github/workflows/build-test-linux-aarch64.yml b/.github/workflows/build-test-linux-aarch64.yml index 34b3e4fa34..a2b2a78db9 100644 --- a/.github/workflows/build-test-linux-aarch64.yml +++ b/.github/workflows/build-test-linux-aarch64.yml @@ -62,7 +62,7 @@ jobs: post-script: packaging/post_build_script.sh smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt - name: Build torch-tensorrt whl package for aarch64 + name: Build torch-tensorrt whl package for SBSA uses: ./.github/workflows/build_wheels_linux.yml with: repository: ${{ matrix.repository }} diff --git a/.github/workflows/build-test-linux-x86_64.yml b/.github/workflows/build-test-linux-x86_64.yml index 4b18ef559d..3998eab0e7 100644 --- a/.github/workflows/build-test-linux-x86_64.yml +++ b/.github/workflows/build-test-linux-x86_64.yml @@ -61,7 +61,7 @@ jobs: post-script: packaging/post_build_script.sh smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt - name: Build torch-tensorrt whl package for x86_64 + name: Build torch-tensorrt whl package for Linux x86_64 uses: ./.github/workflows/build_wheels_linux.yml with: repository: ${{ matrix.repository }} diff --git a/.github/workflows/build-test-linux-x86_64_rtx.yml b/.github/workflows/build-test-linux-x86_64_rtx.yml index 24d0169e2c..1f9b605584 100644 --- a/.github/workflows/build-test-linux-x86_64_rtx.yml +++ b/.github/workflows/build-test-linux-x86_64_rtx.yml @@ -60,7 +60,7 @@ jobs: post-script: packaging/post_build_script.sh smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt - name: Build torch-tensorrt-rtx whl package for x86_64 + name: RTX - Build torch-tensorrt-rtx whl package for Linux x86_64 uses: ./.github/workflows/build_wheels_linux.yml with: repository: ${{ matrix.repository }} diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index 2d402a8799..ca39f8cb5f 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -70,7 +70,7 @@ jobs: env-script: packaging/vc_env_helper.bat smoke-test-script: packaging/smoke_test_windows.py package-name: torch_tensorrt - name: Build torch-tensorrt whl package + name: Build torch-tensorrt whl package for Windows uses: ./.github/workflows/build_wheels_windows.yml with: repository: ${{ matrix.repository }} diff --git a/.github/workflows/build-test-windows_rtx.yml b/.github/workflows/build-test-windows_rtx.yml index 6321cd8a52..137adb252c 100644 --- a/.github/workflows/build-test-windows_rtx.yml +++ b/.github/workflows/build-test-windows_rtx.yml @@ -70,7 +70,7 @@ jobs: env-script: packaging/vc_env_helper.bat smoke-test-script: packaging/smoke_test_windows.py package-name: torch_tensorrt - name: Build torch-tensorrt-rtx whl package + name: RTX - Build torch-tensorrt-rtx whl package for Windows uses: ./.github/workflows/build_wheels_windows.yml with: repository: ${{ matrix.repository }} diff --git a/.github/workflows/build_wheels_linux.yml b/.github/workflows/build_wheels_linux.yml index 7cfddd49c8..4efa65add7 100644 --- a/.github/workflows/build_wheels_linux.yml +++ b/.github/workflows/build_wheels_linux.yml @@ -288,7 +288,8 @@ jobs: env: PACKAGE_NAME: ${{ inputs.package-name }} SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} - if: ${{ inputs.architecture == 'x86_64' }} + # TODO: lan to verify whether manylinux repair is needed for jetpack + #if: ${{ inputs.is-jetpack == true }} run: | set -euxo pipefail source "${BUILD_ENV_FILE}" diff --git a/core/conversion/conversionctx/ConversionCtx.cpp b/core/conversion/conversionctx/ConversionCtx.cpp index 0983060e55..07ade4b17f 100644 --- a/core/conversion/conversionctx/ConversionCtx.cpp +++ b/core/conversion/conversionctx/ConversionCtx.cpp @@ -68,7 +68,6 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings) case nvinfer1::DataType::kINT8: LOG_DEBUG("INT8 precision has been enabled, we assume the network has Q/DQ nodes obtained from modelopt"); break; -#endif case nvinfer1::DataType::kFLOAT: break; case nvinfer1::DataType::kINT32: diff --git a/cpp/BUILD b/cpp/BUILD index 2b5877aa4a..79081e1f81 100644 --- a/cpp/BUILD +++ b/cpp/BUILD @@ -30,24 +30,12 @@ cc_library( "src/logging.cpp", "src/torch_tensorrt.cpp", "src/types.cpp", - ] + select({ - ":rtx_win": [], - ":rtx_x86_64": [], - "//conditions:default": [ - "src/ptq.cpp", - ], - }), + ], hdrs = [ "include/torch_tensorrt/logging.h", "include/torch_tensorrt/macros.h", "include/torch_tensorrt/torch_tensorrt.h", - ] + select({ - ":rtx_win": [], - ":rtx_x86_64": [], - "//conditions:default": [ - "include/torch_tensorrt/ptq.h", - ], - }), + ], linkstatic = True, strip_include_prefix = "include/", deps = [ diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.cpp b/py/torch_tensorrt/csrc/tensorrt_classes.cpp index 694f6b9930..553c6238a5 100644 --- a/py/torch_tensorrt/csrc/tensorrt_classes.cpp +++ b/py/torch_tensorrt/csrc/tensorrt_classes.cpp @@ -349,7 +349,7 @@ core::CompileSpec CompileSpec::toInternalCompileSpec(bool converting_to_trt_engi info.lower_info.unfreeze_module = true; info.lower_info.disable_cse = true; } -#endif + info.convert_info.engine_settings.sparse_weights = sparse_weights; info.convert_info.engine_settings.disable_tf32 = disable_tf32; info.convert_info.engine_settings.refit = refit; diff --git a/py/torch_tensorrt/fx/converters/acc_ops_converters.py b/py/torch_tensorrt/fx/converters/acc_ops_converters.py index bf2680f12a..f998ddb27a 100644 --- a/py/torch_tensorrt/fx/converters/acc_ops_converters.py +++ b/py/torch_tensorrt/fx/converters/acc_ops_converters.py @@ -12,7 +12,6 @@ import torch from torch.fx.immutable_collections import immutable_list from torch.fx.node import Argument, Target -from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.converters.impl import activation, convolution from torch_tensorrt.fx.passes.lower_basic_pass import ( trt_transposed_linear, @@ -208,7 +207,7 @@ def acc_ops_conv_transposend( return layer.get_output(0) -@tensorrt_converter(acc_ops.pad, enabled=(not is_tensorrt_version_supported("8.2"))) +@tensorrt_converter(acc_ops.pad, enabled=trt.__version__ < "8.2") def acc_ops_pad_with_padding_layer( network: TRTNetwork, target: Target, @@ -258,10 +257,7 @@ def acc_ops_pad_with_padding_layer( return layer.get_output(0) -@tensorrt_converter( - acc_ops.pad, - enabled=is_tensorrt_version_supported("8.2"), -) +@tensorrt_converter(acc_ops.pad, enabled=trt.__version__ >= "8.2") def acc_ops_pad_with_slice_layer( network: TRTNetwork, target: Target, @@ -884,10 +880,7 @@ def acc_ops_sign( ) -> Union[TRTTensor, Sequence[TRTTensor]]: input_val = kwargs["input"] - if ( - is_tensorrt_version_supported("8.2") - and not network.has_implicit_batch_dimension - ): + if trt.__version__ >= "8.2" and not network.has_implicit_batch_dimension: input_val = kwargs["input"] operation_type = trt.UnaryOperation.SIGN return add_unary_layer(network, input_val, operation_type, target, name) diff --git a/py/torch_tensorrt/fx/fx2trt.py b/py/torch_tensorrt/fx/fx2trt.py index f241a936d6..6a29932b1b 100644 --- a/py/torch_tensorrt/fx/fx2trt.py +++ b/py/torch_tensorrt/fx/fx2trt.py @@ -13,7 +13,6 @@ from torch._ops import OpOverload from torch.fx.node import _get_qualified_name from torch.fx.passes.shape_prop import TensorMetadata -from torch_tensorrt._utils import is_tensorrt_version_supported from .converter_registry import CONVERTERS from .input_tensor_spec import InputTensorSpec @@ -214,10 +213,7 @@ def run( builder_config.max_workspace_size = max_workspace_size # Speed up TRT build time in the test environment - if ( - is_tensorrt_version_supported("8.6") - and os.environ.get("TRT_TEST_ENV", "0") == "1" - ): + if trt.__version__ >= "8.6" and os.environ.get("TRT_TEST_ENV", "0") == "1": _LOGGER.info("Set TRT optimization level to 0") builder_config.builder_optimization_level = 0 @@ -229,7 +225,7 @@ def run( cache = builder_config.create_timing_cache(b"") builder_config.set_timing_cache(cache, False) - if is_tensorrt_version_supported("8.2"): + if trt.__version__ >= "8.2": builder_config.profiling_verbosity = ( profiling_verbosity if profiling_verbosity diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py index 217b92f19c..7d17056b62 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py @@ -5,7 +5,6 @@ import torch.nn as nn import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from torch.testing._internal.common_utils import run_tests -from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec @@ -15,7 +14,7 @@ """ ) @unittest.skipIf( - not is_tensorrt_version_supported("8.0"), + trt.__version__ < "8.0", "Explicit quantization only supported in TensorRT 8.0 and later", ) class TestDequantizeConverter(AccTestCase): diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py index f5b6005782..b21779b65f 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py @@ -6,7 +6,6 @@ import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from parameterized import parameterized from torch.testing._internal.common_utils import run_tests -from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase # from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec @@ -80,7 +79,7 @@ def forward(self, x): ] ) @unittest.skipIf( - not is_tensorrt_version_supported("8.2"), + trt.__version__ < "8.2", "Padding 3d only supported in TensorRT 8.2 and later", ) def test_pad_3d(self, _, pad): diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py index 3c2708bf91..22cbdd826d 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py @@ -5,7 +5,6 @@ import torch.nn as nn import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from torch.testing._internal.common_utils import run_tests -from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec @@ -15,7 +14,7 @@ """ ) @unittest.skipIf( - not is_tensorrt_version_supported("8.0"), + trt.__version__ < "8.0", "Explicit quantization only supported in TensorRT 8.0 and later", ) class TestQuantizePerTensorConverter(AccTestCase): diff --git a/py/torch_tensorrt/fx/test/converters/aten_op/test_reshape_aten.py b/py/torch_tensorrt/fx/test/converters/aten_op/test_reshape_aten.py index 2942945523..538e575d6e 100644 --- a/py/torch_tensorrt/fx/test/converters/aten_op/test_reshape_aten.py +++ b/py/torch_tensorrt/fx/test/converters/aten_op/test_reshape_aten.py @@ -4,7 +4,6 @@ import torch from parameterized import parameterized from torch.testing._internal.common_utils import run_tests -from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.tools.common_fx2trt import DispatchTestCase, InputTensorSpec @@ -16,7 +15,7 @@ class TestReshapeConverter(DispatchTestCase): ] ) @unittest.skipIf( - not is_tensorrt_version_supported("8.5"), + trt.__version__ < "8.5", "Shape tensor supported well in TensorRT 8.5 and later", ) def test_reshape(self, target_shape): @@ -43,7 +42,7 @@ def forward(self, x): ] ) @unittest.skipIf( - not is_tensorrt_version_supported("8.5"), + trt.__version__ < "8.5", "Shape tensor supported well in TensorRT 8.5 and later", ) def test_reshape_with_dynamic_shape(self, target_shape): @@ -69,7 +68,7 @@ def forward(self, x): ) @unittest.skipIf( - not is_tensorrt_version_supported("8.5"), + trt.__version__ < "8.5", "Shape tensor supported well in TensorRT 8.5 and later", ) def test_reshape_with_dynamic_shape_size(self): diff --git a/py/torch_tensorrt/fx/tools/common_fx2trt.py b/py/torch_tensorrt/fx/tools/common_fx2trt.py index 66f343a55b..2ddd832c2a 100644 --- a/py/torch_tensorrt/fx/tools/common_fx2trt.py +++ b/py/torch_tensorrt/fx/tools/common_fx2trt.py @@ -13,7 +13,6 @@ from torch.fx.passes import shape_prop from torch.fx.passes.infra.pass_base import PassResult from torch.testing._internal.common_utils import TestCase -from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx import InputTensorSpec, TRTInterpreter, TRTModule from torch_tensorrt.fx.passes.lower_basic_pass_aten import ( compose_bmm, @@ -259,7 +258,7 @@ def run_test( pass_tracer = chain_passes(*apply_passes) mod = pass_tracer(mod, inputs) - if is_tensorrt_version_supported("8.6"): + if trt.__version__ >= "8.6": test_implicit_batch_dim = False if test_implicit_batch_dim: interp = TRTInterpreter(mod, InputTensorSpec.from_tensors(inputs)) diff --git a/py/torch_tensorrt/fx/utils.py b/py/torch_tensorrt/fx/utils.py index da5cdc0d7f..5bef21b6be 100644 --- a/py/torch_tensorrt/fx/utils.py +++ b/py/torch_tensorrt/fx/utils.py @@ -8,7 +8,7 @@ import torch from functorch import make_fx from functorch.experimental import functionalize -from torch_tensorrt._utils import is_tensorrt_version_supported, sanitized_torch_version +from torch_tensorrt._utils import sanitized_torch_version from torch_tensorrt.fx.passes.lower_basic_pass import ( replace_op_with_indices, run_const_fold, @@ -60,7 +60,7 @@ class Frameworks(Enum): }, } -if is_tensorrt_version_supported("7.0"): +if trt.__version__ >= "7.0": DataTypeEquivalence[trt.bool] = { Frameworks.NUMPY: np.bool_, Frameworks.TORCH: torch.bool, @@ -105,11 +105,7 @@ def unified_dtype_converter( trt_major_version = int(trt.__version__.split(".")[0]) if dtype in (np.int8, torch.int8, trt.int8): return DataTypeEquivalence[trt.int8][to] - elif is_tensorrt_version_supported("7.0") and dtype in ( - np.bool_, - torch.bool, - trt.bool, - ): + elif trt_major_version >= 7 and dtype in (np.bool_, torch.bool, trt.bool): return DataTypeEquivalence[trt.bool][to] elif dtype in (np.int32, torch.int32, trt.int32): return DataTypeEquivalence[trt.int32][to] diff --git a/setup.py b/setup.py index 4e2efffdaa..da5455da1e 100644 --- a/setup.py +++ b/setup.py @@ -416,7 +416,18 @@ def run(self): ext_modules = [] -packages = [ +fx_packages = [ + "torch_tensorrt.fx", + "torch_tensorrt.fx.converters", + "torch_tensorrt.fx.converters.impl", + "torch_tensorrt.fx.passes", + "torch_tensorrt.fx.tools", + "torch_tensorrt.fx.tracer", + "torch_tensorrt.fx.tracer.acc_tracer", + "torch_tensorrt.fx.tracer.dispatch_tracer", +] + +dynamo_packages = [ "torch_tensorrt", "torch_tensorrt.dynamo", "torch_tensorrt.dynamo.backend", @@ -435,18 +446,22 @@ def run(self): "torch_tensorrt.dynamo.partitioning", "torch_tensorrt.dynamo.runtime", "torch_tensorrt.dynamo.tools", - "torch_tensorrt.fx", - "torch_tensorrt.fx.converters", - "torch_tensorrt.fx.converters.impl", - "torch_tensorrt.fx.passes", - "torch_tensorrt.fx.tools", - "torch_tensorrt.fx.tracer", - "torch_tensorrt.fx.tracer.acc_tracer", - "torch_tensorrt.fx.tracer.dispatch_tracer", "torch_tensorrt.runtime", ] -package_dir = { +fx_package_dir = { + "torch_tensorrt.fx": "py/torch_tensorrt/fx", + "torch_tensorrt.fx.converters": "py/torch_tensorrt/fx/converters", + "torch_tensorrt.fx.converters.impl": "py/torch_tensorrt/fx/converters/impl", + "torch_tensorrt.fx.passes": "py/torch_tensorrt/fx/passes", + "torch_tensorrt.fx.tools": "py/torch_tensorrt/fx/tools", + "torch_tensorrt.fx.tracer": "py/torch_tensorrt/fx/tracer", + "torch_tensorrt.fx.tracer.acc_tracer": "py/torch_tensorrt/fx/tracer/acc_tracer", + "torch_tensorrt.fx.tracer.dispatch_tracer": "py/torch_tensorrt/fx/tracer/dispatch_tracer", +} + + +dynamo_package_dir = { "torch_tensorrt": "py/torch_tensorrt", "torch_tensorrt.dynamo": "py/torch_tensorrt/dynamo", "torch_tensorrt.dynamo.backend": "py/torch_tensorrt/dynamo/backend", @@ -465,17 +480,49 @@ def run(self): "torch_tensorrt.dynamo.partitioning": "py/torch_tensorrt/dynamo/partitioning", "torch_tensorrt.dynamo.runtime": "py/torch_tensorrt/dynamo/runtime", "torch_tensorrt.dynamo.tools": "py/torch_tensorrt/dynamo/tools", - "torch_tensorrt.fx": "py/torch_tensorrt/fx", - "torch_tensorrt.fx.converters": "py/torch_tensorrt/fx/converters", - "torch_tensorrt.fx.converters.impl": "py/torch_tensorrt/fx/converters/impl", - "torch_tensorrt.fx.passes": "py/torch_tensorrt/fx/passes", - "torch_tensorrt.fx.tools": "py/torch_tensorrt/fx/tools", - "torch_tensorrt.fx.tracer": "py/torch_tensorrt/fx/tracer", - "torch_tensorrt.fx.tracer.acc_tracer": "py/torch_tensorrt/fx/tracer/acc_tracer", - "torch_tensorrt.fx.tracer.dispatch_tracer": "py/torch_tensorrt/fx/tracer/dispatch_tracer", "torch_tensorrt.runtime": "py/torch_tensorrt/runtime", } +if USE_TRT_RTX: + package_dir = dynamo_package_dir + packages = dynamo_packages + exclude_package_data = { + "": [ + "py/torch_tensorrt/csrc/*.cpp", + "torch_tensorrt/csrc/*.cpp", + "test*", + "*.cpp", + ], + "torch_tensorrt": [ + "py/torch_tensorrt/csrc/*.cpp", + "torch_tensorrt/csrc/*.cpp", + "test*", + "*.cpp", + ], + } +else: + package_dir = dynamo_package_dir | fx_package_dir + packages = dynamo_packages + fx_packages + exclude_package_data = { + "": [ + "py/torch_tensorrt/csrc/*.cpp", + "py/torch_tensorrt/fx/test*", + "torch_tensorrt/csrc/*.cpp", + "torch_tensorrt/fx/test*", + "test*", + "*.cpp", + ], + "torch_tensorrt": [ + "py/torch_tensorrt/csrc/*.cpp", + "py/torch_tensorrt/fx/test*", + "torch_tensorrt/csrc/*.cpp", + "torch_tensorrt/fx/test*", + "test*", + "*.cpp", + ], + "torch_tensorrt.fx": ["test/*.py"], + } + package_data = {} if not (PY_ONLY or NO_TS): @@ -756,24 +803,5 @@ def get_requirements(): package_dir=package_dir, include_package_data=False, package_data=package_data, - exclude_package_data={ - "": [ - "py/torch_tensorrt/csrc/*.cpp", - "py/torch_tensorrt/fx/test*", - "torch_tensorrt/csrc/*.cpp", - "torch_tensorrt/fx/test*", - "test*", - "*.cpp", - ], - "torch_tensorrt": [ - "py/torch_tensorrt/csrc/*.cpp", - "py/torch_tensorrt/fx/test*", - "torch_tensorrt/csrc/*.cpp", - "torch_tensorrt/fx/test*", - "test*", - "*.cpp", - ], - "torch_tensorrt.dynamo": ["test/*.py"], - "torch_tensorrt.fx": ["test/*.py"], - }, + exclude_package_data=exclude_package_data, ) diff --git a/tests/py/ts/api/test_classes.py b/tests/py/ts/api/test_classes.py index 39983d29f8..a32b2b2722 100644 --- a/tests/py/ts/api/test_classes.py +++ b/tests/py/ts/api/test_classes.py @@ -5,6 +5,7 @@ import torch import torch_tensorrt as torchtrt import torchvision.models as models +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo.runtime._TorchTensorRTModule import TorchTensorRTModule @@ -313,6 +314,10 @@ def test_set_get_profile_path_prefix(self): trt_mod.engine.profile_path_prefix = "/tmp/" self.assertTrue(trt_mod.engine.profile_path_prefix == "/tmp/") + @unittest.skipIf( + is_tensorrt_rtx(), + "layer info is different for tensorrt_rtx", + ) def test_get_layer_info(self): """ { From bfd9d3f5d558d2cca1033205d4b3c6eb59895e67 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 12 Aug 2025 13:28:23 -0700 Subject: [PATCH 07/30] resolve comments --- packaging/pre_build_script.sh | 4 ++-- .../dynamo/conversion/_ConverterRegistry.py | 21 +++++++++++++------ tests/py/ts/api/test_ts_backend.py | 19 ++++++++++------- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh index 66dd0fc455..be449b73af 100755 --- a/packaging/pre_build_script.sh +++ b/packaging/pre_build_script.sh @@ -12,10 +12,10 @@ if [[ $(uname -m) == "aarch64" ]]; then if [[ ${os_name} == "ubuntu" ]]; then IS_JETPACK=true apt-get update - apt-get install -y ninja-build gettext curl libopenblas-dev + apt-get install -y ninja-build gettext curl libopenblas-dev zip unzip else IS_SBSA=true - yum install -y ninja-build gettext + yum install -y ninja-build gettext zip unzip fi else BAZEL_PLATFORM="amd64" diff --git a/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py b/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py index eb1692e392..ea7b3c3677 100644 --- a/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py +++ b/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py @@ -23,9 +23,9 @@ from torch import SymBool, SymFloat, SymInt from torch._ops import OpOverloadPacket from torch.fx.node import Argument, Node, Target, _get_qualified_name +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo._settings import CompilationSettings from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext -from torch_tensorrt.fx.converter_registry import CONVERTERS as FX_CONVERTERS logger = logging.getLogger(__name__) @@ -624,8 +624,17 @@ def display_all_available_converters(self) -> str: # Initialize dynamo converter registry with the FX and Dynamo aten registries # Note the Dynamo registry is listed first, for precedence -DYNAMO_CONVERTERS: ConverterRegistry = ConverterRegistry( - [DYNAMO_ATEN_CONVERTERS, FX_CONVERTERS], # type: ignore[list-item] - ["Dynamo ATen Converters Registry", "FX Legacy ATen Converters Registry"], - [CallingConvention.CTX, CallingConvention.LEGACY], -) +if is_tensorrt_rtx(): + DYNAMO_CONVERTERS = ConverterRegistry( + [DYNAMO_ATEN_CONVERTERS], # type: ignore[list-item] + ["Dynamo ATen Converters Registry"], + [CallingConvention.CTX], + ) +else: + from torch_tensorrt.fx.converter_registry import CONVERTERS as FX_CONVERTERS + + DYNAMO_CONVERTERS = ConverterRegistry( + [DYNAMO_ATEN_CONVERTERS, FX_CONVERTERS], # type: ignore[list-item] + ["Dynamo ATen Converters Registry", "FX Legacy ATen Converters Registry"], + [CallingConvention.CTX, CallingConvention.LEGACY], + ) diff --git a/tests/py/ts/api/test_ts_backend.py b/tests/py/ts/api/test_ts_backend.py index e56ab4f902..c4d1ba403a 100644 --- a/tests/py/ts/api/test_ts_backend.py +++ b/tests/py/ts/api/test_ts_backend.py @@ -1,10 +1,12 @@ +import copy import unittest -import torch_tensorrt as torchtrt +from typing import Dict + import torch +import torch_tensorrt as torchtrt import torchvision.models as models -import copy -from typing import Dict -from utils import cosine_similarity, COSINE_THRESHOLD +from torch_tensorrt._utils import is_tensorrt_rtx +from utils import COSINE_THRESHOLD, cosine_similarity class TestCompile(unittest.TestCase): @@ -139,10 +141,11 @@ def test_module_type(self): torchtrt._compile._parse_module_type(ts_module), torchtrt._compile._ModuleType.ts, ) - self.assertEqual( - torchtrt._compile._parse_module_type(fx_module), - torchtrt._compile._ModuleType.fx, - ) + if not is_tensorrt_rtx(): + self.assertEqual( + torchtrt._compile._parse_module_type(fx_module), + torchtrt._compile._ModuleType.fx, + ) if __name__ == "__main__": From 74f746467fa0bc3539758b80afc3e244a229e10a Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 12 Aug 2025 16:44:02 -0700 Subject: [PATCH 08/30] replace fx.observer --- .github/scripts/install-tensorrt-rtx.sh | 17 +- .github/workflows/build_wheels_linux.yml | 44 ++-- docsrc/getting_started/tensorrt_rtx.rst | 5 +- docsrc/index.rst | 1 + .../dynamo/conversion/_ConverterRegistry.py | 30 ++- .../dynamo/conversion/_TRTInterpreter.py | 2 +- py/torch_tensorrt/dynamo/observer.py | 194 ++++++++++++++++++ 7 files changed, 253 insertions(+), 40 deletions(-) create mode 100644 py/torch_tensorrt/dynamo/observer.py diff --git a/.github/scripts/install-tensorrt-rtx.sh b/.github/scripts/install-tensorrt-rtx.sh index 61a0ce2ae6..4e746d18c6 100644 --- a/.github/scripts/install-tensorrt-rtx.sh +++ b/.github/scripts/install-tensorrt-rtx.sh @@ -1,6 +1,7 @@ install_tensorrt_rtx() { if [[ ${USE_TRT_RTX} == true ]]; then + TRT_RTX_VERSION=1.0.0.21 install_wheel_or_not=${1:-false} echo "It is the tensorrt-rtx build, install tensorrt-rtx with install_wheel_or_not:${install_wheel_or_not}" PLATFORM=$(python -c "import sys; print(sys.platform)") @@ -14,22 +15,22 @@ install_tensorrt_rtx() { # python version is like 3.11, we need to convert it to cp311 CPYTHON_TAG="cp${PYTHON_VERSION//./}" if [[ ${PLATFORM} == win32 ]]; then - curl -L https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip -o TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip - unzip TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip - rtx_lib_dir=${PWD}/TensorRT-RTX-1.0.0.21/lib + curl -L https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-${TRT_RTX_VERSION}.Windows.win10.cuda-12.9.zip -o TensorRT-RTX-${TRT_RTX_VERSION}.Windows.win10.cuda-12.9.zip + unzip TensorRT-RTX-${TRT_RTX_VERSION}.Windows.win10.cuda-12.9.zip + rtx_lib_dir=${PWD}/TensorRT-RTX-${TRT_RTX_VERSION}/lib export PATH=${rtx_lib_dir}:$PATH echo "PATH: $PATH" if [[ ${install_wheel_or_not} == true ]]; then - pip install TensorRT-RTX-1.0.0.21/python/tensorrt_rtx-1.0.0.21-${CPYTHON_TAG}-none-win_amd64.whl + pip install TensorRT-RTX-${TRT_RTX_VERSION}/python/tensorrt_rtx-${TRT_RTX_VERSION}-${CPYTHON_TAG}-none-win_amd64.whl fi else - curl -L https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz -o TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz - tar -xzf TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz - rtx_lib_dir=${PWD}/TensorRT-RTX-1.0.0.21/lib + curl -L https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-${TRT_RTX_VERSION}.Linux.x86_64-gnu.cuda-12.9.tar.gz -o TensorRT-RTX-${TRT_RTX_VERSION}.Linux.x86_64-gnu.cuda-12.9.tar.gz + tar -xzf TensorRT-RTX-${TRT_RTX_VERSION}.Linux.x86_64-gnu.cuda-12.9.tar.gz + rtx_lib_dir=${PWD}/TensorRT-RTX-${TRT_RTX_VERSION}/lib export LD_LIBRARY_PATH=${rtx_lib_dir}:$LD_LIBRARY_PATH echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH" if [[ ${install_wheel_or_not} == true ]]; then - pip install TensorRT-RTX-1.0.0.21/python/tensorrt_rtx-1.0.0.21-${CPYTHON_TAG}-none-linux_x86_64.whl + pip install TensorRT-RTX-${TRT_RTX_VERSION}/python/tensorrt_rtx-${TRT_RTX_VERSION}-${CPYTHON_TAG}-none-linux_x86_64.whl fi fi else diff --git a/.github/workflows/build_wheels_linux.yml b/.github/workflows/build_wheels_linux.yml index 4efa65add7..4b95a90e86 100644 --- a/.github/workflows/build_wheels_linux.yml +++ b/.github/workflows/build_wheels_linux.yml @@ -317,30 +317,34 @@ jobs: source "${BUILD_ENV_FILE}" WHEEL_NAME=$(ls "${{ inputs.repository }}/dist/") echo "$WHEEL_NAME" + if [[ ${{ inputs.is-jetpack }} == true ]]; then + echo "Skipping smoke test for jetpack, since it is not the actual jetpack environment" + else + ${CONDA_RUN} pip install "${{ inputs.repository }}/dist/$WHEEL_NAME" + # Checking that we have a pinned version of torch in our dependency tree + ( + pushd "${RUNNER_TEMP}" + unzip -o "${GITHUB_WORKSPACE}/${{ inputs.repository }}/dist/$WHEEL_NAME" + # Ensure that pytorch version is pinned, should output file where it was found + grep "Requires-Dist: torch (==.*)" -r . + ) - ${CONDA_RUN} pip install "${{ inputs.repository }}/dist/$WHEEL_NAME" - # Checking that we have a pinned version of torch in our dependency tree - ( - pushd "${RUNNER_TEMP}" - unzip -o "${GITHUB_WORKSPACE}/${{ inputs.repository }}/dist/$WHEEL_NAME" - # Ensure that pytorch version is pinned, should output file where it was found - grep "Requires-Dist: torch (==.*)" -r . - ) - - if [[ (! -f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT}) ]]; then - echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found" - if [[ "${PACKAGE_NAME}" = "torchrec" ]]; then - # Special case for torchrec temporarily since __version__ does not - # work correctly on main in torchrec. This block will be - # removed once we fix it. - ${CONDA_RUN} python -c "import ${PACKAGE_NAME}" + if [[ (! -f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT}) ]]; then + echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found" + if [[ "${PACKAGE_NAME}" = "torchrec" ]]; then + # Special case for torchrec temporarily since __version__ does not + # work correctly on main in torchrec. This block will be + # removed once we fix it. + ${CONDA_RUN} python -c "import ${PACKAGE_NAME}" + else + ${CONDA_RUN} python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)" + fi else - ${CONDA_RUN} python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)" + echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} found" + ${CONDA_RUN} python "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT}" fi - else - echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} found" - ${CONDA_RUN} python "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT}" fi + # NB: Only upload to GitHub after passing smoke tests - name: Upload wheel to GitHub diff --git a/docsrc/getting_started/tensorrt_rtx.rst b/docsrc/getting_started/tensorrt_rtx.rst index 07cf476f38..32104ddcef 100644 --- a/docsrc/getting_started/tensorrt_rtx.rst +++ b/docsrc/getting_started/tensorrt_rtx.rst @@ -95,9 +95,12 @@ Build Locally with TensorRT-RTX .. code-block:: sh - # If you have previously built with standard TensorRT, make sure to clean the build environment + # If you have previously built with standard TensorRT, make sure to clean the build environment, + # otherwise it will use the existing .so built with standard TensorRT, which is not compatible with TensorRT-RTX. python setup.py clean bazel clean --expunge + #remove everything under build directory, + rm -rf build/* # Build wheel with TensorRT-RTX python setup.py bdist_wheel --use-rtx diff --git a/docsrc/index.rst b/docsrc/index.rst index 4d28d77640..68e1ba5259 100644 --- a/docsrc/index.rst +++ b/docsrc/index.rst @@ -28,6 +28,7 @@ Getting Started getting_started/installation getting_started/jetpack getting_started/quick_start + getting_started/tensorrt_rtx User Guide ------------ diff --git a/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py b/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py index ea7b3c3677..7851d54cd6 100644 --- a/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py +++ b/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py @@ -625,16 +625,26 @@ def display_all_available_converters(self) -> str: # Initialize dynamo converter registry with the FX and Dynamo aten registries # Note the Dynamo registry is listed first, for precedence if is_tensorrt_rtx(): - DYNAMO_CONVERTERS = ConverterRegistry( - [DYNAMO_ATEN_CONVERTERS], # type: ignore[list-item] - ["Dynamo ATen Converters Registry"], - [CallingConvention.CTX], - ) + registries = [ + DYNAMO_ATEN_CONVERTERS, + ] + registry_names = ["Dynamo ATen Converters Registry"] + registry_calling_conventions = [ + CallingConvention.CTX, + ] else: from torch_tensorrt.fx.converter_registry import CONVERTERS as FX_CONVERTERS - DYNAMO_CONVERTERS = ConverterRegistry( - [DYNAMO_ATEN_CONVERTERS, FX_CONVERTERS], # type: ignore[list-item] - ["Dynamo ATen Converters Registry", "FX Legacy ATen Converters Registry"], - [CallingConvention.CTX, CallingConvention.LEGACY], - ) + registries = [DYNAMO_ATEN_CONVERTERS, FX_CONVERTERS] + registry_names = [ + "Dynamo ATen Converters Registry", + "FX Legacy ATen Converters Registry", + ] + registry_calling_conventions = [CallingConvention.CTX, CallingConvention.LEGACY] + + +DYNAMO_CONVERTERS: ConverterRegistry = ConverterRegistry( + registries, + registry_names, + registry_calling_conventions, +) diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py index 749e6c5dbe..9cd0bc99ba 100644 --- a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py +++ b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py @@ -48,8 +48,8 @@ ) from torch_tensorrt.dynamo.debug._DebuggerConfig import DebuggerConfig from torch_tensorrt.dynamo.debug._supports_debugger import cls_supports_debugger +from torch_tensorrt.dynamo.observer import Observer from torch_tensorrt.dynamo.utils import DYNAMIC_DIM, deallocate_module, to_torch_device -from torch_tensorrt.fx.observer import Observer from torch_tensorrt.logging import TRT_LOGGER _LOGGER: logging.Logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/dynamo/observer.py b/py/torch_tensorrt/dynamo/observer.py new file mode 100644 index 0000000000..3742bd2840 --- /dev/null +++ b/py/torch_tensorrt/dynamo/observer.py @@ -0,0 +1,194 @@ +import contextlib +import functools +import logging +import traceback +import typing as t +from contextvars import ContextVar +from dataclasses import dataclass, field + +_LOGGER = logging.getLogger(__name__) + +# A context variable to hold registered callbacks for all the observers for the +# current execution context. The callbacks list could have been a member +# variable on the observer instance, however, contextvars document advice +# against creating context variables not at module-global level. +# https://docs.python.org/3/library/contextvars.html#contextvars.ContextVar +_CALLBACKS: ContextVar[t.Dict["Observer", t.List[t.Callable]]] = ContextVar( + "_CALLBACKS", default=None +) + +TObserverCallback = t.TypeVar("TObserverCallback", bound=t.Callable[..., t.Any]) + +# Whether to rethrow the exception caught while calling observer callbacks. +# Default to False. True is only used during tests. +RETHROW_CALLBACK_EXCEPTION: bool = False + + +@dataclass(frozen=True) +class Observer(t.Generic[TObserverCallback]): + """ + Usage: + + >>> some_observer: Observer = ... + >>> with some_observer.add(callback_func): + >>> # do stuff, and when some_observer.observe() is called, + >>> # it will execute callback_func() + >>> ... + + """ + + name: str = "" + # Ensure each Observer instance is considered a distinct key when stored in + # the `_CALLBACKS` dictionary. + unique_id: object = field(default_factory=lambda: object()) + + def add(self, callback: TObserverCallback) -> t.ContextManager: + self._get_callbacks().append(callback) + + # Cannot decorate the outer `add` directly with `contextmanager`, + # because if it were not used with a `with` statement, its body won't + # be executed. + @contextlib.contextmanager + def _add(): + try: + yield + finally: + try: + self._get_callbacks().remove(callback) + except ValueError: + # Callback should be in the callbacks list. I'm just being + # extra cautious here. I don't want it to throw and affect + # business logic. + pass + + return _add() + + def observe(self, *args, **kwargs) -> None: + for callback in self._get_callbacks(): + with _log_error( + "Error calling observer callback", rethrow=RETHROW_CALLBACK_EXCEPTION + ): + callback(*args, **kwargs) + + def _get_callbacks(self) -> t.List[t.Callable]: + """ + Gets the callbacks registered in current execution context. Any code + that manipulates the returned list (add, remove, iterate) is + concurrency safe. + """ + callbacks_dict = _CALLBACKS.get() + if callbacks_dict is None: + callbacks_dict = {} + _CALLBACKS.set(callbacks_dict) + + if self not in callbacks_dict: + callbacks_dict[self] = [] + + return callbacks_dict[self] + + +@dataclass(frozen=True) +class ObserveContext: + """ + Passed to the registered callables that observes any function decorated by + `observable`. See `observable` for detail. + + Attributes: + callable: the observed callable object + args: the args passed to the callable + kwargs: the kwargs passed to the callable + return_value: the return value returned by the callable, only available + when observing the callable after its invocation (via + `CallableObservers.post`) + """ + + callable: t.Callable + args: t.List[t.Any] + kwargs: t.Mapping[str, t.Any] + return_value: t.Any = None + + +def observable(): + """ + A decorator to turn a function into observable + + Example: + + >>> @observable() + >>> def func_to_observe(x, y) -> int: + >>> ... + >>> + >>> def log(ctx: ObserveContext): + >>> print( + >>> f"called {ctx.callable.__name__} with {ctx.args} {ctx.kwargs}" + >>> ) + >>> + >>> # register: + >>> with func_to_observe.observers.pre.add(log): + >>> func_to_observe(1, 2) + >>> # print out "called func_to_observe with (1,2) + >>> # here it won't print + """ + + def decorator(observed_func: callable) -> ObservedCallable: + wrapped_func = _make_observable(orig_func=observed_func) + return functools.wraps(observed_func)(wrapped_func) + + return decorator + + +@dataclass(frozen=True) +class CallableObservers: + pre: Observer[t.Callable[[ObserveContext], None]] + post: Observer[t.Callable[[ObserveContext], None]] + + +class ObservedCallable: + """ + Interface for an observed callable + """ + + observers: CallableObservers + orig_func: callable + + def __call__(self, *args, **kwargs) -> t.Any: + raise NotImplementedError() + + +def _make_observable(orig_func: t.Callable) -> ObservedCallable: + """ + A wrapper for a callable which is to be observed. + """ + + observers = CallableObservers( + pre=Observer(), + post=Observer(), + ) + + @functools.wraps(orig_func) + def observed_func(*args, **kwargs): + observers.pre.observe(ObserveContext(orig_func, args, kwargs)) + return_value = None + try: + return_value = orig_func(*args, **kwargs) + return return_value + finally: + observers.post.observe( + ObserveContext(orig_func, args, kwargs, return_value) + ) + + observed_func.orig_func = orig_func + observed_func.observers = observers + + return observed_func + + +@contextlib.contextmanager +def _log_error(msg: str, rethrow: bool = False) -> t.ContextManager: + try: + yield + except Exception as e: + _e = e # noqa: F841 + _LOGGER.info(f"{msg} (This error is handled): {traceback.format_exc()}") + if rethrow: + raise From edfd75265c26b9608f52f78ab6d088f27795a325 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 12 Aug 2025 16:57:07 -0700 Subject: [PATCH 09/30] resolve comments --- py/torch_tensorrt/_compile.py | 23 +++++++++++++---------- py/torch_tensorrt/_features.py | 3 ++- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/py/torch_tensorrt/_compile.py b/py/torch_tensorrt/_compile.py index acae618f1b..3c2385f6da 100644 --- a/py/torch_tensorrt/_compile.py +++ b/py/torch_tensorrt/_compile.py @@ -7,17 +7,14 @@ from typing import Any, Callable, List, Optional, Sequence, Set, Union import torch -import torch.fx from torch_tensorrt._enums import dtype from torch_tensorrt._features import ENABLED_FEATURES, needs_cross_compile from torch_tensorrt._Input import Input +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo import _defaults from torch_tensorrt.dynamo.runtime._CudaGraphsTorchTensorRTModule import ( CudaGraphsTorchTensorRTModule, ) -from torch_tensorrt.fx import InputTensorSpec -from torch_tensorrt.fx.lower import compile as fx_compile -from torch_tensorrt.fx.utils import LowerPrecision from typing_extensions import TypeGuard if ENABLED_FEATURES.torchscript_frontend: @@ -62,12 +59,6 @@ def _non_fx_input_interface( return all(isinstance(i, (torch.Tensor, Input)) for i in inputs) -def _fx_input_interface( - inputs: Sequence[Input | torch.Tensor | InputTensorSpec], -) -> TypeGuard[List[InputTensorSpec | torch.Tensor]]: - return all(isinstance(i, (torch.Tensor, InputTensorSpec)) for i in inputs) - - class _IRType(Enum): """Enum to determine the type of IR selected for model compilation""" @@ -237,6 +228,13 @@ def compile( ) return compiled_ts_module elif target_ir == _IRType.fx: + if is_tensorrt_rtx(): + raise RuntimeError("FX frontend is not supported on TensorRT-RTX") + import torch.fx + from torch_tensorrt.fx import InputTensorSpec + from torch_tensorrt.fx.lower import compile as fx_compile + from torch_tensorrt.fx.utils import LowerPrecision + if ( torch.float16 in enabled_precisions_set or torch_tensorrt.dtype.half in enabled_precisions_set @@ -250,6 +248,11 @@ def compile( else: raise ValueError(f"Precision {enabled_precisions_set} not supported on FX") + def _fx_input_interface( + inputs: Sequence[Input | torch.Tensor | InputTensorSpec], + ) -> TypeGuard[List[InputTensorSpec | torch.Tensor]]: + return all(isinstance(i, (torch.Tensor, InputTensorSpec)) for i in inputs) + assert _fx_input_interface(input_list) compiled_fx_module: torch.nn.Module = fx_compile( module, diff --git a/py/torch_tensorrt/_features.py b/py/torch_tensorrt/_features.py index f1993486cb..0300f4b296 100644 --- a/py/torch_tensorrt/_features.py +++ b/py/torch_tensorrt/_features.py @@ -6,6 +6,7 @@ from torch_tensorrt._utils import ( check_cross_compile_trt_win_lib, + is_tensorrt_rtx, sanitized_torch_version, ) @@ -42,7 +43,7 @@ _TS_FE_AVAIL = os.path.isfile(linked_file_full_path) _TORCHTRT_RT_AVAIL = _TS_FE_AVAIL or os.path.isfile(linked_file_runtime_full_path) _DYNAMO_FE_AVAIL = version.parse(sanitized_torch_version()) >= version.parse("2.1.dev") -_FX_FE_AVAIL = True +_FX_FE_AVAIL = False if is_tensorrt_rtx() else True _REFIT_AVAIL = True _WINDOWS_CROSS_COMPILE = check_cross_compile_trt_win_lib() From 97d6432b06eed2928a36f1b760d7364420c5c19c Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Wed, 13 Aug 2025 09:27:43 -0700 Subject: [PATCH 10/30] test --- py/torch_tensorrt/_compile.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/py/torch_tensorrt/_compile.py b/py/torch_tensorrt/_compile.py index 3c2385f6da..0541bebe88 100644 --- a/py/torch_tensorrt/_compile.py +++ b/py/torch_tensorrt/_compile.py @@ -10,13 +10,22 @@ from torch_tensorrt._enums import dtype from torch_tensorrt._features import ENABLED_FEATURES, needs_cross_compile from torch_tensorrt._Input import Input -from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo import _defaults from torch_tensorrt.dynamo.runtime._CudaGraphsTorchTensorRTModule import ( CudaGraphsTorchTensorRTModule, ) from typing_extensions import TypeGuard +if ENABLED_FEATURES.fx_frontend: + import torch.fx + from torch_tensorrt.fx import InputTensorSpec + from torch_tensorrt.fx.lower import compile as fx_compile + from torch_tensorrt.fx.utils import LowerPrecision + + InputType = Union[Input, torch.Tensor, InputTensorSpec] +else: + InputType = Union[Input, torch.Tensor] + if ENABLED_FEATURES.torchscript_frontend: import torch_tensorrt.ts from torch_tensorrt.ts._compiler import compile as torchscript_compile @@ -54,7 +63,7 @@ def _non_fx_input_interface( - inputs: Sequence[Input | torch.Tensor | InputTensorSpec], + inputs: Sequence[Input | torch.Tensor], ) -> TypeGuard[List[Input | torch.Tensor]]: return all(isinstance(i, (torch.Tensor, Input)) for i in inputs) @@ -158,7 +167,7 @@ def _get_target_fe(module_type: _ModuleType, ir: str) -> _IRType: def compile( module: Any, ir: str = "default", - inputs: Optional[Sequence[Input | torch.Tensor | InputTensorSpec]] = None, + inputs: Optional[Sequence[InputType]] = None, arg_inputs: Optional[Sequence[Sequence[Any]]] = None, kwarg_inputs: Optional[dict[Any, Any]] = None, enabled_precisions: Optional[Set[Union[torch.dtype, dtype]]] = None, @@ -228,12 +237,10 @@ def compile( ) return compiled_ts_module elif target_ir == _IRType.fx: - if is_tensorrt_rtx(): - raise RuntimeError("FX frontend is not supported on TensorRT-RTX") - import torch.fx - from torch_tensorrt.fx import InputTensorSpec - from torch_tensorrt.fx.lower import compile as fx_compile - from torch_tensorrt.fx.utils import LowerPrecision + if not ENABLED_FEATURES.fx_frontend: + raise RuntimeError( + "FX frontend is not enabled, cannot compile with target_ir=fx" + ) if ( torch.float16 in enabled_precisions_set @@ -423,7 +430,7 @@ def torch_compile(module: torch.nn.Module, **kwargs: Any) -> Any: def convert_method_to_trt_engine( module: Any, method_name: str = "forward", - inputs: Optional[Sequence[Input | torch.Tensor | InputTensorSpec]] = None, + inputs: Optional[Sequence[Input | torch.Tensor]] = None, arg_inputs: Optional[Sequence[Sequence[Any]]] = None, kwarg_inputs: Optional[dict[Any, Any]] = None, ir: str = "default", @@ -670,7 +677,7 @@ def save( inductor_configs = kwargs["inductor_configs"] torch._inductor.aoti_compile_and_package( - exp_program, + module, inductor_configs=inductor_configs, package_path=file_path, ) From f7b6c9a9b21489fb4ada8c0d71f151509b3de101 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Thu, 14 Aug 2025 22:43:13 -0700 Subject: [PATCH 11/30] merge main to the branch --- py/torch_tensorrt/csrc/tensorrt_classes.cpp | 1 - py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py | 1 + py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py | 1 + .../fx/test/converters/acc_op/test_quantize_per_tensor.py | 1 + 4 files changed, 3 insertions(+), 1 deletion(-) diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.cpp b/py/torch_tensorrt/csrc/tensorrt_classes.cpp index 553c6238a5..788a45184b 100644 --- a/py/torch_tensorrt/csrc/tensorrt_classes.cpp +++ b/py/torch_tensorrt/csrc/tensorrt_classes.cpp @@ -349,7 +349,6 @@ core::CompileSpec CompileSpec::toInternalCompileSpec(bool converting_to_trt_engi info.lower_info.unfreeze_module = true; info.lower_info.disable_cse = true; } - info.convert_info.engine_settings.sparse_weights = sparse_weights; info.convert_info.engine_settings.disable_tf32 = disable_tf32; info.convert_info.engine_settings.refit = refit; diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py index 7d17056b62..7f32b749c5 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py @@ -3,6 +3,7 @@ import tensorrt as trt import torch.fx import torch.nn as nn + import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from torch.testing._internal.common_utils import run_tests from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py index b21779b65f..c82eee79ee 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py @@ -3,6 +3,7 @@ import tensorrt as trt import torch import torch.nn as nn + import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from parameterized import parameterized from torch.testing._internal.common_utils import run_tests diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py index 22cbdd826d..c7b050c4ac 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py @@ -3,6 +3,7 @@ import tensorrt as trt import torch.fx import torch.nn as nn + import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from torch.testing._internal.common_utils import run_tests from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec From 6c72548c42352bc4e03bced6799eccc5a540a003 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Fri, 22 Aug 2025 12:14:45 -0700 Subject: [PATCH 12/30] add skip test for rtx --- .../dynamo/conversion/_TRTInterpreter.py | 7 ++++++- .../dynamo/conversion/test_deconvolution_aten.py | 15 ++++++++++----- tests/py/dynamo/conversion/test_hardtanh_aten.py | 7 +++++++ tests/py/dynamo/models/test_dyn_models.py | 3 +++ tests/py/dynamo/models/test_models.py | 9 +++++++++ tests/py/dynamo/models/test_models_export.py | 8 +++++++- 6 files changed, 42 insertions(+), 7 deletions(-) diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py index 9cd0bc99ba..9f151954a0 100644 --- a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py +++ b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py @@ -111,7 +111,7 @@ def __init__( if not CONVERTERS.compilation_settings: # Configure user compilation settings to converters. CONVERTERS.set_compilation_settings(compilation_settings) - + self.validate_compile_settings assert TRTInterpreter._all_precisions_supported( compilation_settings.enabled_precisions ), f"Attempted to enable kernel precisions that are not supported (got: {compilation_settings.enabled_precisions}, support: {_defaults.SUPPORTED_KERNEL_PRECISIONS})" @@ -196,6 +196,11 @@ def _all_precisions_supported(enabled_precisions: Set[dtype]) -> bool: return enabled_precisions.issubset(_defaults.SUPPORTED_KERNEL_PRECISIONS) def validate_compile_settings(self) -> None: + if is_tensorrt_rtx(): + if dtype.bfloat16 in self.compilation_settings.enabled_precisions: + raise RuntimeError("TensorRT-RTX does not support bfloat16!") + return + if ( dtype.i8 in self.compilation_settings.enabled_precisions and not self.builder.platform_has_fast_int8 diff --git a/tests/py/dynamo/conversion/test_deconvolution_aten.py b/tests/py/dynamo/conversion/test_deconvolution_aten.py index 1909cb8fbb..e2898758e6 100644 --- a/tests/py/dynamo/conversion/test_deconvolution_aten.py +++ b/tests/py/dynamo/conversion/test_deconvolution_aten.py @@ -1,8 +1,10 @@ +import unittest + import torch from parameterized import param, parameterized from torch.testing._internal.common_utils import run_tests - from torch_tensorrt import Input +from torch_tensorrt._utils import is_tensorrt_rtx from .harness import DispatchTestCase @@ -22,14 +24,15 @@ class TestDeconvolutionConverter(DispatchTestCase): param("output_padding_4", 3, stride=3, padding=2, output_padding=1), param("output_padding_5", 3, stride=3, padding=3, output_padding=1), param("output_padding_6", 3, stride=3, padding=3, output_padding=2), + # tensorrt-rtx: does not support both strided and dilated deconv due to cuDNN limitation param( "combined_params", 3, - stride=3, + stride=1, padding=3, dilation=2, groups=3, - output_padding=2, + output_padding=1, ), ] ) @@ -126,14 +129,15 @@ def forward(self, x): param("output_padding_5", 3, stride=3, padding=2, output_padding=1), param("output_padding_6", 3, stride=3, padding=3, output_padding=1), param("output_padding_7", 3, stride=3, padding=3, output_padding=2), + # tensorrt-rtx: does not support both strided and dilated deconv due to cuDNN limitation param( "combined_params", 3, - stride=3, + stride=1, padding=3, dilation=2, groups=3, - output_padding=2, + output_padding=1, ), ] ) @@ -200,6 +204,7 @@ def forward(self, x): enable_passes=True, ) + @unittest.skipIf(is_tensorrt_rtx(), "TensorRT-RTX has bug on deconv3d") @parameterized.expand( [ ("default", 1), diff --git a/tests/py/dynamo/conversion/test_hardtanh_aten.py b/tests/py/dynamo/conversion/test_hardtanh_aten.py index 1c8cae2d53..e286c1cf6e 100644 --- a/tests/py/dynamo/conversion/test_hardtanh_aten.py +++ b/tests/py/dynamo/conversion/test_hardtanh_aten.py @@ -1,11 +1,18 @@ +import unittest + import torch import torch.nn as nn from torch.testing._internal.common_utils import run_tests from torch_tensorrt import Input +from torch_tensorrt._utils import is_tensorrt_rtx from .harness import DispatchTestCase +@unittest.skipIf( + is_tensorrt_rtx(), + "hardtanh is implemented in fx, need to move to dynamo, skip for TensorRT-RTX for now", +) class TestHardTanHConverter(DispatchTestCase): def test_hardtanh(self): class TestModule(nn.Module): diff --git a/tests/py/dynamo/models/test_dyn_models.py b/tests/py/dynamo/models/test_dyn_models.py index fb3a3b8688..28171ed41e 100644 --- a/tests/py/dynamo/models/test_dyn_models.py +++ b/tests/py/dynamo/models/test_dyn_models.py @@ -183,6 +183,9 @@ def test_resnet_dynamic(ir, dtype): """ Tests the Resnet18 model (which is fully convertible) with dynamic shapes """ + if is_tensorrt_rtx() and dtype == torch.bfloat16: + pytest.skip("TensorRT-RTX does not support bfloat16") + import torchvision.models as models model = models.resnet18(pretrained=True).eval().to("cuda").to(dtype) diff --git a/tests/py/dynamo/models/test_models.py b/tests/py/dynamo/models/test_models.py index 84f36e48ce..c598d43392 100644 --- a/tests/py/dynamo/models/test_models.py +++ b/tests/py/dynamo/models/test_models.py @@ -138,6 +138,9 @@ def test_resnet18_torch_exec_ops(ir): ) @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32]) def test_mobilenet_v2(ir, dtype): + if is_tensorrt_rtx() and dtype == torch.bfloat16: + pytest.skip("TensorRT-RTX does not support bfloat16") + model = models.mobilenet_v2(pretrained=True).eval().to("cuda").to(dtype) input = torch.randn((1, 3, 224, 224)).to("cuda").to(dtype) @@ -177,6 +180,9 @@ def test_mobilenet_v2(ir, dtype): ) @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32]) def test_efficientnet_b0(ir, dtype): + if is_tensorrt_rtx() and dtype == torch.bfloat16: + pytest.skip("TensorRT-RTX does not support bfloat16") + model = ( timm.create_model("efficientnet_b0", pretrained=True) .eval() @@ -221,6 +227,9 @@ def test_efficientnet_b0(ir, dtype): ) @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32]) def test_bert_base_uncased(ir, dtype): + if is_tensorrt_rtx() and dtype == torch.bfloat16: + pytest.skip("TensorRT-RTX does not support bfloat16") + from transformers import BertModel model = BertModel.from_pretrained("bert-base-uncased").cuda().eval().to(dtype) diff --git a/tests/py/dynamo/models/test_models_export.py b/tests/py/dynamo/models/test_models_export.py index 583ee9a18e..3971786926 100644 --- a/tests/py/dynamo/models/test_models_export.py +++ b/tests/py/dynamo/models/test_models_export.py @@ -284,7 +284,7 @@ def test_base_fp4_static_shapes(ir): import modelopt.torch.quantization as mtq from modelopt.torch.quantization.utils import export_torch_mode - dtype = torch.bfloat16 + dtype = torch.float16 class SimpleNetwork(torch.nn.Module): def __init__(self): @@ -392,6 +392,9 @@ def test_base_int8(ir, dtype): import modelopt.torch.quantization as mtq from modelopt.torch.quantization.utils import export_torch_mode + if is_tensorrt_rtx() and dtype == torch.bfloat16: + pytest.skip("TensorRT-RTX does not support bfloat16") + class SimpleNetwork(torch.nn.Module): def __init__(self): super(SimpleNetwork, self).__init__() @@ -448,6 +451,9 @@ def test_base_int8_dynamic_shape(ir, dtype): import modelopt.torch.quantization as mtq from modelopt.torch.quantization.utils import export_torch_mode + if is_tensorrt_rtx() and dtype == torch.bfloat16: + pytest.skip("TensorRT-RTX does not support bfloat16") + class SimpleNetwork(torch.nn.Module): def __init__(self): super(SimpleNetwork, self).__init__() From 887438c7074c1f8c934d97a5193b5ee91d99196a Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 25 Aug 2025 15:00:30 -0700 Subject: [PATCH 13/30] add skip test --- py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py | 2 +- tests/py/dynamo/models/test_dyn_models.py | 6 +++++- tests/py/dynamo/models/test_models.py | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py index 9f151954a0..0e0e7c0f74 100644 --- a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py +++ b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py @@ -111,7 +111,7 @@ def __init__( if not CONVERTERS.compilation_settings: # Configure user compilation settings to converters. CONVERTERS.set_compilation_settings(compilation_settings) - self.validate_compile_settings + self.validate_compile_settings() assert TRTInterpreter._all_precisions_supported( compilation_settings.enabled_precisions ), f"Attempted to enable kernel precisions that are not supported (got: {compilation_settings.enabled_precisions}, support: {_defaults.SUPPORTED_KERNEL_PRECISIONS})" diff --git a/tests/py/dynamo/models/test_dyn_models.py b/tests/py/dynamo/models/test_dyn_models.py index 28171ed41e..21744817a9 100644 --- a/tests/py/dynamo/models/test_dyn_models.py +++ b/tests/py/dynamo/models/test_dyn_models.py @@ -5,7 +5,11 @@ import pytest import torch import torch_tensorrt as torchtrt -from torch_tensorrt.dynamo.utils import COSINE_THRESHOLD, cosine_similarity +from torch_tensorrt.dynamo.utils import ( + COSINE_THRESHOLD, + cosine_similarity, + is_tensorrt_rtx, +) assertions = unittest.TestCase() diff --git a/tests/py/dynamo/models/test_models.py b/tests/py/dynamo/models/test_models.py index c598d43392..0a16bae044 100644 --- a/tests/py/dynamo/models/test_models.py +++ b/tests/py/dynamo/models/test_models.py @@ -9,6 +9,7 @@ COSINE_THRESHOLD, cosine_similarity, get_model_device, + is_tensorrt_rtx, ) assertions = unittest.TestCase() From 036c5c63be2f8a31d18f2267ffa44faa082e31be Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 25 Aug 2025 15:17:50 -0700 Subject: [PATCH 14/30] add skip test for bfloat16 --- tests/py/dynamo/conversion/test_binary_ops_aten.py | 5 +++++ tests/py/dynamo/conversion/test_casts.py | 4 ++++ tests/py/dynamo/models/test_dtype_support.py | 5 +++++ tests/py/dynamo/models/test_dyn_models.py | 2 +- tests/py/dynamo/models/test_models.py | 12 ++++++++++-- 5 files changed, 25 insertions(+), 3 deletions(-) diff --git a/tests/py/dynamo/conversion/test_binary_ops_aten.py b/tests/py/dynamo/conversion/test_binary_ops_aten.py index ac8cf4b00b..90654b9282 100644 --- a/tests/py/dynamo/conversion/test_binary_ops_aten.py +++ b/tests/py/dynamo/conversion/test_binary_ops_aten.py @@ -5,6 +5,7 @@ from parameterized import parameterized from torch.testing._internal.common_utils import run_tests from torch_tensorrt import Input +from torch_tensorrt._utils import is_tensorrt_rtx from .harness import DispatchTestCase @@ -235,6 +236,10 @@ def forward(self, x, y): if op[0].__name__ not in ["pow.Tensor_Tensor", "fmod.Tensor"] ] ) + @unittest.skipIf( + is_tensorrt_rtx(), + "bf16 is not supported for tensorrt_rtx", + ) def test_elementwise_ops_bf16(self, _, orig_op): class TestModule(nn.Module): def __init__(self, orig_op): diff --git a/tests/py/dynamo/conversion/test_casts.py b/tests/py/dynamo/conversion/test_casts.py index 997092d24b..6c8f27dd8c 100644 --- a/tests/py/dynamo/conversion/test_casts.py +++ b/tests/py/dynamo/conversion/test_casts.py @@ -64,6 +64,10 @@ def forward(self, x): precision=torch.float, ) + @unittest.skipIf( + is_tensorrt_rtx(), + "bf16 is not supported for tensorrt_rtx", + ) def test_to_copy_bfloat16(self): class ToCopyBFloat16(nn.Module): def forward(self, x): diff --git a/tests/py/dynamo/models/test_dtype_support.py b/tests/py/dynamo/models/test_dtype_support.py index 62bcacc94a..df0b70aa73 100644 --- a/tests/py/dynamo/models/test_dtype_support.py +++ b/tests/py/dynamo/models/test_dtype_support.py @@ -8,6 +8,7 @@ from torch import nn from torch.nn.parameter import Parameter, UninitializedParameter from torch.testing._internal.common_utils import TestCase, run_tests +from torch_tensorrt._utils import is_tensorrt_rtx from ..testing_utilities import DECIMALS_OF_AGREEMENT, lower_graph_testing @@ -197,6 +198,10 @@ def forward(self, x): ), "Platform does not have BF16 support", ) +@unittest.skipIf( + is_tensorrt_rtx(), + "bf16 is not supported for tensorrt_rtx", +) class TestBF16Support(TestCase): @unittest.skipIf( not torch_tensorrt.ENABLED_FEATURES.torch_tensorrt_runtime, diff --git a/tests/py/dynamo/models/test_dyn_models.py b/tests/py/dynamo/models/test_dyn_models.py index 21744817a9..343ae2ba0c 100644 --- a/tests/py/dynamo/models/test_dyn_models.py +++ b/tests/py/dynamo/models/test_dyn_models.py @@ -5,10 +5,10 @@ import pytest import torch import torch_tensorrt as torchtrt +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo.utils import ( COSINE_THRESHOLD, cosine_similarity, - is_tensorrt_rtx, ) assertions = unittest.TestCase() diff --git a/tests/py/dynamo/models/test_models.py b/tests/py/dynamo/models/test_models.py index 0a16bae044..c9c6f3c814 100644 --- a/tests/py/dynamo/models/test_models.py +++ b/tests/py/dynamo/models/test_models.py @@ -5,11 +5,11 @@ import pytest import torch import torch_tensorrt as torchtrt +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo.utils import ( COSINE_THRESHOLD, cosine_similarity, get_model_device, - is_tensorrt_rtx, ) assertions = unittest.TestCase() @@ -113,7 +113,7 @@ def test_resnet18_torch_exec_ops(ir): ) ], "ir": ir, - "enabled_precisions": {torch.float32, torch.float16, torch.bfloat16}, + "enabled_precisions": {torch.float32, torch.float16}, "min_block_size": 1, "output_format": "exported_program", "cache_built_engines": True, @@ -368,6 +368,10 @@ def test_resnet18_half(ir): @pytest.mark.unit +@unittest.skipIf( + is_tensorrt_rtx(), + "bf16 is not supported for tensorrt_rtx", +) def test_bf16_model(ir): class MyModule(torch.nn.Module): def __init__(self): @@ -412,6 +416,10 @@ def forward(self, x): @pytest.mark.unit +@unittest.skipIf( + is_tensorrt_rtx(), + "bf16 is not supported for tensorrt_rtx", +) def test_bf16_fallback_model(ir): class MyModule(torch.nn.Module): def __init__(self): From ec4951208f6b84add24a1914cef39d03c6237a0c Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 25 Aug 2025 16:51:32 -0700 Subject: [PATCH 15/30] fix the ts test failures. --- core/conversion/converters/BUILD | 17 +- core/conversion/converters/converter_util.cpp | 6 +- .../conversion/converters/impl/batch_norm.cpp | 310 +++++++++--------- .../converters/impl/element_wise.cpp | 6 +- .../converters/impl/interpolate.cpp | 2 + core/conversion/converters/impl/normalize.cpp | 60 +++- core/conversion/converters/impl/pooling.cpp | 37 +++ core/conversion/converters/impl/select.cpp | 15 +- tests/py/ts/api/test_e2e_behavior.py | 21 ++ tests/py/ts/api/test_embed_engines.py | 10 + tests/py/ts/api/test_module_fallback.py | 14 +- tests/py/ts/api/test_operator_fallback.py | 14 +- tests/py/ts/api/test_ts_backend.py | 4 + .../py/ts/integrations/test_to_backend_api.py | 4 + .../test_trt_intercompatibility.py | 4 + tests/py/ts/models/test_models.py | 6 + 16 files changed, 346 insertions(+), 184 deletions(-) diff --git a/core/conversion/converters/BUILD b/core/conversion/converters/BUILD index 84864cea10..ee69320df5 100644 --- a/core/conversion/converters/BUILD +++ b/core/conversion/converters/BUILD @@ -149,17 +149,12 @@ cc_library( "impl/topk.cpp", "impl/unary.cpp", "impl/unsqueeze.cpp", - ] + select({ - ":rtx_win": [], - # exclude plugins from rtx build - ":rtx_x86_64": [], - "//conditions:default": [ - "impl/batch_norm.cpp", - "impl/interpolate.cpp", - "impl/normalize.cpp", - "impl/pooling.cpp", - ], - }), + # these files has plugins which is not supported for tensorrt_rtx, commented inside the files + "impl/batch_norm.cpp", + "impl/interpolate.cpp", + "impl/normalize.cpp", + "impl/pooling.cpp", + ], hdrs = [ "converters.h", ], diff --git a/core/conversion/converters/converter_util.cpp b/core/conversion/converters/converter_util.cpp index fda153195e..1cbd377492 100644 --- a/core/conversion/converters/converter_util.cpp +++ b/core/conversion/converters/converter_util.cpp @@ -220,10 +220,8 @@ nvinfer1::ITensor* castITensor( std::ostringstream tensor_id; tensor_id << reinterpret_cast(tensor); - auto id_layer = ctx->net->addIdentity(*tensor); - TORCHTRT_CHECK(id_layer, "Unable to create identity layer for ITensor: " << tensor_id.str()); - // layer->setOutputType should be used for casting and not manually setting output_tensor->setType() - id_layer->setOutputType(0, dtype); + auto id_layer = ctx->net->addCast(*tensor, dtype); + TORCHTRT_CHECK(id_layer, "Unable to create cast layer for ITensor: " << tensor_id.str()); auto casted_tensor = id_layer->getOutput(0); LOG_DEBUG(ctx->logger, "Casting ITensor " << tensor_id.str() << " from " << tensor->getType() << " to " << dtype); diff --git a/core/conversion/converters/impl/batch_norm.cpp b/core/conversion/converters/impl/batch_norm.cpp index c8ec1977a7..03b844fdd2 100644 --- a/core/conversion/converters/impl/batch_norm.cpp +++ b/core/conversion/converters/impl/batch_norm.cpp @@ -52,6 +52,158 @@ void _batch_norm( LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions()); } +bool BatchNormConverter(ConversionCtx* ctx, const torch::jit::Node* n, args& args) { + auto input = args[0].ITensor(); // assumes non-static input Tensor + auto orig_shape = input->getDimensions(); + auto shape = util::toVec(orig_shape); + auto tensor_type = util::TRTDataTypeToScalarType(input->getType()); + auto options = torch::TensorOptions().dtype(tensor_type).device(torch::kCUDA, ctx->settings.device.gpu_id); + + torch::Tensor gamma, beta, mean, var; + LOG_DEBUG("Input :" << orig_shape << "/" << input->getType()); + // affine=True + LOG_DEBUG("Args[1] gamma : " << args[1].isIValue() << " / " << args[1].IValue()->isNone()); + LOG_DEBUG("Args[2] beta : " << args[2].isIValue() << " / " << args[2].IValue()->isNone()); + // track_running_stats=True + LOG_DEBUG("Args[3] mean : " << args[3].isIValue() << " / " << args[3].IValue()->isNone()); + LOG_DEBUG("Args[4] var : " << args[4].isIValue() << " / " << args[4].IValue()->isNone()); + LOG_DEBUG("use_input_stats, momemtum, cudnn_enabled disregarded"); + LOG_DEBUG("ctx->input_is_dynamic : " << ctx->input_is_dynamic); + + auto channel_dim = shape[1]; + if (ctx->input_is_dynamic) { + gamma = args[1].unwrapToTensor(at::full(channel_dim, 1, options)); + beta = args[2].unwrapToTensor(at::full(channel_dim, 0, options)); + mean = args[3].unwrapToTensor(); + var = args[4].unwrapToTensor(); + } else { + gamma = args[1].unwrapToTensor(at::full(channel_dim, 1, options)); + beta = args[2].unwrapToTensor(at::full(channel_dim, 0, options)); + mean = args[3].unwrapToTensor(at::full(channel_dim, 0, options)); + var = args[4].unwrapToTensor(at::full(channel_dim, 0, options)); + } + + auto eps = static_cast(args[7].unwrapToDouble(1e-5f)); + + TORCHTRT_CHECK(orig_shape.nbDims >= 2, "Unable to create batch normalization layer from node: " << *n); + + // Expand spatial dims from 1D to 2D if needed + bool expandDims = (orig_shape.nbDims < 4); + if (expandDims) { + input = addPadding(ctx, n, input, 4); + } + + _batch_norm(ctx, n, input, orig_shape, gamma, beta, mean, var, eps); + + return true; +} +#ifndef TRT_MAJOR_RTX +bool InstanceNormConverter(ConversionCtx* ctx, const torch::jit::Node* n, args& args) { + auto input = args[0].ITensorOrFreeze(ctx); + auto orig_shape = input->getDimensions(); + auto shape = util::toVec(orig_shape); + auto tensor_type = util::TRTDataTypeToScalarType(input->getType()); + auto options = torch::TensorOptions().dtype(tensor_type); + + LOG_DEBUG("Input :" << orig_shape << "/" << input->getType()); + // affine=True + LOG_DEBUG("Args[1] weight : " << args[1].isIValue() << " / " << args[1].IValue()->isNone()); + LOG_DEBUG("Args[2] bias : " << args[2].isIValue() << " / " << args[2].IValue()->isNone()); + // track_running_stats=True + LOG_DEBUG("Args[3] running_mean : " << args[3].isIValue() << " / " << args[3].IValue()->isNone()); + LOG_DEBUG("Args[4] running_var : " << args[4].isIValue() << " / " << args[4].IValue()->isNone()); + LOG_DEBUG("use_input_stats, momemtum are disregarded"); + LOG_DEBUG("ctx->input_is_dynamic : " << ctx->input_is_dynamic); + + // Expand spatial dims from 1D to 2D if needed + bool expandDims = (orig_shape.nbDims < 4); + if (expandDims) { + input = addPadding(ctx, n, input, 4); + } + + auto eps = static_cast(args[7].unwrapToDouble(1e-5f)); + + auto scales = at::ones(shape[1], options); + if (!args[1].IValue()->isNone()) { + scales = args[1].unwrapToTensor(at::ones(shape[1], options)).cpu().contiguous(); + } + auto bias = at::zeros(shape[1], options); + if (!args[2].IValue()->isNone()) { + bias = args[2].unwrapToTensor(at::zeros(shape[1], options)).cpu().contiguous(); + } + // track_running_stats=True + if (!args[3].IValue()->isNone() || !args[4].IValue()->isNone()) { + auto running_mean = args[3].unwrapToTensor(); + auto running_var = args[4].unwrapToTensor(); + _batch_norm( + ctx, + n, + input, + orig_shape, + scales.to(running_mean.options()), + bias.to(running_mean.options()), + running_mean, + running_var, + eps); + return true; + } + + // Not sure this actually does something since the cudnn_enabled is from the PyTorch context. + // We need cuDNN either way to run this converter + auto cudnn_enabled = static_cast(args[8].unwrapToBool(false)); + if (!cudnn_enabled) { + LOG_DEBUG( + "cuDNN is not enabled, skipping instance_norm conversion. \ + Since TRT 10.0, cuDNN is loaded as a dynamic dependency, \ + so for some functionalities, users need to install correct \ + cuDNN version by themselves. Please see our support matrix \ + here: https://docs.nvidia.com/deeplearning/tensorrt/support-matrix/index.html."); + // return false; + } + + const int relu = 0; + const float alpha = 0; + LOG_DEBUG("Set parameter `relu` and `alpha` to 0"); + /* + https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/namespacenvinfer1.html + https://github.com/NVIDIA/TensorRT/tree/8.0.1/plugin/instanceNormalizationPlugin + Type Parameter Description + float epsilon A small number to prevent being divided by zero during normalization. + Weights * scale A pointer to weights which contains information about scale factors for + normalization. The definition of Weights can be found in the NvInfer.h header. + Weights * bias A pointer to weights which contains information about the bias values for + normalization. The definition of Weights can be found in the NvInfer.h header. + int relu A value used to enable leaky relu activation + float alpha A small negative slope for the leaky relu activation + */ + std::vector f; + f.emplace_back(nvinfer1::PluginField("epsilon", &eps, nvinfer1::PluginFieldType::kFLOAT32, 1)); + f.emplace_back( + nvinfer1::PluginField("scales", scales.data_ptr(), nvinfer1::PluginFieldType::kFLOAT32, scales.numel())); + f.emplace_back( + nvinfer1::PluginField("bias", bias.data_ptr(), nvinfer1::PluginFieldType::kFLOAT32, bias.numel())); + f.emplace_back(nvinfer1::PluginField("relu", &relu, nvinfer1::PluginFieldType::kINT32, 1)); + f.emplace_back(nvinfer1::PluginField("alpha", &alpha, nvinfer1::PluginFieldType::kFLOAT32, 1)); + + nvinfer1::PluginFieldCollection fc; + fc.nbFields = f.size(); + fc.fields = f.data(); + + auto creator = getPluginRegistry()->getPluginCreator("InstanceNormalization_TRT", "1", ""); + auto instance_norm_plugin = creator->createPlugin("instance_norm", &fc); + + TORCHTRT_CHECK(instance_norm_plugin, "Unable to create instance_norm plugin from TensorRT plugin registry" << *n); + + auto new_layer = ctx->net->addPluginV2(reinterpret_cast(&input), 1, *instance_norm_plugin); + + new_layer->setName(util::node_info(n).c_str()); + auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], new_layer->getOutput(0)); + LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions()); + return true; +} +#endif + +#ifndef TRT_MAJOR_RTX auto batch_norm_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns() .pattern({ @@ -59,50 +211,7 @@ auto batch_norm_registrations TORCHTRT_UNUSED = Tensor? mean, Tensor? var, bool training, float momentum, float eps, bool cudnn_enabled) -> (Tensor))SIG", [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool { - auto input = args[0].ITensor(); // assumes non-static input Tensor - auto orig_shape = input->getDimensions(); - auto shape = util::toVec(orig_shape); - auto tensor_type = util::TRTDataTypeToScalarType(input->getType()); - auto options = - torch::TensorOptions().dtype(tensor_type).device(torch::kCUDA, ctx->settings.device.gpu_id); - - torch::Tensor gamma, beta, mean, var; - LOG_DEBUG("Input :" << orig_shape << "/" << input->getType()); - // affine=True - LOG_DEBUG("Args[1] gamma : " << args[1].isIValue() << " / " << args[1].IValue()->isNone()); - LOG_DEBUG("Args[2] beta : " << args[2].isIValue() << " / " << args[2].IValue()->isNone()); - // track_running_stats=True - LOG_DEBUG("Args[3] mean : " << args[3].isIValue() << " / " << args[3].IValue()->isNone()); - LOG_DEBUG("Args[4] var : " << args[4].isIValue() << " / " << args[4].IValue()->isNone()); - LOG_DEBUG("use_input_stats, momemtum, cudnn_enabled disregarded"); - LOG_DEBUG("ctx->input_is_dynamic : " << ctx->input_is_dynamic); - - auto channel_dim = shape[1]; - if (ctx->input_is_dynamic) { - gamma = args[1].unwrapToTensor(at::full(channel_dim, 1, options)); - beta = args[2].unwrapToTensor(at::full(channel_dim, 0, options)); - mean = args[3].unwrapToTensor(); - var = args[4].unwrapToTensor(); - } else { - gamma = args[1].unwrapToTensor(at::full(channel_dim, 1, options)); - beta = args[2].unwrapToTensor(at::full(channel_dim, 0, options)); - mean = args[3].unwrapToTensor(at::full(channel_dim, 0, options)); - var = args[4].unwrapToTensor(at::full(channel_dim, 0, options)); - } - - auto eps = static_cast(args[7].unwrapToDouble(1e-5f)); - - TORCHTRT_CHECK(orig_shape.nbDims >= 2, "Unable to create batch normalization layer from node: " << *n); - - // Expand spatial dims from 1D to 2D if needed - bool expandDims = (orig_shape.nbDims < 4); - if (expandDims) { - input = addPadding(ctx, n, input, 4); - } - - _batch_norm(ctx, n, input, orig_shape, gamma, beta, mean, var, eps); - - return true; + return BatchNormConverter(ctx, n, args); }}) .pattern({ R"SIG(aten::instance_norm(Tensor input, Tensor? weight, Tensor? bias, @@ -110,110 +219,17 @@ auto batch_norm_registrations TORCHTRT_UNUSED = bool use_input_stats, float momentum, float eps, bool cudnn_enabled) -> (Tensor))SIG", [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool { - auto input = args[0].ITensorOrFreeze(ctx); - auto orig_shape = input->getDimensions(); - auto shape = util::toVec(orig_shape); - auto tensor_type = util::TRTDataTypeToScalarType(input->getType()); - auto options = torch::TensorOptions().dtype(tensor_type); - - LOG_DEBUG("Input :" << orig_shape << "/" << input->getType()); - // affine=True - LOG_DEBUG("Args[1] weight : " << args[1].isIValue() << " / " << args[1].IValue()->isNone()); - LOG_DEBUG("Args[2] bias : " << args[2].isIValue() << " / " << args[2].IValue()->isNone()); - // track_running_stats=True - LOG_DEBUG("Args[3] running_mean : " << args[3].isIValue() << " / " << args[3].IValue()->isNone()); - LOG_DEBUG("Args[4] running_var : " << args[4].isIValue() << " / " << args[4].IValue()->isNone()); - LOG_DEBUG("use_input_stats, momemtum are disregarded"); - LOG_DEBUG("ctx->input_is_dynamic : " << ctx->input_is_dynamic); - - // Expand spatial dims from 1D to 2D if needed - bool expandDims = (orig_shape.nbDims < 4); - if (expandDims) { - input = addPadding(ctx, n, input, 4); - } - - auto eps = static_cast(args[7].unwrapToDouble(1e-5f)); - - auto scales = at::ones(shape[1], options); - if (!args[1].IValue()->isNone()) { - scales = args[1].unwrapToTensor(at::ones(shape[1], options)).cpu().contiguous(); - } - auto bias = at::zeros(shape[1], options); - if (!args[2].IValue()->isNone()) { - bias = args[2].unwrapToTensor(at::zeros(shape[1], options)).cpu().contiguous(); - } - // track_running_stats=True - if (!args[3].IValue()->isNone() || !args[4].IValue()->isNone()) { - auto running_mean = args[3].unwrapToTensor(); - auto running_var = args[4].unwrapToTensor(); - _batch_norm( - ctx, - n, - input, - orig_shape, - scales.to(running_mean.options()), - bias.to(running_mean.options()), - running_mean, - running_var, - eps); - return true; - } - - // Not sure this actually does something since the cudnn_enabled is from the PyTorch context. - // We need cuDNN either way to run this converter - auto cudnn_enabled = static_cast(args[8].unwrapToBool(false)); - if (!cudnn_enabled) { - LOG_DEBUG( - "cuDNN is not enabled, skipping instance_norm conversion. \ - Since TRT 10.0, cuDNN is loaded as a dynamic dependency, \ - so for some functionalities, users need to install correct \ - cuDNN version by themselves. Please see our support matrix \ - here: https://docs.nvidia.com/deeplearning/tensorrt/support-matrix/index.html."); - // return false; - } - - const int relu = 0; - const float alpha = 0; - LOG_DEBUG("Set parameter `relu` and `alpha` to 0"); - /* - https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/namespacenvinfer1.html - https://github.com/NVIDIA/TensorRT/tree/8.0.1/plugin/instanceNormalizationPlugin - Type Parameter Description - float epsilon A small number to prevent being divided by zero during normalization. - Weights * scale A pointer to weights which contains information about scale factors for - normalization. The definition of Weights can be found in the NvInfer.h header. - Weights * bias A pointer to weights which contains information about the bias values for - normalization. The definition of Weights can be found in the NvInfer.h header. - int relu A value used to enable leaky relu activation - float alpha A small negative slope for the leaky relu activation - */ - std::vector f; - f.emplace_back(nvinfer1::PluginField("epsilon", &eps, nvinfer1::PluginFieldType::kFLOAT32, 1)); - f.emplace_back(nvinfer1::PluginField( - "scales", scales.data_ptr(), nvinfer1::PluginFieldType::kFLOAT32, scales.numel())); - f.emplace_back(nvinfer1::PluginField( - "bias", bias.data_ptr(), nvinfer1::PluginFieldType::kFLOAT32, bias.numel())); - f.emplace_back(nvinfer1::PluginField("relu", &relu, nvinfer1::PluginFieldType::kINT32, 1)); - f.emplace_back(nvinfer1::PluginField("alpha", &alpha, nvinfer1::PluginFieldType::kFLOAT32, 1)); - - nvinfer1::PluginFieldCollection fc; - fc.nbFields = f.size(); - fc.fields = f.data(); - - auto creator = getPluginRegistry()->getPluginCreator("InstanceNormalization_TRT", "1", ""); - auto instance_norm_plugin = creator->createPlugin("instance_norm", &fc); - - TORCHTRT_CHECK( - instance_norm_plugin, "Unable to create instance_norm plugin from TensorRT plugin registry" << *n); - - auto new_layer = - ctx->net->addPluginV2(reinterpret_cast(&input), 1, *instance_norm_plugin); - - new_layer->setName(util::node_info(n).c_str()); - auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], new_layer->getOutput(0)); - LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions()); - return true; + return InstanceNormConverter(ctx, n, args); }}); +#else +auto batch_norm_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns().pattern({ + R"SIG(aten::batch_norm(Tensor input, Tensor? gamma, Tensor? beta, + Tensor? mean, Tensor? var, + bool training, float momentum, float eps, bool cudnn_enabled) -> (Tensor))SIG", + [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool { + return BatchNormConverter(ctx, n, args); + }}); +#endif } // namespace } // namespace impl } // namespace converters diff --git a/core/conversion/converters/impl/element_wise.cpp b/core/conversion/converters/impl/element_wise.cpp index 73c28af1a1..446eacd378 100644 --- a/core/conversion/converters/impl/element_wise.cpp +++ b/core/conversion/converters/impl/element_wise.cpp @@ -468,8 +468,7 @@ auto element_wise_registrations TORCHTRT_UNUSED = auto options = torch::TensorOptions().dtype(torch::kFloat32); auto ones = at::full({1}, 1, {options}); auto ones_tensor = tensor_to_const(ctx, ones); - nvinfer1::IIdentityLayer* cast_layer = ctx->net->addIdentity(*ones_tensor); - cast_layer->setOutputType(0, nvinfer1::DataType::kBOOL); + nvinfer1::ICastLayer* cast_layer = ctx->net->addCast(*ones_tensor, nvinfer1::DataType::kBOOL); auto sub = add_elementwise( ctx, @@ -500,8 +499,7 @@ auto element_wise_registrations TORCHTRT_UNUSED = auto options = torch::TensorOptions().dtype(torch::kFloat32); auto ones = at::full({1}, 1, {options}); auto ones_tensor = tensor_to_const(ctx, ones); - nvinfer1::IIdentityLayer* cast_layer = ctx->net->addIdentity(*ones_tensor); - cast_layer->setOutputType(0, nvinfer1::DataType::kBOOL); + nvinfer1::ICastLayer* cast_layer = ctx->net->addCast(*ones_tensor, nvinfer1::DataType::kBOOL); auto sub = add_elementwise( ctx, diff --git a/core/conversion/converters/impl/interpolate.cpp b/core/conversion/converters/impl/interpolate.cpp index 96c257c4b7..f3b0180188 100644 --- a/core/conversion/converters/impl/interpolate.cpp +++ b/core/conversion/converters/impl/interpolate.cpp @@ -14,6 +14,7 @@ namespace { /* * Helper functions */ +#ifndef TRT_MAJOR_RTX void create_plugin( ConversionCtx* ctx, const torch::jit::Node* n, @@ -65,6 +66,7 @@ void create_plugin( LOG_DEBUG("Output tensor shape: " << layer_output->getDimensions()); } +#endif void resize_layer_size( ConversionCtx* ctx, diff --git a/core/conversion/converters/impl/normalize.cpp b/core/conversion/converters/impl/normalize.cpp index 4569c31110..9e50a0c418 100644 --- a/core/conversion/converters/impl/normalize.cpp +++ b/core/conversion/converters/impl/normalize.cpp @@ -14,6 +14,7 @@ namespace { /* * Helper functions */ +#ifndef TRT_MAJOR_RTX void create_plugin( ConversionCtx* ctx, const torch::jit::Node* n, @@ -52,6 +53,7 @@ void create_plugin( LOG_DEBUG("Normalize layer output tensor shape: " << layer_output->getDimensions()); } +#endif int32_t axes_mask_from_axes_values( const torch::jit::Node* n, @@ -93,7 +95,7 @@ nvinfer1::ITensor* frobenius_norm( auto sqrt_output = sqrt_layer->getOutput(0); return sqrt_output; } - +#ifndef TRT_MAJOR_RTX auto normalize_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns() .pattern( @@ -165,6 +167,62 @@ auto normalize_registrations TORCHTRT_UNUSED = LOG_DEBUG("Output tensor shape: " << out->getDimensions()); return true; }}); +#else +auto normalize_registrations TORCHTRT_UNUSED = + RegisterNodeConversionPatterns() + .pattern( + {"aten::frobenius_norm.dim(Tensor self, int[1] dim, bool keepdim=False) -> (Tensor)", + [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool { + auto self = args[0].ITensorOrFreeze(ctx); + auto axes_values = args[1].unwrapToIntList().vec(); + auto keep_dims = args[2].unwrapToBool(); + + auto axes_mask = axes_mask_from_axes_values(n, self->getDimensions().nbDims, axes_values); + + auto norm = frobenius_norm(ctx, n, self, axes_mask, keep_dims); + auto out = ctx->AssociateValueAndTensor(n->outputs()[0], norm); + LOG_DEBUG("Output tensor shape: " << out->getDimensions()); + return true; + }}) + .pattern( + {"aten::linalg_norm(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, int? dtype=None) -> (Tensor)", + [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool { + // https://pytorch.org/docs/stable/generated/torch.linalg.norm.html + auto self = args[0].ITensorOrFreeze(ctx); + TORCHTRT_CHECK( + args[1].IValue()->isNone(), + "aten::linalg_norm converter does not yet support non-None 'ord' arguments. Add aten::linalg_norm to torch_executed_ops to force it to fallback."); + auto keep_dims = args[3].unwrapToBool(); + auto self_nb_dims = self->getDimensions().nbDims; + + if (!args.back().IValue()->isNone()) { + // If specified, the input tensor is cast to dtype before performing the operation, and the returned + // tensor’s type will be dtype + auto dtype = args.back().unwrapToScalar().to(); + auto trt_dtype = util::ScalarTypeToTRTDataType(static_cast(dtype)); + self = castITensor(ctx, self, trt_dtype); + } + + int32_t axes_mask = 0; + if (args[2].IValue()->isNone()) { + // If dim= None and ord= None, self will be flattened to 1D and the 2-norm of the resulting vector will + // be computed. + axes_mask = 1; + keep_dims = true; // the single output dim is always preserved + auto flatten_layer = ctx->net->addShuffle(*self); + TORCHTRT_CHECK(flatten_layer, "Unable to create shuffle layer from node: " << *n); + flatten_layer->setReshapeDimensions(util::toDims(std::vector({-1}))); + flatten_layer->setName((util::node_info(n) + "_flatten").c_str()); + self = flatten_layer->getOutput(0); + } else { + axes_mask = axes_mask_from_axes_values(n, self_nb_dims, args[2].unwrapToIntList().vec()); + } + auto norm = frobenius_norm(ctx, n, self, axes_mask, keep_dims); + auto out = ctx->AssociateValueAndTensor(n->outputs()[0], norm); + LOG_DEBUG("Output tensor shape: " << out->getDimensions()); + return true; + }}); +#endif } // namespace } // namespace impl diff --git a/core/conversion/converters/impl/pooling.cpp b/core/conversion/converters/impl/pooling.cpp index 9016fc3ad7..0e7f4e0dbc 100644 --- a/core/conversion/converters/impl/pooling.cpp +++ b/core/conversion/converters/impl/pooling.cpp @@ -37,6 +37,7 @@ bool GlobalPoolingConverter( return true; } +#ifndef TRT_MAJOR_RTX bool AdaptivePoolingConverter( ConversionCtx* ctx, const torch::jit::Node* n, @@ -110,6 +111,7 @@ bool AdaptivePoolingConverter( return true; } +#endif bool PoolingConverter(ConversionCtx* ctx, const torch::jit::Node* n, args& args, nvinfer1::PoolingType pool_type) { auto in = args[0].ITensorOrFreeze(ctx); @@ -197,6 +199,7 @@ bool PoolingConverter(ConversionCtx* ctx, const torch::jit::Node* n, args& args, return true; } // namespace +#ifndef TRT_MAJOR_RTX auto pooling_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns() .pattern( @@ -259,6 +262,40 @@ auto pooling_registrations TORCHTRT_UNUSED = [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool { return AdaptivePoolingConverter(ctx, n, args, nvinfer1::PoolingType::kMAX, "adaptive_max_pool3d"); }}); +#else +auto pooling_registrations TORCHTRT_UNUSED = + RegisterNodeConversionPatterns() + .pattern( + {"aten::max_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=[], int[1] dilation=[], bool ceil_mode=False) -> (Tensor)", + [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool { + return PoolingConverter(ctx, n, args, nvinfer1::PoolingType::kMAX); + }}) + .pattern( + {"aten::avg_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, bool ceil_mode=False, bool count_include_pad=True) -> Tensor", + [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool { + return PoolingConverter(ctx, n, args, nvinfer1::PoolingType::kAVERAGE); + }}) + .pattern( + {"aten::max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=[0, 0], int[2] dilation=[1, 1], bool ceil_mode=False) -> (Tensor)", + [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool { + return PoolingConverter(ctx, n, args, nvinfer1::PoolingType::kMAX); + }}) + .pattern( + {"aten::avg_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=[0, 0], bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> (Tensor)", + [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool { + return PoolingConverter(ctx, n, args, nvinfer1::PoolingType::kAVERAGE); + }}) + .pattern( + {"aten::max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=[], int[3] dilation=[], bool ceil_mode=False) -> (Tensor)", + [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool { + return PoolingConverter(ctx, n, args, nvinfer1::PoolingType::kMAX); + }}) + .pattern( + {"aten::avg_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=[], bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> (Tensor)", + [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool { + return PoolingConverter(ctx, n, args, nvinfer1::PoolingType::kAVERAGE); + }}); +#endif } // namespace } // namespace impl } // namespace converters diff --git a/core/conversion/converters/impl/select.cpp b/core/conversion/converters/impl/select.cpp index 827bfaf00a..192b88b301 100644 --- a/core/conversion/converters/impl/select.cpp +++ b/core/conversion/converters/impl/select.cpp @@ -278,9 +278,8 @@ auto select_registrations TORCHTRT_UNUSED = auto embeddingTensor = args[0].ITensorOrFreeze(ctx); auto indicesTensor = args[1].ITensorOrFreeze(ctx); // Set datatype for indices tensor to INT32 - auto identity = ctx->net->addIdentity(*indicesTensor); - identity->setOutputType(0, nvinfer1::DataType::kINT32); - indicesTensor = identity->getOutput(0); + auto cast = ctx->net->addCast(*indicesTensor, nvinfer1::DataType::kINT32); + indicesTensor = cast->getOutput(0); // IGatherLayer takes in input tensor, the indices, and the axis of input tensor to take indices from auto gather_layer = ctx->net->addGather(*embeddingTensor, *indicesTensor, 0); @@ -337,9 +336,8 @@ auto select_registrations TORCHTRT_UNUSED = adv_idx_indices.push_back(i); auto cont = t.toCustomClass(); // Set datatype for indices tensor to INT32 - auto identity = ctx->net->addIdentity(*cont->tensor()); - identity->setOutputType(0, nvinfer1::DataType::kINT32); - tensors.push_back(identity->getOutput(0)); + auto cast = ctx->net->addCast(*cont->tensor(), nvinfer1::DataType::kINT32); + tensors.push_back(cast->getOutput(0)); } } } @@ -351,9 +349,8 @@ auto select_registrations TORCHTRT_UNUSED = } else if (tensors.size() == 1) { auto indicesTensor = tensors[0]; // Set datatype for indices tensor to INT32 - auto identity = ctx->net->addIdentity(*indicesTensor); - identity->setOutputType(0, nvinfer1::DataType::kINT32); - indicesTensor = identity->getOutput(0); + auto cast = ctx->net->addCast(*indicesTensor, nvinfer1::DataType::kINT32); + indicesTensor = cast->getOutput(0); // IGatherLayer takes in input tensor, the indices, and the axis of input tensor to take indices // from diff --git a/tests/py/ts/api/test_e2e_behavior.py b/tests/py/ts/api/test_e2e_behavior.py index 7e1f3dd538..df8c5af034 100644 --- a/tests/py/ts/api/test_e2e_behavior.py +++ b/tests/py/ts/api/test_e2e_behavior.py @@ -5,9 +5,14 @@ import torch import torch_tensorrt as torchtrt import torchvision.models as models +from torch_tensorrt._utils import is_tensorrt_rtx from utils import same_output_format +@unittest.skipIf( + is_tensorrt_rtx(), + "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", +) class TestInputTypeDefaultsFP32Model(unittest.TestCase): def test_input_use_default_fp32(self): self.model = models.resnet18(pretrained=True).eval().to("cuda") @@ -52,6 +57,10 @@ def test_input_respect_user_setting_fp32_weights_fp16_in_non_constructor(self): class TestInputTypeDefaultsFP16Model(unittest.TestCase): + @unittest.skipIf( + is_tensorrt_rtx(), + "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", + ) def test_input_use_default_fp16(self): self.model = models.resnet18(pretrained=True).eval().to("cuda") self.input = torch.randn((1, 3, 224, 224)).to("cuda") @@ -66,6 +75,10 @@ def test_input_use_default_fp16(self): ) trt_mod(self.input.half()) + @unittest.skipIf( + is_tensorrt_rtx(), + "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", + ) def test_input_use_default_fp16_without_fp16_enabled(self): self.model = models.resnet18(pretrained=True).eval().to("cuda") self.input = torch.randn((1, 3, 224, 224)).to("cuda") @@ -78,6 +91,10 @@ def test_input_use_default_fp16_without_fp16_enabled(self): ) trt_mod(self.input.half()) + @unittest.skipIf( + is_tensorrt_rtx(), + "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", + ) def test_input_respect_user_setting_fp16_weights_fp32_in(self): self.model = models.resnet18(pretrained=True).eval().to("cuda") self.input = torch.randn((1, 3, 224, 224)).to("cuda") @@ -93,6 +110,10 @@ def test_input_respect_user_setting_fp16_weights_fp32_in(self): ) trt_mod(self.input) + @unittest.skipIf( + is_tensorrt_rtx(), + "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", + ) def test_input_respect_user_setting_fp16_weights_fp32_in_non_constuctor(self): self.model = models.resnet18(pretrained=True).eval().to("cuda") self.input = torch.randn((1, 3, 224, 224)).to("cuda") diff --git a/tests/py/ts/api/test_embed_engines.py b/tests/py/ts/api/test_embed_engines.py index ed421dfe2c..a00585f0d1 100644 --- a/tests/py/ts/api/test_embed_engines.py +++ b/tests/py/ts/api/test_embed_engines.py @@ -11,11 +11,17 @@ import timm import torchvision.models as models +from torch_tensorrt._utils import is_tensorrt_rtx + class TestModelToEngineToModel(unittest.TestCase): @unittest.skipIf( not importlib.util.find_spec("torchvision"), "torchvision not installed" ) + @unittest.skipIf( + is_tensorrt_rtx(), + "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", + ) def test_resnet50(self): self.model = models.resnet50(pretrained=True).eval().to("cuda") self.input = torch.randn((1, 3, 224, 224)).to("cuda") @@ -49,6 +55,10 @@ def test_resnet50(self): or not importlib.util.find_spec("torchvision"), "timm or torchvision not installed", ) + @unittest.skipIf( + is_tensorrt_rtx(), + "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", + ) def test_efficientnet_b0(self): self.model = ( timm.create_model("efficientnet_b0", pretrained=True).eval().to("cuda") diff --git a/tests/py/ts/api/test_module_fallback.py b/tests/py/ts/api/test_module_fallback.py index 8ef1059b2f..1a57810145 100644 --- a/tests/py/ts/api/test_module_fallback.py +++ b/tests/py/ts/api/test_module_fallback.py @@ -1,12 +1,18 @@ +import copy import unittest -import torch_tensorrt as torchtrt +from typing import Dict + import torch +import torch_tensorrt as torchtrt import torchvision.models as models -import copy -from typing import Dict -from utils import cosine_similarity, COSINE_THRESHOLD +from torch_tensorrt._utils import is_tensorrt_rtx +from utils import COSINE_THRESHOLD, cosine_similarity +@unittest.skipIf( + is_tensorrt_rtx(), + "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", +) class TestModuleFallback(unittest.TestCase): def test_fallback_resnet18(self): self.model = models.resnet18(pretrained=True).eval().to("cuda") diff --git a/tests/py/ts/api/test_operator_fallback.py b/tests/py/ts/api/test_operator_fallback.py index 3e4777869f..f1104fb526 100644 --- a/tests/py/ts/api/test_operator_fallback.py +++ b/tests/py/ts/api/test_operator_fallback.py @@ -1,12 +1,18 @@ +import copy import unittest -import torch_tensorrt as torchtrt +from typing import Dict + import torch +import torch_tensorrt as torchtrt import torchvision.models as models -import copy -from typing import Dict -from utils import cosine_similarity, COSINE_THRESHOLD +from torch_tensorrt._utils import is_tensorrt_rtx +from utils import COSINE_THRESHOLD, cosine_similarity +@unittest.skipIf( + is_tensorrt_rtx(), + "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", +) class TestFallbackModels(unittest.TestCase): def test_fallback_resnet18(self): self.model = models.resnet18(pretrained=True).eval().to("cuda") diff --git a/tests/py/ts/api/test_ts_backend.py b/tests/py/ts/api/test_ts_backend.py index c4d1ba403a..7cddf57a85 100644 --- a/tests/py/ts/api/test_ts_backend.py +++ b/tests/py/ts/api/test_ts_backend.py @@ -119,6 +119,10 @@ def test_default_device(self): ) +@unittest.skipIf( + is_tensorrt_rtx(), + "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", +) class TestCheckMethodOpSupport(unittest.TestCase): def test_check_support(self): module = models.alexnet(pretrained=True).eval().to("cuda") diff --git a/tests/py/ts/integrations/test_to_backend_api.py b/tests/py/ts/integrations/test_to_backend_api.py index 028a0b8c6e..f7a1eebd2b 100644 --- a/tests/py/ts/integrations/test_to_backend_api.py +++ b/tests/py/ts/integrations/test_to_backend_api.py @@ -11,6 +11,10 @@ not torchtrt.ENABLED_FEATURES.torchscript_frontend, "TorchScript Frontend is not available", ) +@unittest.skipIf( + is_tensorrt_rtx(), + "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", +) class TestToBackendLowering(unittest.TestCase): def setUp(self): self.input = torch.randn((1, 3, 300, 300)).to("cuda") diff --git a/tests/py/ts/integrations/test_trt_intercompatibility.py b/tests/py/ts/integrations/test_trt_intercompatibility.py index 2ee3f7bf7a..a16415386d 100644 --- a/tests/py/ts/integrations/test_trt_intercompatibility.py +++ b/tests/py/ts/integrations/test_trt_intercompatibility.py @@ -11,6 +11,10 @@ not torchtrt.ENABLED_FEATURES.torchscript_frontend, "TorchScript Frontend is not available", ) +@unittest.skipIf( + is_tensorrt_rtx(), + "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", +) class TestPyTorchToTRTEngine(unittest.TestCase): def test_pt_to_trt(self): self.model = models.resnet18(pretrained=True).eval().to("cuda:0") diff --git a/tests/py/ts/models/test_models.py b/tests/py/ts/models/test_models.py index 923618a708..4993655ce7 100644 --- a/tests/py/ts/models/test_models.py +++ b/tests/py/ts/models/test_models.py @@ -11,6 +11,8 @@ import timm import torchvision.models as models +from torch_tensorrt._utils import is_tensorrt_rtx + @unittest.skipIf( not torchtrt.ENABLED_FEATURES.torchscript_frontend, @@ -19,6 +21,10 @@ @unittest.skipIf( not importlib.util.find_spec("torchvision"), "torchvision not installed" ) +@unittest.skipIf( + is_tensorrt_rtx(), + "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", +) class TestModels(unittest.TestCase): def test_resnet18(self): self.model = models.resnet18(pretrained=True).eval().to("cuda") From 3e4c41297bcd9c876d085ae4ba533806b4d0642f Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 25 Aug 2025 22:35:45 -0700 Subject: [PATCH 16/30] test --- tests/py/dynamo/conversion/test_binary_ops_aten.py | 1 + tests/py/dynamo/conversion/test_casts.py | 2 ++ tests/py/ts/integrations/test_to_backend_api.py | 1 + tests/py/ts/integrations/test_trt_intercompatibility.py | 1 + 4 files changed, 5 insertions(+) diff --git a/tests/py/dynamo/conversion/test_binary_ops_aten.py b/tests/py/dynamo/conversion/test_binary_ops_aten.py index 90654b9282..c06c131c61 100644 --- a/tests/py/dynamo/conversion/test_binary_ops_aten.py +++ b/tests/py/dynamo/conversion/test_binary_ops_aten.py @@ -1,3 +1,4 @@ +import unittest from typing import Callable import torch diff --git a/tests/py/dynamo/conversion/test_casts.py b/tests/py/dynamo/conversion/test_casts.py index 6c8f27dd8c..592164e5fb 100644 --- a/tests/py/dynamo/conversion/test_casts.py +++ b/tests/py/dynamo/conversion/test_casts.py @@ -1,5 +1,7 @@ # type: ignore +import unittest + import torch import torch.nn as nn from torch.testing._internal.common_utils import run_tests diff --git a/tests/py/ts/integrations/test_to_backend_api.py b/tests/py/ts/integrations/test_to_backend_api.py index f7a1eebd2b..7cb7fc5b00 100644 --- a/tests/py/ts/integrations/test_to_backend_api.py +++ b/tests/py/ts/integrations/test_to_backend_api.py @@ -4,6 +4,7 @@ import torch import torch_tensorrt as torchtrt import torchvision.models as models +from torch_tensorrt._utils import is_tensorrt_rtx from utils import COSINE_THRESHOLD, cosine_similarity diff --git a/tests/py/ts/integrations/test_trt_intercompatibility.py b/tests/py/ts/integrations/test_trt_intercompatibility.py index a16415386d..0599869dca 100644 --- a/tests/py/ts/integrations/test_trt_intercompatibility.py +++ b/tests/py/ts/integrations/test_trt_intercompatibility.py @@ -4,6 +4,7 @@ import torch import torch_tensorrt as torchtrt import torchvision.models as models +from torch_tensorrt._utils import is_tensorrt_rtx from utils import COSINE_THRESHOLD, cosine_similarity From 714905c8bf3c779d21c606dab239cf2ec477f9b3 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 26 Aug 2025 08:40:18 -0700 Subject: [PATCH 17/30] ignore cudagraph tests --- tests/py/dynamo/conversion/test_casts.py | 1 + tests/py/dynamo/runtime/test_000_compilation_settings.py | 1 - .../py/dynamo/runtime/test_000_convert_module_to_trt_engine.py | 2 -- tests/py/dynamo/runtime/test_004_weight_streaming.py | 2 ++ 4 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/py/dynamo/conversion/test_casts.py b/tests/py/dynamo/conversion/test_casts.py index 592164e5fb..b152a2adb8 100644 --- a/tests/py/dynamo/conversion/test_casts.py +++ b/tests/py/dynamo/conversion/test_casts.py @@ -6,6 +6,7 @@ import torch.nn as nn from torch.testing._internal.common_utils import run_tests from torch_tensorrt import dtype +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo.conversion import UnsupportedOperatorException from .harness import DispatchTestCase diff --git a/tests/py/dynamo/runtime/test_000_compilation_settings.py b/tests/py/dynamo/runtime/test_000_compilation_settings.py index 500ca9ef71..f46ddefa9d 100644 --- a/tests/py/dynamo/runtime/test_000_compilation_settings.py +++ b/tests/py/dynamo/runtime/test_000_compilation_settings.py @@ -1,7 +1,6 @@ import unittest from importlib import metadata -import tensorrt as trt import torch import torch_tensorrt from torch.testing._internal.common_utils import TestCase, run_tests diff --git a/tests/py/dynamo/runtime/test_000_convert_module_to_trt_engine.py b/tests/py/dynamo/runtime/test_000_convert_module_to_trt_engine.py index b513ff46c8..0b8720c6cd 100644 --- a/tests/py/dynamo/runtime/test_000_convert_module_to_trt_engine.py +++ b/tests/py/dynamo/runtime/test_000_convert_module_to_trt_engine.py @@ -5,8 +5,6 @@ from torch_tensorrt.dynamo.runtime import PythonTorchTensorRTModule from torch_tensorrt.dynamo.utils import COSINE_THRESHOLD, cosine_similarity -import tensorrt as trt - class TestConvertModuleToTrtEngine(unittest.TestCase): def test_convert_module(self): diff --git a/tests/py/dynamo/runtime/test_004_weight_streaming.py b/tests/py/dynamo/runtime/test_004_weight_streaming.py index d453f91c3f..59563d43a2 100644 --- a/tests/py/dynamo/runtime/test_004_weight_streaming.py +++ b/tests/py/dynamo/runtime/test_004_weight_streaming.py @@ -6,6 +6,7 @@ import torch_tensorrt as torchtrt from parameterized import parameterized from torch.testing._internal.common_utils import TestCase, run_tests +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo.utils import prepare_inputs INPUT_SIZE = (64, 100) @@ -291,6 +292,7 @@ def test_weight_streaming_cudagraphs(self, _, use_python_runtime): ("cpp_runtime", False), ] ) + @unittest.skipIf(is_tensorrt_rtx(), "TensorRT-RTX has bug on cudagraphs") def test_runtime_state_change(self, _, use_python_runtime): class SampleModel(torch.nn.Module): def __init__(self): From 26486c335fc25346defb3fadca5dac2adfafc079 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 26 Aug 2025 10:06:24 -0700 Subject: [PATCH 18/30] add fx flag in setup.py --- setup.py | 67 +++++++++++++++++++++++--------------------------------- 1 file changed, 28 insertions(+), 39 deletions(-) diff --git a/setup.py b/setup.py index 7531252798..f6dc67cacc 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,7 @@ from setuptools.command.editable_wheel import editable_wheel from setuptools.command.install import install from torch.utils.cpp_extension import IS_WINDOWS, BuildExtension, CUDAExtension +from torch_tensorrt._features import _FX_FE_AVAIL __version__: str = "0.0.0" __cuda_version__: str = "0.0" @@ -483,45 +484,33 @@ def run(self): "torch_tensorrt.runtime": "py/torch_tensorrt/runtime", } -if USE_TRT_RTX: - package_dir = dynamo_package_dir - packages = dynamo_packages - exclude_package_data = { - "": [ - "py/torch_tensorrt/csrc/*.cpp", - "torch_tensorrt/csrc/*.cpp", - "test*", - "*.cpp", - ], - "torch_tensorrt": [ - "py/torch_tensorrt/csrc/*.cpp", - "torch_tensorrt/csrc/*.cpp", - "test*", - "*.cpp", - ], - } -else: - package_dir = dynamo_package_dir | fx_package_dir - packages = dynamo_packages + fx_packages - exclude_package_data = { - "": [ - "py/torch_tensorrt/csrc/*.cpp", - "py/torch_tensorrt/fx/test*", - "torch_tensorrt/csrc/*.cpp", - "torch_tensorrt/fx/test*", - "test*", - "*.cpp", - ], - "torch_tensorrt": [ - "py/torch_tensorrt/csrc/*.cpp", - "py/torch_tensorrt/fx/test*", - "torch_tensorrt/csrc/*.cpp", - "torch_tensorrt/fx/test*", - "test*", - "*.cpp", - ], - "torch_tensorrt.fx": ["test/*.py"], - } +package_dir = dynamo_package_dir +packages = dynamo_packages +exclude_package_data = { + "": [ + "py/torch_tensorrt/csrc/*.cpp", + "torch_tensorrt/csrc/*.cpp", + "test*", + "*.cpp", + ], + "torch_tensorrt": [ + "py/torch_tensorrt/csrc/*.cpp", + "torch_tensorrt/csrc/*.cpp", + "test*", + "*.cpp", + ], +} + +if _FX_FE_AVAIL: + package_dir = package_dir | fx_package_dir + packages = packages + fx_packages + exclude_package_data["torch_tensorrt.fx"] = ["test/*.py"] + exclude_package_data[""].extend( + ["py/torch_tensorrt/fx/test*", "torch_tensorrt/fx/test*"] + ) + exclude_package_data["torch_tensorrt"].extend( + ["py/torch_tensorrt/fx/test*", "torch_tensorrt/fx/test*"] + ) package_data = {} From b768b91f42afe293fc5ac6d38674cc8d491dbad7 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 26 Aug 2025 10:46:23 -0700 Subject: [PATCH 19/30] test --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f6dc67cacc..b26a34f371 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,6 @@ from setuptools.command.editable_wheel import editable_wheel from setuptools.command.install import install from torch.utils.cpp_extension import IS_WINDOWS, BuildExtension, CUDAExtension -from torch_tensorrt._features import _FX_FE_AVAIL __version__: str = "0.0.0" __cuda_version__: str = "0.0" @@ -415,6 +414,7 @@ def run(self): os.remove(path) +_FX_FE_AVAIL = False if USE_TRT_RTX else True ext_modules = [] fx_packages = [ From 4cd77554d48c3e8ef7c3a836c35e5ed0133c4ab3 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 26 Aug 2025 15:52:47 -0700 Subject: [PATCH 20/30] resolve comments --- docsrc/getting_started/tensorrt_rtx.rst | 4 +-- packaging/smoke_test_windows.py | 3 +- py/torch_tensorrt/_features.py | 18 ++++++++++-- py/torch_tensorrt/_utils.py | 8 +----- .../dynamo/conversion/_ConverterRegistry.py | 28 ++++++++----------- .../dynamo/conversion/_TRTInterpreter.py | 9 +++--- .../dynamo/conversion/aten_ops_converters.py | 5 ++-- .../conversion/impl/normalization/ops.py | 4 +-- .../dynamo/conversion/impl/quantize.py | 4 +-- .../dynamo/conversion/test_binary_ops_aten.py | 3 +- tests/py/dynamo/conversion/test_casts.py | 4 +-- .../conversion/test_deconvolution_aten.py | 6 ++-- .../dynamo/conversion/test_hardtanh_aten.py | 4 +-- .../py/dynamo/conversion/test_nonzero_aten.py | 10 +++---- tests/py/dynamo/models/test_dtype_support.py | 3 +- tests/py/dynamo/models/test_dyn_models.py | 3 +- tests/py/dynamo/models/test_models.py | 11 ++++---- tests/py/dynamo/models/test_models_export.py | 7 ++--- .../runtime/test_004_weight_streaming.py | 5 ++-- tests/py/ts/api/test_classes.py | 3 +- tests/py/ts/api/test_e2e_behavior.py | 11 ++++---- tests/py/ts/api/test_embed_engines.py | 6 ++-- tests/py/ts/api/test_module_fallback.py | 3 +- tests/py/ts/api/test_operator_fallback.py | 3 +- tests/py/ts/api/test_ts_backend.py | 5 ++-- .../py/ts/integrations/test_to_backend_api.py | 3 +- .../test_trt_intercompatibility.py | 3 +- tests/py/ts/models/test_models.py | 4 +-- 28 files changed, 85 insertions(+), 95 deletions(-) diff --git a/docsrc/getting_started/tensorrt_rtx.rst b/docsrc/getting_started/tensorrt_rtx.rst index 32104ddcef..167a4bd859 100644 --- a/docsrc/getting_started/tensorrt_rtx.rst +++ b/docsrc/getting_started/tensorrt_rtx.rst @@ -1,6 +1,6 @@ -.. _Torch-TensorRT_in_RTX: +.. _Torch-TensorRT-RTX: -Torch-TensorRT in RTX +Torch-TensorRT-RTX ===================== Overview diff --git a/packaging/smoke_test_windows.py b/packaging/smoke_test_windows.py index 31598663f9..5597aafe1f 100644 --- a/packaging/smoke_test_windows.py +++ b/packaging/smoke_test_windows.py @@ -2,11 +2,10 @@ import torch import torch_tensorrt -from torch_tensorrt._utils import is_tensorrt_rtx print(f"Torch CUDA version: {torch.version.cuda}") print(f"Torch TensorRT version: {torch_tensorrt.__version__}") -print(f"Is TensorRT RTX: {is_tensorrt_rtx()}") +print(f"Is TensorRT RTX: {torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx}") result = subprocess.run( ["systeminfo"], diff --git a/py/torch_tensorrt/_features.py b/py/torch_tensorrt/_features.py index 0300f4b296..866da6d946 100644 --- a/py/torch_tensorrt/_features.py +++ b/py/torch_tensorrt/_features.py @@ -4,9 +4,9 @@ from collections import namedtuple from typing import Any, Callable, Dict, List, Optional, Type, TypeVar +import tensorrt from torch_tensorrt._utils import ( check_cross_compile_trt_win_lib, - is_tensorrt_rtx, sanitized_torch_version, ) @@ -22,6 +22,7 @@ "refit", "qdp_plugin", "windows_cross_compile", + "tensorrt_rtx", ], ) @@ -40,10 +41,12 @@ linked_file_full_path = os.path.join(trtorch_dir, linked_file) linked_file_runtime_full_path = os.path.join(trtorch_dir, linked_file_runtime) +_TENSORRT_RTX = tensorrt._package_name == "tensorrt_rtx" + _TS_FE_AVAIL = os.path.isfile(linked_file_full_path) _TORCHTRT_RT_AVAIL = _TS_FE_AVAIL or os.path.isfile(linked_file_runtime_full_path) _DYNAMO_FE_AVAIL = version.parse(sanitized_torch_version()) >= version.parse("2.1.dev") -_FX_FE_AVAIL = False if is_tensorrt_rtx() else True +_FX_FE_AVAIL = False if _TENSORRT_RTX else True _REFIT_AVAIL = True _WINDOWS_CROSS_COMPILE = check_cross_compile_trt_win_lib() @@ -60,6 +63,7 @@ _REFIT_AVAIL, _QDP_PLUGIN_AVAIL, _WINDOWS_CROSS_COMPILE, + _TENSORRT_RTX, ) T = TypeVar("T") @@ -67,10 +71,18 @@ def _enabled_features_str() -> str: enabled = lambda x: "ENABLED" if x else "DISABLED" - out_str: str = f"Enabled Features:\n - Dynamo Frontend: {enabled(_DYNAMO_FE_AVAIL)}\n - Torch-TensorRT Runtime: {enabled(_TORCHTRT_RT_AVAIL)}\n - FX Frontend: {enabled(_FX_FE_AVAIL)}\n - TorchScript Frontend: {enabled(_TS_FE_AVAIL)}\n - Refit: {enabled(_REFIT_AVAIL)}\n - QDP Plugin: {enabled(_QDP_PLUGIN_AVAIL)}\n" # type: ignore[no-untyped-call] + out_str: str = f"Enabled Features:\n - Dynamo Frontend: {enabled(_DYNAMO_FE_AVAIL)}\n - Torch-TensorRT Runtime: {enabled(_TORCHTRT_RT_AVAIL)}\n - FX Frontend: {enabled(_FX_FE_AVAIL)}\n - TorchScript Frontend: {enabled(_TS_FE_AVAIL)}\n - Refit: {enabled(_REFIT_AVAIL)}\n - QDP Plugin: {enabled(_QDP_PLUGIN_AVAIL)} \n - TensorRT-RTX: {enabled(_TENSORRT_RTX)}\n" # type: ignore[no-untyped-call] return out_str +def needs_tensorrt_rtx(f: Callable[..., Any]) -> Callable[..., Any]: + def wrapper(*args: List[Any], **kwargs: Dict[str, Any]) -> Any: + if ENABLED_FEATURES.tensorrt_rtx: + return f(*args, **kwargs) + else: + raise NotImplementedError("TensorRT-RTX is not available") + + def needs_torch_tensorrt_runtime(f: Callable[..., Any]) -> Callable[..., Any]: def wrapper(*args: List[Any], **kwargs: Dict[str, Any]) -> Any: if ENABLED_FEATURES.torch_tensorrt_runtime: diff --git a/py/torch_tensorrt/_utils.py b/py/torch_tensorrt/_utils.py index 3a17e7e267..b981fb325a 100644 --- a/py/torch_tensorrt/_utils.py +++ b/py/torch_tensorrt/_utils.py @@ -27,12 +27,6 @@ def check_cross_compile_trt_win_lib() -> bool: return False -def is_tensorrt_rtx() -> bool: - if trt._package_name == "tensorrt_rtx": - return True - return False - - def is_tensorrt_version_supported(min_version: str) -> bool: """ Check if the installed TensorRT version supports the specified minimum version. @@ -46,7 +40,7 @@ def is_tensorrt_version_supported(min_version: str) -> bool: ... pass """ try: - if is_tensorrt_rtx(): + if trt._package_name == "tensorrt_rtx": return True from importlib import metadata diff --git a/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py b/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py index 7851d54cd6..35d5d7eeec 100644 --- a/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py +++ b/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py @@ -20,10 +20,10 @@ import tensorrt as trt import torch +import torch_tensorrt from torch import SymBool, SymFloat, SymInt from torch._ops import OpOverloadPacket from torch.fx.node import Argument, Node, Target, _get_qualified_name -from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo._settings import CompilationSettings from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -624,23 +624,19 @@ def display_all_available_converters(self) -> str: # Initialize dynamo converter registry with the FX and Dynamo aten registries # Note the Dynamo registry is listed first, for precedence -if is_tensorrt_rtx(): - registries = [ - DYNAMO_ATEN_CONVERTERS, - ] - registry_names = ["Dynamo ATen Converters Registry"] - registry_calling_conventions = [ - CallingConvention.CTX, - ] -else: +registries = [ + DYNAMO_ATEN_CONVERTERS, +] +registry_names = ["Dynamo ATen Converters Registry"] +registry_calling_conventions = [ + CallingConvention.CTX, +] +if torch_tensorrt.ENABLED_FEATURES.fx_frontend: from torch_tensorrt.fx.converter_registry import CONVERTERS as FX_CONVERTERS - registries = [DYNAMO_ATEN_CONVERTERS, FX_CONVERTERS] - registry_names = [ - "Dynamo ATen Converters Registry", - "FX Legacy ATen Converters Registry", - ] - registry_calling_conventions = [CallingConvention.CTX, CallingConvention.LEGACY] + registries.append(FX_CONVERTERS) + registry_names.append("FX Legacy ATen Converters Registry") + registry_calling_conventions.append(CallingConvention.LEGACY) DYNAMO_CONVERTERS: ConverterRegistry = ConverterRegistry( diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py index 713b64076d..73af09448e 100644 --- a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py +++ b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py @@ -25,10 +25,11 @@ from torch.fx.node import _get_qualified_name from torch.fx.passes.shape_prop import TensorMetadata from torch.utils._python_dispatch import _disable_current_modes +from torch_tensorrt import ENABLED_FEATURES from torch_tensorrt._enums import dtype from torch_tensorrt._features import needs_refit from torch_tensorrt._Input import Input -from torch_tensorrt._utils import is_tensorrt_rtx, is_tensorrt_version_supported +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.dynamo import _defaults from torch_tensorrt.dynamo._engine_cache import BaseEngineCache from torch_tensorrt.dynamo._settings import CompilationSettings, settings_are_compatible @@ -90,10 +91,10 @@ def __init__( self._debugger_config = _debugger_config flag = 0 # rtx build, strongly typed is enabled by default, can not set it by builder config - if is_tensorrt_rtx(): + if ENABLED_FEATURES.tensorrt_rtx: if not compilation_settings.use_explicit_typing: warnings.warn( - "Strongly typed is enabled by default in rtx build, setting use_explicit_typing to True" + "Strongly typed is enabled by default in torch-tensorrt-rtx build, setting use_explicit_typing to True" ) compilation_settings.use_explicit_typing = True else: @@ -196,7 +197,7 @@ def _all_precisions_supported(enabled_precisions: Set[dtype]) -> bool: return enabled_precisions.issubset(_defaults.SUPPORTED_KERNEL_PRECISIONS) def validate_compile_settings(self) -> None: - if is_tensorrt_rtx(): + if ENABLED_FEATURES.tensorrt_rtx: if dtype.bfloat16 in self.compilation_settings.enabled_precisions: raise RuntimeError("TensorRT-RTX does not support bfloat16!") return diff --git a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py index f477d35e0e..670e9457a4 100644 --- a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py +++ b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py @@ -8,7 +8,8 @@ import torch from tensorrt import ITensor as TRTTensor from torch.fx.node import Argument, Node, Target -from torch_tensorrt._utils import is_tensorrt_rtx, is_tensorrt_version_supported +from torch_tensorrt import ENABLED_FEATURES +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.dynamo._settings import CompilationSettings from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl @@ -3592,7 +3593,7 @@ def aten_ops_full( # currently nonzero is not supported for tensorrt_rtx # TODO: lan to remove this once rtx team has fixed the bug -if not is_tensorrt_rtx(): +if not ENABLED_FEATURES.tensorrt_rtx: @dynamo_tensorrt_converter( torch.ops.aten.nonzero.default, diff --git a/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py index 56c5481fc3..75edf2b44d 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py @@ -6,7 +6,7 @@ import torch from torch._subclasses.fake_tensor import unset_fake_temporarily from torch.fx.node import Target -from torch_tensorrt._utils import is_tensorrt_rtx +from torch_tensorrt import ENABLED_FEATURES from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -55,7 +55,7 @@ def batch_norm( # In this way, the batch norm layer will be fused with the Convolution layer and get a performance boost. # TODO: lanl: to remove this once we have solved the batchnorm constant folding issue in RTX # https://github.com/pytorch/TensorRT/issues/3699 - if is_tensorrt_rtx() or any( + if ENABLED_FEATURES.tensorrt_rtx or any( [ isinstance(weight, trt.ITensor), isinstance(bias, trt.ITensor), diff --git a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py index 0afcde7dd1..b609385ff8 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py @@ -6,7 +6,7 @@ from tensorrt import ITensor as TRTTensor from torch.fx.experimental.proxy_tensor import unset_fake_temporarily from torch.fx.node import Target -from torch_tensorrt._utils import is_tensorrt_rtx +from torch_tensorrt import ENABLED_FEATURES from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -81,7 +81,7 @@ def quantize( if ( dtype == trt.DataType.INT8 and ".input_quantizer" in name - and is_tensorrt_rtx() + and ENABLED_FEATURES.tensorrt_rtx ): # RTX does not support int8 activation quantization # TODO: lan to remove this once rtx team has added the support for int8 activation quantization diff --git a/tests/py/dynamo/conversion/test_binary_ops_aten.py b/tests/py/dynamo/conversion/test_binary_ops_aten.py index c06c131c61..0dd3264306 100644 --- a/tests/py/dynamo/conversion/test_binary_ops_aten.py +++ b/tests/py/dynamo/conversion/test_binary_ops_aten.py @@ -6,7 +6,6 @@ from parameterized import parameterized from torch.testing._internal.common_utils import run_tests from torch_tensorrt import Input -from torch_tensorrt._utils import is_tensorrt_rtx from .harness import DispatchTestCase @@ -238,7 +237,7 @@ def forward(self, x, y): ] ) @unittest.skipIf( - is_tensorrt_rtx(), + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "bf16 is not supported for tensorrt_rtx", ) def test_elementwise_ops_bf16(self, _, orig_op): diff --git a/tests/py/dynamo/conversion/test_casts.py b/tests/py/dynamo/conversion/test_casts.py index b152a2adb8..62920c9610 100644 --- a/tests/py/dynamo/conversion/test_casts.py +++ b/tests/py/dynamo/conversion/test_casts.py @@ -4,9 +4,9 @@ import torch import torch.nn as nn +import torch_tensorrt from torch.testing._internal.common_utils import run_tests from torch_tensorrt import dtype -from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo.conversion import UnsupportedOperatorException from .harness import DispatchTestCase @@ -68,7 +68,7 @@ def forward(self, x): ) @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "bf16 is not supported for tensorrt_rtx", ) def test_to_copy_bfloat16(self): diff --git a/tests/py/dynamo/conversion/test_deconvolution_aten.py b/tests/py/dynamo/conversion/test_deconvolution_aten.py index e2898758e6..a650360160 100644 --- a/tests/py/dynamo/conversion/test_deconvolution_aten.py +++ b/tests/py/dynamo/conversion/test_deconvolution_aten.py @@ -1,10 +1,10 @@ import unittest import torch +import torch_tensorrt from parameterized import param, parameterized from torch.testing._internal.common_utils import run_tests from torch_tensorrt import Input -from torch_tensorrt._utils import is_tensorrt_rtx from .harness import DispatchTestCase @@ -204,7 +204,9 @@ def forward(self, x): enable_passes=True, ) - @unittest.skipIf(is_tensorrt_rtx(), "TensorRT-RTX has bug on deconv3d") + @unittest.skipIf( + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "TensorRT-RTX has bug on deconv3d" + ) @parameterized.expand( [ ("default", 1), diff --git a/tests/py/dynamo/conversion/test_hardtanh_aten.py b/tests/py/dynamo/conversion/test_hardtanh_aten.py index e286c1cf6e..d71cd3d6dc 100644 --- a/tests/py/dynamo/conversion/test_hardtanh_aten.py +++ b/tests/py/dynamo/conversion/test_hardtanh_aten.py @@ -2,15 +2,15 @@ import torch import torch.nn as nn +import torch_tensorrt from torch.testing._internal.common_utils import run_tests from torch_tensorrt import Input -from torch_tensorrt._utils import is_tensorrt_rtx from .harness import DispatchTestCase @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "hardtanh is implemented in fx, need to move to dynamo, skip for TensorRT-RTX for now", ) class TestHardTanHConverter(DispatchTestCase): diff --git a/tests/py/dynamo/conversion/test_nonzero_aten.py b/tests/py/dynamo/conversion/test_nonzero_aten.py index 69a8024077..7408f3d221 100644 --- a/tests/py/dynamo/conversion/test_nonzero_aten.py +++ b/tests/py/dynamo/conversion/test_nonzero_aten.py @@ -2,10 +2,10 @@ import torch import torch.nn as nn +import torch_tensorrt from parameterized import parameterized from torch.testing._internal.common_utils import run_tests from torch_tensorrt import Input -from torch_tensorrt._utils import is_tensorrt_rtx from .harness import DispatchTestCase @@ -21,7 +21,7 @@ class TestNonZeroConverter(DispatchTestCase): ] ) @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "nonzero is not supported for tensorrt_rtx", ) def test_nonzero_dds(self, input_shape, dtype): @@ -47,7 +47,7 @@ def forward(self, input): ] ) @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "nonzero is not supported for tensorrt_rtx", ) def test_nonzero_non_dds(self, input_shape, dtype): @@ -90,7 +90,7 @@ def forward(self, input): ] ) @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "nonzero is not supported for tensorrt_rtx", ) def test_nonzero_dynamic_shape_dds(self, _, min_shape, opt_shape, max_shape, dtype): @@ -135,7 +135,7 @@ def forward(self, input): ] ) @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "nonzero is not supported for tensorrt_rtx", ) def test_nonzero_dynamic_shape_non_dds( diff --git a/tests/py/dynamo/models/test_dtype_support.py b/tests/py/dynamo/models/test_dtype_support.py index df0b70aa73..a7274da40e 100644 --- a/tests/py/dynamo/models/test_dtype_support.py +++ b/tests/py/dynamo/models/test_dtype_support.py @@ -8,7 +8,6 @@ from torch import nn from torch.nn.parameter import Parameter, UninitializedParameter from torch.testing._internal.common_utils import TestCase, run_tests -from torch_tensorrt._utils import is_tensorrt_rtx from ..testing_utilities import DECIMALS_OF_AGREEMENT, lower_graph_testing @@ -199,7 +198,7 @@ def forward(self, x): "Platform does not have BF16 support", ) @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "bf16 is not supported for tensorrt_rtx", ) class TestBF16Support(TestCase): diff --git a/tests/py/dynamo/models/test_dyn_models.py b/tests/py/dynamo/models/test_dyn_models.py index 343ae2ba0c..bd8a823519 100644 --- a/tests/py/dynamo/models/test_dyn_models.py +++ b/tests/py/dynamo/models/test_dyn_models.py @@ -5,7 +5,6 @@ import pytest import torch import torch_tensorrt as torchtrt -from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo.utils import ( COSINE_THRESHOLD, cosine_similarity, @@ -187,7 +186,7 @@ def test_resnet_dynamic(ir, dtype): """ Tests the Resnet18 model (which is fully convertible) with dynamic shapes """ - if is_tensorrt_rtx() and dtype == torch.bfloat16: + if torchtrt.ENABLED_FEATURES.tensorrt_rtx and dtype == torch.bfloat16: pytest.skip("TensorRT-RTX does not support bfloat16") import torchvision.models as models diff --git a/tests/py/dynamo/models/test_models.py b/tests/py/dynamo/models/test_models.py index c9c6f3c814..6f76d9510d 100644 --- a/tests/py/dynamo/models/test_models.py +++ b/tests/py/dynamo/models/test_models.py @@ -5,7 +5,6 @@ import pytest import torch import torch_tensorrt as torchtrt -from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo.utils import ( COSINE_THRESHOLD, cosine_similarity, @@ -139,7 +138,7 @@ def test_resnet18_torch_exec_ops(ir): ) @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32]) def test_mobilenet_v2(ir, dtype): - if is_tensorrt_rtx() and dtype == torch.bfloat16: + if torchtrt.ENABLED_FEATURES.tensorrt_rtx and dtype == torch.bfloat16: pytest.skip("TensorRT-RTX does not support bfloat16") model = models.mobilenet_v2(pretrained=True).eval().to("cuda").to(dtype) @@ -181,7 +180,7 @@ def test_mobilenet_v2(ir, dtype): ) @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32]) def test_efficientnet_b0(ir, dtype): - if is_tensorrt_rtx() and dtype == torch.bfloat16: + if torchtrt.ENABLED_FEATURES.tensorrt_rtx and dtype == torch.bfloat16: pytest.skip("TensorRT-RTX does not support bfloat16") model = ( @@ -228,7 +227,7 @@ def test_efficientnet_b0(ir, dtype): ) @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32]) def test_bert_base_uncased(ir, dtype): - if is_tensorrt_rtx() and dtype == torch.bfloat16: + if torchtrt.ENABLED_FEATURES.tensorrt_rtx and dtype == torch.bfloat16: pytest.skip("TensorRT-RTX does not support bfloat16") from transformers import BertModel @@ -369,7 +368,7 @@ def test_resnet18_half(ir): @pytest.mark.unit @unittest.skipIf( - is_tensorrt_rtx(), + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "bf16 is not supported for tensorrt_rtx", ) def test_bf16_model(ir): @@ -417,7 +416,7 @@ def forward(self, x): @pytest.mark.unit @unittest.skipIf( - is_tensorrt_rtx(), + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "bf16 is not supported for tensorrt_rtx", ) def test_bf16_fallback_model(ir): diff --git a/tests/py/dynamo/models/test_models_export.py b/tests/py/dynamo/models/test_models_export.py index 3971786926..e8c3933d00 100644 --- a/tests/py/dynamo/models/test_models_export.py +++ b/tests/py/dynamo/models/test_models_export.py @@ -7,7 +7,6 @@ import pytest import torch import torch_tensorrt as torchtrt -from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo.utils import COSINE_THRESHOLD, cosine_similarity from packaging.version import Version @@ -392,7 +391,7 @@ def test_base_int8(ir, dtype): import modelopt.torch.quantization as mtq from modelopt.torch.quantization.utils import export_torch_mode - if is_tensorrt_rtx() and dtype == torch.bfloat16: + if torchtrt.ENABLED_FEATURES.tensorrt_rtx and dtype == torch.bfloat16: pytest.skip("TensorRT-RTX does not support bfloat16") class SimpleNetwork(torch.nn.Module): @@ -415,7 +414,7 @@ def calibrate_loop(model): model = SimpleNetwork().eval().cuda().to(dtype) quant_cfg = mtq.INT8_DEFAULT_CFG # RTX does not support INT8 default quantization(weights+activations), only support INT8 weights only quantization - if is_tensorrt_rtx(): + if torchtrt.ENABLED_FEATURES.tensorrt_rtx: quant_cfg["quant_cfg"]["*input_quantizer"] = {"enable": False} mtq.quantize(model, quant_cfg, forward_loop=calibrate_loop) # model has INT8 qdq nodes at this point @@ -451,7 +450,7 @@ def test_base_int8_dynamic_shape(ir, dtype): import modelopt.torch.quantization as mtq from modelopt.torch.quantization.utils import export_torch_mode - if is_tensorrt_rtx() and dtype == torch.bfloat16: + if torchtrt.ENABLED_FEATURES.tensorrt_rtx and dtype == torch.bfloat16: pytest.skip("TensorRT-RTX does not support bfloat16") class SimpleNetwork(torch.nn.Module): diff --git a/tests/py/dynamo/runtime/test_004_weight_streaming.py b/tests/py/dynamo/runtime/test_004_weight_streaming.py index 59563d43a2..fe0ae649bc 100644 --- a/tests/py/dynamo/runtime/test_004_weight_streaming.py +++ b/tests/py/dynamo/runtime/test_004_weight_streaming.py @@ -6,7 +6,6 @@ import torch_tensorrt as torchtrt from parameterized import parameterized from torch.testing._internal.common_utils import TestCase, run_tests -from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo.utils import prepare_inputs INPUT_SIZE = (64, 100) @@ -292,7 +291,9 @@ def test_weight_streaming_cudagraphs(self, _, use_python_runtime): ("cpp_runtime", False), ] ) - @unittest.skipIf(is_tensorrt_rtx(), "TensorRT-RTX has bug on cudagraphs") + @unittest.skipIf( + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "TensorRT-RTX has bug on cudagraphs" + ) def test_runtime_state_change(self, _, use_python_runtime): class SampleModel(torch.nn.Module): def __init__(self): diff --git a/tests/py/ts/api/test_classes.py b/tests/py/ts/api/test_classes.py index a32b2b2722..ea0fe27444 100644 --- a/tests/py/ts/api/test_classes.py +++ b/tests/py/ts/api/test_classes.py @@ -5,7 +5,6 @@ import torch import torch_tensorrt as torchtrt import torchvision.models as models -from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo.runtime._TorchTensorRTModule import TorchTensorRTModule @@ -315,7 +314,7 @@ def test_set_get_profile_path_prefix(self): self.assertTrue(trt_mod.engine.profile_path_prefix == "/tmp/") @unittest.skipIf( - is_tensorrt_rtx(), + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "layer info is different for tensorrt_rtx", ) def test_get_layer_info(self): diff --git a/tests/py/ts/api/test_e2e_behavior.py b/tests/py/ts/api/test_e2e_behavior.py index df8c5af034..169dce9ef1 100644 --- a/tests/py/ts/api/test_e2e_behavior.py +++ b/tests/py/ts/api/test_e2e_behavior.py @@ -5,12 +5,11 @@ import torch import torch_tensorrt as torchtrt import torchvision.models as models -from torch_tensorrt._utils import is_tensorrt_rtx from utils import same_output_format @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) class TestInputTypeDefaultsFP32Model(unittest.TestCase): @@ -58,7 +57,7 @@ def test_input_respect_user_setting_fp32_weights_fp16_in_non_constructor(self): class TestInputTypeDefaultsFP16Model(unittest.TestCase): @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) def test_input_use_default_fp16(self): @@ -76,7 +75,7 @@ def test_input_use_default_fp16(self): trt_mod(self.input.half()) @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) def test_input_use_default_fp16_without_fp16_enabled(self): @@ -92,7 +91,7 @@ def test_input_use_default_fp16_without_fp16_enabled(self): trt_mod(self.input.half()) @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) def test_input_respect_user_setting_fp16_weights_fp32_in(self): @@ -111,7 +110,7 @@ def test_input_respect_user_setting_fp16_weights_fp32_in(self): trt_mod(self.input) @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) def test_input_respect_user_setting_fp16_weights_fp32_in_non_constuctor(self): diff --git a/tests/py/ts/api/test_embed_engines.py b/tests/py/ts/api/test_embed_engines.py index a00585f0d1..7aba1f4d74 100644 --- a/tests/py/ts/api/test_embed_engines.py +++ b/tests/py/ts/api/test_embed_engines.py @@ -11,15 +11,13 @@ import timm import torchvision.models as models -from torch_tensorrt._utils import is_tensorrt_rtx - class TestModelToEngineToModel(unittest.TestCase): @unittest.skipIf( not importlib.util.find_spec("torchvision"), "torchvision not installed" ) @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) def test_resnet50(self): @@ -56,7 +54,7 @@ def test_resnet50(self): "timm or torchvision not installed", ) @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) def test_efficientnet_b0(self): diff --git a/tests/py/ts/api/test_module_fallback.py b/tests/py/ts/api/test_module_fallback.py index 1a57810145..59cd037a19 100644 --- a/tests/py/ts/api/test_module_fallback.py +++ b/tests/py/ts/api/test_module_fallback.py @@ -5,12 +5,11 @@ import torch import torch_tensorrt as torchtrt import torchvision.models as models -from torch_tensorrt._utils import is_tensorrt_rtx from utils import COSINE_THRESHOLD, cosine_similarity @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) class TestModuleFallback(unittest.TestCase): diff --git a/tests/py/ts/api/test_operator_fallback.py b/tests/py/ts/api/test_operator_fallback.py index f1104fb526..81f5827c2c 100644 --- a/tests/py/ts/api/test_operator_fallback.py +++ b/tests/py/ts/api/test_operator_fallback.py @@ -5,12 +5,11 @@ import torch import torch_tensorrt as torchtrt import torchvision.models as models -from torch_tensorrt._utils import is_tensorrt_rtx from utils import COSINE_THRESHOLD, cosine_similarity @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) class TestFallbackModels(unittest.TestCase): diff --git a/tests/py/ts/api/test_ts_backend.py b/tests/py/ts/api/test_ts_backend.py index 7cddf57a85..f3e349fd1c 100644 --- a/tests/py/ts/api/test_ts_backend.py +++ b/tests/py/ts/api/test_ts_backend.py @@ -5,7 +5,6 @@ import torch import torch_tensorrt as torchtrt import torchvision.models as models -from torch_tensorrt._utils import is_tensorrt_rtx from utils import COSINE_THRESHOLD, cosine_similarity @@ -120,7 +119,7 @@ def test_default_device(self): @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) class TestCheckMethodOpSupport(unittest.TestCase): @@ -145,7 +144,7 @@ def test_module_type(self): torchtrt._compile._parse_module_type(ts_module), torchtrt._compile._ModuleType.ts, ) - if not is_tensorrt_rtx(): + if not torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx: self.assertEqual( torchtrt._compile._parse_module_type(fx_module), torchtrt._compile._ModuleType.fx, diff --git a/tests/py/ts/integrations/test_to_backend_api.py b/tests/py/ts/integrations/test_to_backend_api.py index 7cb7fc5b00..da6485920a 100644 --- a/tests/py/ts/integrations/test_to_backend_api.py +++ b/tests/py/ts/integrations/test_to_backend_api.py @@ -4,7 +4,6 @@ import torch import torch_tensorrt as torchtrt import torchvision.models as models -from torch_tensorrt._utils import is_tensorrt_rtx from utils import COSINE_THRESHOLD, cosine_similarity @@ -13,7 +12,7 @@ "TorchScript Frontend is not available", ) @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) class TestToBackendLowering(unittest.TestCase): diff --git a/tests/py/ts/integrations/test_trt_intercompatibility.py b/tests/py/ts/integrations/test_trt_intercompatibility.py index 0599869dca..1d648c33d3 100644 --- a/tests/py/ts/integrations/test_trt_intercompatibility.py +++ b/tests/py/ts/integrations/test_trt_intercompatibility.py @@ -4,7 +4,6 @@ import torch import torch_tensorrt as torchtrt import torchvision.models as models -from torch_tensorrt._utils import is_tensorrt_rtx from utils import COSINE_THRESHOLD, cosine_similarity @@ -13,7 +12,7 @@ "TorchScript Frontend is not available", ) @unittest.skipIf( - is_tensorrt_rtx(), + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) class TestPyTorchToTRTEngine(unittest.TestCase): diff --git a/tests/py/ts/models/test_models.py b/tests/py/ts/models/test_models.py index 4993655ce7..04b825f3ab 100644 --- a/tests/py/ts/models/test_models.py +++ b/tests/py/ts/models/test_models.py @@ -11,8 +11,6 @@ import timm import torchvision.models as models -from torch_tensorrt._utils import is_tensorrt_rtx - @unittest.skipIf( not torchtrt.ENABLED_FEATURES.torchscript_frontend, @@ -22,7 +20,7 @@ not importlib.util.find_spec("torchvision"), "torchvision not installed" ) @unittest.skipIf( - is_tensorrt_rtx(), + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) class TestModels(unittest.TestCase): From 20c45c55f9cb50b62bb566185b163022a106991d Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 26 Aug 2025 16:52:17 -0700 Subject: [PATCH 21/30] resolve comments --- tests/py/ts/api/test_e2e_behavior.py | 10 +++++----- tests/py/ts/api/test_embed_engines.py | 4 ++-- tests/py/ts/api/test_module_fallback.py | 2 +- tests/py/ts/api/test_operator_fallback.py | 2 +- tests/py/ts/api/test_ts_backend.py | 2 +- tests/py/ts/integrations/test_to_backend_api.py | 2 +- .../py/ts/integrations/test_trt_intercompatibility.py | 6 +++--- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/py/ts/api/test_e2e_behavior.py b/tests/py/ts/api/test_e2e_behavior.py index 169dce9ef1..c41ec86139 100644 --- a/tests/py/ts/api/test_e2e_behavior.py +++ b/tests/py/ts/api/test_e2e_behavior.py @@ -9,7 +9,7 @@ @unittest.skipIf( - torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) class TestInputTypeDefaultsFP32Model(unittest.TestCase): @@ -57,7 +57,7 @@ def test_input_respect_user_setting_fp32_weights_fp16_in_non_constructor(self): class TestInputTypeDefaultsFP16Model(unittest.TestCase): @unittest.skipIf( - torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) def test_input_use_default_fp16(self): @@ -75,7 +75,7 @@ def test_input_use_default_fp16(self): trt_mod(self.input.half()) @unittest.skipIf( - torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) def test_input_use_default_fp16_without_fp16_enabled(self): @@ -91,7 +91,7 @@ def test_input_use_default_fp16_without_fp16_enabled(self): trt_mod(self.input.half()) @unittest.skipIf( - torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) def test_input_respect_user_setting_fp16_weights_fp32_in(self): @@ -110,7 +110,7 @@ def test_input_respect_user_setting_fp16_weights_fp32_in(self): trt_mod(self.input) @unittest.skipIf( - torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) def test_input_respect_user_setting_fp16_weights_fp32_in_non_constuctor(self): diff --git a/tests/py/ts/api/test_embed_engines.py b/tests/py/ts/api/test_embed_engines.py index 7aba1f4d74..5be960236d 100644 --- a/tests/py/ts/api/test_embed_engines.py +++ b/tests/py/ts/api/test_embed_engines.py @@ -17,7 +17,7 @@ class TestModelToEngineToModel(unittest.TestCase): not importlib.util.find_spec("torchvision"), "torchvision not installed" ) @unittest.skipIf( - torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) def test_resnet50(self): @@ -54,7 +54,7 @@ def test_resnet50(self): "timm or torchvision not installed", ) @unittest.skipIf( - torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) def test_efficientnet_b0(self): diff --git a/tests/py/ts/api/test_module_fallback.py b/tests/py/ts/api/test_module_fallback.py index 59cd037a19..a5e5de3572 100644 --- a/tests/py/ts/api/test_module_fallback.py +++ b/tests/py/ts/api/test_module_fallback.py @@ -9,7 +9,7 @@ @unittest.skipIf( - torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) class TestModuleFallback(unittest.TestCase): diff --git a/tests/py/ts/api/test_operator_fallback.py b/tests/py/ts/api/test_operator_fallback.py index 81f5827c2c..85122c4621 100644 --- a/tests/py/ts/api/test_operator_fallback.py +++ b/tests/py/ts/api/test_operator_fallback.py @@ -9,7 +9,7 @@ @unittest.skipIf( - torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) class TestFallbackModels(unittest.TestCase): diff --git a/tests/py/ts/api/test_ts_backend.py b/tests/py/ts/api/test_ts_backend.py index f3e349fd1c..61ff5f2723 100644 --- a/tests/py/ts/api/test_ts_backend.py +++ b/tests/py/ts/api/test_ts_backend.py @@ -119,7 +119,7 @@ def test_default_device(self): @unittest.skipIf( - torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) class TestCheckMethodOpSupport(unittest.TestCase): diff --git a/tests/py/ts/integrations/test_to_backend_api.py b/tests/py/ts/integrations/test_to_backend_api.py index da6485920a..fa9b615f0a 100644 --- a/tests/py/ts/integrations/test_to_backend_api.py +++ b/tests/py/ts/integrations/test_to_backend_api.py @@ -12,7 +12,7 @@ "TorchScript Frontend is not available", ) @unittest.skipIf( - torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) class TestToBackendLowering(unittest.TestCase): diff --git a/tests/py/ts/integrations/test_trt_intercompatibility.py b/tests/py/ts/integrations/test_trt_intercompatibility.py index 1d648c33d3..24f6841a37 100644 --- a/tests/py/ts/integrations/test_trt_intercompatibility.py +++ b/tests/py/ts/integrations/test_trt_intercompatibility.py @@ -1,6 +1,6 @@ import unittest -import tensorrt as trt + import torch import torch_tensorrt as torchtrt import torchvision.models as models @@ -12,7 +12,7 @@ "TorchScript Frontend is not available", ) @unittest.skipIf( - torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + torchtrt.ENABLED_FEATURES.tensorrt_rtx, "aten::adaptive_avg_pool2d is implemented via plugins which is not supported for tensorrt_rtx", ) class TestPyTorchToTRTEngine(unittest.TestCase): @@ -35,7 +35,7 @@ def test_pt_to_trt(self): trt_engine = torchtrt.ts.convert_method_to_trt_engine( self.ts_model, "forward", **compile_spec ) - + import tensorrt as trt TRT_LOGGER = trt.Logger(trt.Logger.WARNING) with trt.Runtime(TRT_LOGGER) as rt: engine = rt.deserialize_cuda_engine(trt_engine) From 1a6a57b7eecfa355dc1d2c8d09c8616456d109f8 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 26 Aug 2025 17:38:37 -0700 Subject: [PATCH 22/30] resolve comments --- tests/py/dynamo/conversion/test_binary_ops_aten.py | 3 ++- tests/py/dynamo/conversion/test_converter_utils.py | 2 +- tests/py/dynamo/conversion/test_deconvolution_aten.py | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/py/dynamo/conversion/test_binary_ops_aten.py b/tests/py/dynamo/conversion/test_binary_ops_aten.py index 0dd3264306..16b82b9858 100644 --- a/tests/py/dynamo/conversion/test_binary_ops_aten.py +++ b/tests/py/dynamo/conversion/test_binary_ops_aten.py @@ -3,6 +3,7 @@ import torch import torch.nn as nn +import torch_tensorrt from parameterized import parameterized from torch.testing._internal.common_utils import run_tests from torch_tensorrt import Input @@ -237,7 +238,7 @@ def forward(self, x, y): ] ) @unittest.skipIf( - torchtrt.ENABLED_FEATURES.tensorrt_rtx, + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "bf16 is not supported for tensorrt_rtx", ) def test_elementwise_ops_bf16(self, _, orig_op): diff --git a/tests/py/dynamo/conversion/test_converter_utils.py b/tests/py/dynamo/conversion/test_converter_utils.py index 8025eae841..598f3559bc 100644 --- a/tests/py/dynamo/conversion/test_converter_utils.py +++ b/tests/py/dynamo/conversion/test_converter_utils.py @@ -6,7 +6,7 @@ enforce_tensor_types, flatten_dims, ) -from torch_tensorrt.fx.types import TRTTensor +from torch_tensorrt.dynamo.types import TRTTensor from ..testing_utilities import DECIMALS_OF_AGREEMENT, lower_graph_testing diff --git a/tests/py/dynamo/conversion/test_deconvolution_aten.py b/tests/py/dynamo/conversion/test_deconvolution_aten.py index a650360160..3790b90813 100644 --- a/tests/py/dynamo/conversion/test_deconvolution_aten.py +++ b/tests/py/dynamo/conversion/test_deconvolution_aten.py @@ -204,9 +204,6 @@ def forward(self, x): enable_passes=True, ) - @unittest.skipIf( - torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "TensorRT-RTX has bug on deconv3d" - ) @parameterized.expand( [ ("default", 1), @@ -230,6 +227,9 @@ def forward(self, x): ), ] ) + @unittest.skipIf( + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "TensorRT-RTX has bug on deconv3d" + ) def test_deconv3d( self, _, From 74719dc23c9c41443b081dcb11843e29759b2e6f Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 26 Aug 2025 18:49:14 -0700 Subject: [PATCH 23/30] resolve comments --- tests/py/ts/api/test_ts_backend.py | 2 +- tests/py/ts/integrations/test_trt_intercompatibility.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/py/ts/api/test_ts_backend.py b/tests/py/ts/api/test_ts_backend.py index 61ff5f2723..da6aaa8859 100644 --- a/tests/py/ts/api/test_ts_backend.py +++ b/tests/py/ts/api/test_ts_backend.py @@ -144,7 +144,7 @@ def test_module_type(self): torchtrt._compile._parse_module_type(ts_module), torchtrt._compile._ModuleType.ts, ) - if not torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx: + if not torchtrt.ENABLED_FEATURES.tensorrt_rtx: self.assertEqual( torchtrt._compile._parse_module_type(fx_module), torchtrt._compile._ModuleType.fx, diff --git a/tests/py/ts/integrations/test_trt_intercompatibility.py b/tests/py/ts/integrations/test_trt_intercompatibility.py index 24f6841a37..a0998cbf0e 100644 --- a/tests/py/ts/integrations/test_trt_intercompatibility.py +++ b/tests/py/ts/integrations/test_trt_intercompatibility.py @@ -1,6 +1,5 @@ import unittest - import torch import torch_tensorrt as torchtrt import torchvision.models as models @@ -35,7 +34,9 @@ def test_pt_to_trt(self): trt_engine = torchtrt.ts.convert_method_to_trt_engine( self.ts_model, "forward", **compile_spec ) + import tensorrt as trt + TRT_LOGGER = trt.Logger(trt.Logger.WARNING) with trt.Runtime(TRT_LOGGER) as rt: engine = rt.deserialize_cuda_engine(trt_engine) From 3935a8def4f30eeb1b657a5db8dfd4f33a2d17b8 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 26 Aug 2025 21:38:48 -0700 Subject: [PATCH 24/30] test --- .../conversion/test_embedding_bag_aten.py | 97 ++++++++++--------- .../dynamo/runtime/test_output_allocator.py | 14 +++ .../test_multiple_registered_engines.py | 4 + 3 files changed, 67 insertions(+), 48 deletions(-) diff --git a/tests/py/dynamo/conversion/test_embedding_bag_aten.py b/tests/py/dynamo/conversion/test_embedding_bag_aten.py index 1f119bd77e..f09a8e28d1 100644 --- a/tests/py/dynamo/conversion/test_embedding_bag_aten.py +++ b/tests/py/dynamo/conversion/test_embedding_bag_aten.py @@ -207,54 +207,55 @@ def forward(self, weight, indices): include_last_offset=True, padding_idx=-1, ), - param( - test_name="1d_indices_5", - weight=torch.randn((10, 4), dtype=torch.float16), - indices=torch.tensor([1, 2, 4, 5, 4, 3, 2, 9], dtype=torch.int32), - offsets=torch.tensor([0, 5, 5], dtype=torch.int32), - scale_grad_by_freq=False, - mode=1, - sparse=False, - per_sample_weights=None, - include_last_offset=True, - padding_idx=-1, - ), - param( - test_name="1d_indices_6", - weight=torch.randn((10, 4), dtype=torch.float16), - indices=torch.tensor([1, 2, 4, 5, 4, 3, 2, 9], dtype=torch.int32), - offsets=torch.tensor([0, 5, 5], dtype=torch.int32), - scale_grad_by_freq=False, - mode=2, - sparse=False, - per_sample_weights=None, - include_last_offset=False, - padding_idx=-1, - ), - param( - test_name="1d_indices_7", - weight=torch.randn((10, 4), dtype=torch.float16), - indices=torch.tensor([1, 2, 4, 5, 4, 3, 2, 9], dtype=torch.int32), - offsets=torch.tensor([0, 8, 8], dtype=torch.int32), - scale_grad_by_freq=False, - mode=0, - sparse=False, - per_sample_weights=None, - include_last_offset=True, - padding_idx=-1, - ), - param( - test_name="1d_indices_8", - weight=torch.randn((10, 4), dtype=torch.float16), - indices=torch.tensor([1, 2, 4, 5, 4, 3, 2, 9], dtype=torch.int32), - offsets=torch.tensor([0, 8, 8], dtype=torch.int32), - scale_grad_by_freq=False, - mode=1, - sparse=False, - per_sample_weights=None, - include_last_offset=False, - padding_idx=-1, - ), + # TODO: add the strong type support for embedding bag op, disable the test case for now + # param( + # test_name="1d_indices_5", + # weight=torch.randn((10, 4), dtype=torch.float16), + # indices=torch.tensor([1, 2, 4, 5, 4, 3, 2, 9], dtype=torch.int32), + # offsets=torch.tensor([0, 5, 5], dtype=torch.int32), + # scale_grad_by_freq=False, + # mode=1, + # sparse=False, + # per_sample_weights=None, + # include_last_offset=True, + # padding_idx=-1, + # ), + # param( + # test_name="1d_indices_6", + # weight=torch.randn((10, 4), dtype=torch.float16), + # indices=torch.tensor([1, 2, 4, 5, 4, 3, 2, 9], dtype=torch.int32), + # offsets=torch.tensor([0, 5, 5], dtype=torch.int32), + # scale_grad_by_freq=False, + # mode=2, + # sparse=False, + # per_sample_weights=None, + # include_last_offset=False, + # padding_idx=-1, + # ), + # param( + # test_name="1d_indices_7", + # weight=torch.randn((10, 4), dtype=torch.float16), + # indices=torch.tensor([1, 2, 4, 5, 4, 3, 2, 9], dtype=torch.int32), + # offsets=torch.tensor([0, 8, 8], dtype=torch.int32), + # scale_grad_by_freq=False, + # mode=0, + # sparse=False, + # per_sample_weights=None, + # include_last_offset=True, + # padding_idx=-1, + # ), + # param( + # test_name="1d_indices_8", + # weight=torch.randn((10, 4), dtype=torch.float16), + # indices=torch.tensor([1, 2, 4, 5, 4, 3, 2, 9], dtype=torch.int32), + # offsets=torch.tensor([0, 8, 8], dtype=torch.int32), + # scale_grad_by_freq=False, + # mode=1, + # sparse=False, + # per_sample_weights=None, + # include_last_offset=False, + # padding_idx=-1, + # ), # 2D input # param( # test_name="2d_indices_1", diff --git a/tests/py/dynamo/runtime/test_output_allocator.py b/tests/py/dynamo/runtime/test_output_allocator.py index c915f42173..73ff6c496f 100644 --- a/tests/py/dynamo/runtime/test_output_allocator.py +++ b/tests/py/dynamo/runtime/test_output_allocator.py @@ -1,3 +1,5 @@ +import unittest + import pytest import torch import torch_tensorrt @@ -150,6 +152,10 @@ def test_combination_of_cg_and_oa(self, _, use_python_runtime): out = cudagraphs_module(*inputs) +@unittest.skipIf( + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + "TensorRT RTX does not support nonzero which are required for this test", +) class TestOutputAllocatorDDSModel(TestCase): @parameterized.expand( [ @@ -256,6 +262,10 @@ def test_combination_of_cg_and_oa(self, _, use_python_runtime): out = cudagraphs_module(*inputs) +@unittest.skipIf( + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + "TensorRT RTX does not support nonzero which are required for this test", +) class TestOutputAllocatorDDSOpWithReductionOpModel(TestCase): """ The DDSOpWithReductionOpModel is a model that contains DDS op + reduction op. @@ -366,6 +376,10 @@ def test_combination_of_cg_and_oa(self, _, use_python_runtime): out = cudagraphs_module(*inputs) +@unittest.skipIf( + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + "TensorRT RTX does not support nonzero which are required for this test", +) class TestOutputAllocatorDDSModelWithGraphBreak(TestCase): @parameterized.expand( [ diff --git a/tests/py/ts/models/test_multiple_registered_engines.py b/tests/py/ts/models/test_multiple_registered_engines.py index 05b6905882..c05876aed9 100644 --- a/tests/py/ts/models/test_multiple_registered_engines.py +++ b/tests/py/ts/models/test_multiple_registered_engines.py @@ -18,6 +18,10 @@ @unittest.skipIf( not importlib.util.find_spec("torchvision"), "torchvision not installed" ) +@unittest.skipIf( + torchtrt.ENABLED_FEATURES.tensorrt_rtx, + "TensorRT RTX does not support plugins which are required for this test", +) class TestModelToEngineToModel(unittest.TestCase): def test_multiple_engines(self): self.resnet18 = models.resnet18(pretrained=True).eval().to("cuda") From b81bdebbdcd8086ac28e1bb5bdc863fe87306dc0 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Wed, 27 Aug 2025 08:15:32 -0700 Subject: [PATCH 25/30] ignore refit error test and added TODO to fix later --- .../dynamo/conversion/impl/elementwise/ops.py | 24 +++- .../conversion/test_embedding_bag_aten.py | 105 +++++++++--------- tests/py/dynamo/models/test_engine_cache.py | 15 +++ .../models/test_weight_stripped_engine.py | 5 + 4 files changed, 93 insertions(+), 56 deletions(-) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py index 9ed9213576..2e631638b7 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py @@ -551,10 +551,26 @@ def pow( lhs_dtype = None rhs_dtype = None - if isinstance(lhs_val, int): - lhs_dtype = torch.int32 - if isinstance(rhs_val, int): - rhs_dtype = torch.int32 + if isinstance(lhs_val, (int, float)) and isinstance(rhs_val, (int, float)): + raise ValueError( + "Both lhs_val and rhs_val are int or float, at least one of them should be a tensor" + ) + elif isinstance(lhs_val, (int, float)): + # At this point, rhs_val must be a Tensor since we checked both aren't scalars + assert isinstance(rhs_val, (TRTTensor, torch.Tensor)) + rhs_dtype = rhs_val.dtype + lhs_dtype = rhs_dtype + elif isinstance(rhs_val, (int, float)): + # At this point, lhs_val must be a Tensor since we checked both aren't scalars + assert isinstance(lhs_val, (TRTTensor, torch.Tensor)) + lhs_dtype = lhs_val.dtype + rhs_dtype = lhs_dtype + else: + assert isinstance(lhs_val, (TRTTensor, torch.Tensor)) + assert isinstance(rhs_val, (TRTTensor, torch.Tensor)) + lhs_dtype = lhs_val.dtype + rhs_dtype = rhs_val.dtype + # POW operation supports only float32 and int8 inputs lhs_val = get_trt_tensor(ctx, lhs_val, name + "_lhs_val", lhs_dtype) rhs_val = get_trt_tensor(ctx, rhs_val, name + "_rhs_val", rhs_dtype) diff --git a/tests/py/dynamo/conversion/test_embedding_bag_aten.py b/tests/py/dynamo/conversion/test_embedding_bag_aten.py index f09a8e28d1..c080424dc0 100644 --- a/tests/py/dynamo/conversion/test_embedding_bag_aten.py +++ b/tests/py/dynamo/conversion/test_embedding_bag_aten.py @@ -13,58 +13,59 @@ class TestEmbeddingBagConverter(DispatchTestCase): [ # mode=0: sum, mode=1: mean, mode=2: max # 1D input - param( - test_name="1d_indices_1", - weight=torch.randn((10, 2), dtype=torch.float16), - indices=torch.tensor( - [1, 2, 4, 5, 4, 3, 2, 6, 8, 1, 2], dtype=torch.int32 - ), - offsets=torch.tensor([0, 2, 4], dtype=torch.int32), - scale_grad_by_freq=False, - mode=0, - sparse=True, - per_sample_weights=None, - include_last_offset=False, - padding_idx=-1, - ), - param( - test_name="1d_indices_2", - weight=torch.randn((10, 2), dtype=torch.float16), - indices=torch.tensor( - [1, 2, 4, 5, 4, 3, 2, 6, 8, 1, 2], dtype=torch.int32 - ), - offsets=torch.tensor([0, 2, 4], dtype=torch.int32), - scale_grad_by_freq=False, - mode=1, - sparse=True, - per_sample_weights=None, - include_last_offset=True, - padding_idx=-1, - ), - param( - test_name="1d_indices_3", - weight=torch.randn((10, 4), dtype=torch.float16), - indices=torch.tensor([1, 2, 4, 5, 4, 3, 2, 9], dtype=torch.int32), - offsets=torch.tensor([0, 2, 8], dtype=torch.int32), - scale_grad_by_freq=False, - mode=2, - sparse=False, - per_sample_weights=None, - include_last_offset=False, - padding_idx=-1, - ), - param( - test_name="1d_indices_4", - weight=torch.randn((10, 4), dtype=torch.float16), - indices=torch.tensor([1, 2, 4, 5, 4, 3, 2, 9], dtype=torch.int32), - offsets=torch.tensor([0, 2, 8], dtype=torch.int32), - scale_grad_by_freq=False, - mode=0, - sparse=False, - per_sample_weights=torch.randn((8,), dtype=torch.float16), - include_last_offset=True, - padding_idx=-1, - ), + # TODO: add the strong type support for embedding bag op, disable the test case for now + # param( + # test_name="1d_indices_1", + # weight=torch.randn((10, 2), dtype=torch.float16), + # indices=torch.tensor( + # [1, 2, 4, 5, 4, 3, 2, 6, 8, 1, 2], dtype=torch.int32 + # ), + # offsets=torch.tensor([0, 2, 4], dtype=torch.int32), + # scale_grad_by_freq=False, + # mode=0, + # sparse=True, + # per_sample_weights=None, + # include_last_offset=False, + # padding_idx=-1, + # ), + # param( + # test_name="1d_indices_2", + # weight=torch.randn((10, 2), dtype=torch.float16), + # indices=torch.tensor( + # [1, 2, 4, 5, 4, 3, 2, 6, 8, 1, 2], dtype=torch.int32 + # ), + # offsets=torch.tensor([0, 2, 4], dtype=torch.int32), + # scale_grad_by_freq=False, + # mode=1, + # sparse=True, + # per_sample_weights=None, + # include_last_offset=True, + # padding_idx=-1, + # ), + # param( + # test_name="1d_indices_3", + # weight=torch.randn((10, 4), dtype=torch.float16), + # indices=torch.tensor([1, 2, 4, 5, 4, 3, 2, 9], dtype=torch.int32), + # offsets=torch.tensor([0, 2, 8], dtype=torch.int32), + # scale_grad_by_freq=False, + # mode=2, + # sparse=False, + # per_sample_weights=None, + # include_last_offset=False, + # padding_idx=-1, + # ), + # param( + # test_name="1d_indices_4", + # weight=torch.randn((10, 4), dtype=torch.float16), + # indices=torch.tensor([1, 2, 4, 5, 4, 3, 2, 9], dtype=torch.int32), + # offsets=torch.tensor([0, 2, 8], dtype=torch.int32), + # scale_grad_by_freq=False, + # mode=0, + # sparse=False, + # per_sample_weights=torch.randn((8,), dtype=torch.float16), + # include_last_offset=True, + # padding_idx=-1, + # ), param( test_name="1d_indices_5", weight=torch.randn((10, 4), dtype=torch.float32), diff --git a/tests/py/dynamo/models/test_engine_cache.py b/tests/py/dynamo/models/test_engine_cache.py index a349253338..61b5d74679 100644 --- a/tests/py/dynamo/models/test_engine_cache.py +++ b/tests/py/dynamo/models/test_engine_cache.py @@ -271,6 +271,11 @@ def remove_timing_cache(path=TIMING_CACHE_PATH): @unittest.skipIf( not importlib.util.find_spec("torchvision"), "torchvision not installed" ) + @unittest.skipIf( + torch_trt.ENABLED_FEATURES.tensorrt_rtx, + # TODO: need to fix this https://github.com/pytorch/TensorRT/issues/3752 + "There is bug in refit, so we skip the test for now", + ) def test_dynamo_compile_with_custom_engine_cache(self): model = models.resnet18(pretrained=True).eval().to("cuda") @@ -341,6 +346,11 @@ def test_dynamo_compile_with_custom_engine_cache(self): @unittest.skipIf( not importlib.util.find_spec("torchvision"), "torchvision not installed" ) + @unittest.skipIf( + torch_trt.ENABLED_FEATURES.tensorrt_rtx, + # TODO: need to fix this https://github.com/pytorch/TensorRT/issues/3752 + "There is bug in refit, so we skip the test for now", + ) def test_dynamo_compile_change_input_shape(self): """Runs compilation 3 times, the cache should miss each time""" model = models.resnet18(pretrained=True).eval().to("cuda") @@ -657,6 +667,11 @@ def forward(self, c, d): @unittest.skipIf( not importlib.util.find_spec("torchvision"), "torchvision not installed" ) + @unittest.skipIf( + torch_trt.ENABLED_FEATURES.tensorrt_rtx, + # TODO: need to fix this https://github.com/pytorch/TensorRT/issues/3752 + "There is bug in refit, so we skip the test for now", + ) def test_caching_small_model(self): from torch_tensorrt.dynamo._refit import refit_module_weights diff --git a/tests/py/dynamo/models/test_weight_stripped_engine.py b/tests/py/dynamo/models/test_weight_stripped_engine.py index d2079d11bf..2d53b23851 100644 --- a/tests/py/dynamo/models/test_weight_stripped_engine.py +++ b/tests/py/dynamo/models/test_weight_stripped_engine.py @@ -279,6 +279,11 @@ def test_engine_caching_saves_weight_stripped_engine(self): not importlib.util.find_spec("torchvision"), "torchvision is not installed", ) + @unittest.skipIf( + not torch_trt.ENABLED_FEATURES.tensorrt_rtx, + # TODO: need to fix this https://github.com/pytorch/TensorRT/issues/3752 + "There is bug in refit, so we skip the test for now", + ) def test_dynamo_compile_with_refittable_weight_stripped_engine(self): pyt_model = models.resnet18(pretrained=True).eval().to("cuda") example_inputs = (torch.randn((100, 3, 224, 224)).to("cuda"),) From c2c7c0ea5d8c3e00eab0f40627e71f51110005e3 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Wed, 27 Aug 2025 10:45:51 -0700 Subject: [PATCH 26/30] add black-forest-labs/FLUX.1-Kontext-dev support for rtx perf --- examples/apps/flux_demo.py | 12 +++++++++++- .../dynamo/automatic_plugin/test_automatic_plugin.py | 4 ++++ tools/llm/torchtrt_ext/sdpa_converter.py | 2 +- tools/perf/Flux/flux_perf.py | 7 ++++++- tools/perf/perf_run.py | 3 ++- 5 files changed, 24 insertions(+), 4 deletions(-) diff --git a/examples/apps/flux_demo.py b/examples/apps/flux_demo.py index 5220f38ec6..c061bb5d81 100644 --- a/examples/apps/flux_demo.py +++ b/examples/apps/flux_demo.py @@ -52,8 +52,13 @@ def compile_model( print(f"\nUsing {args.dtype}") - pipe = FluxPipeline.from_pretrained( + if args.model not in [ "black-forest-labs/FLUX.1-dev", + "black-forest-labs/FLUX.1-Kontext-dev", + ]: + raise ValueError(f"Model {args.model} is not supported") + pipe = FluxPipeline.from_pretrained( + args.model, torch_dtype=torch.float16, ).to(torch.float16) @@ -262,6 +267,11 @@ def main(args): parser = argparse.ArgumentParser( description="Run Flux quantization with different dtypes" ) + parser.add_argument( + "--model", + default="black-forest-labs/FLUX.1-dev", + help="Model to use", + ) parser.add_argument( "--use_sdpa", action="store_true", diff --git a/tests/py/dynamo/automatic_plugin/test_automatic_plugin.py b/tests/py/dynamo/automatic_plugin/test_automatic_plugin.py index 8ab47def08..4be532981f 100644 --- a/tests/py/dynamo/automatic_plugin/test_automatic_plugin.py +++ b/tests/py/dynamo/automatic_plugin/test_automatic_plugin.py @@ -59,6 +59,10 @@ def elementwise_mul(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: ) +@unittest.skipIf( + torch_trt.ENABLED_FEATURES.tensorrt_rtx, + "TensorRT RTX does not support plugins", +) class TestAutomaticPlugin(DispatchTestCase): @parameterized.expand( [ diff --git a/tools/llm/torchtrt_ext/sdpa_converter.py b/tools/llm/torchtrt_ext/sdpa_converter.py index 47083c7b48..d05b0379a4 100644 --- a/tools/llm/torchtrt_ext/sdpa_converter.py +++ b/tools/llm/torchtrt_ext/sdpa_converter.py @@ -15,7 +15,7 @@ cast_trt_tensor, get_trt_tensor, ) -from torch_tensorrt.fx.types import TRTTensor +from torch_tensorrt.dynamo.types import TRTTensor logger = logging.getLogger(__name__) diff --git a/tools/perf/Flux/flux_perf.py b/tools/perf/Flux/flux_perf.py index 969f4c93d8..e0df18577d 100644 --- a/tools/perf/Flux/flux_perf.py +++ b/tools/perf/Flux/flux_perf.py @@ -54,7 +54,7 @@ def main(args): else: pipe = ( FluxPipeline.from_pretrained( - "black-forest-labs/FLUX.1-dev", + args.model, torch_dtype=torch.float16, ) .to(torch.float16) @@ -68,6 +68,11 @@ def main(args): parser = argparse.ArgumentParser( description="Run Flux quantization with different dtypes" ) + parser.add_argument( + "--model", + default="black-forest-labs/FLUX.1-dev", + help="Model to use", + ) parser.add_argument( "--use_sdpa", action="store_true", diff --git a/tools/perf/perf_run.py b/tools/perf/perf_run.py index f7bc94d27d..1ea82d0936 100644 --- a/tools/perf/perf_run.py +++ b/tools/perf/perf_run.py @@ -11,7 +11,6 @@ import numpy as np import pandas as pd -import tensorrt as trt # Importing supported Backends import torch @@ -436,6 +435,8 @@ def run_tensorrt( precision, batch_size=1, ): + import tensorrt as trt + logger = trt.Logger(trt.Logger.WARNING) compile_time_s = 0 if params["is_trt_engine"]: From 50be4b70512d12f90b2cca8513591be6e177892e Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Wed, 27 Aug 2025 12:27:29 -0700 Subject: [PATCH 27/30] resolve comments --- py/torch_tensorrt/_features.py | 18 +++++++- .../dynamo/conversion/aten_ops_converters.py | 43 +++++++++---------- .../dynamo/conversion/impl/quantize.py | 24 ++++++----- 3 files changed, 51 insertions(+), 34 deletions(-) diff --git a/py/torch_tensorrt/_features.py b/py/torch_tensorrt/_features.py index 866da6d946..90bd16f445 100644 --- a/py/torch_tensorrt/_features.py +++ b/py/torch_tensorrt/_features.py @@ -23,6 +23,7 @@ "qdp_plugin", "windows_cross_compile", "tensorrt_rtx", + "tensorrt", ], ) @@ -42,7 +43,7 @@ linked_file_runtime_full_path = os.path.join(trtorch_dir, linked_file_runtime) _TENSORRT_RTX = tensorrt._package_name == "tensorrt_rtx" - +_TENSORRT = tensorrt._package_name == "tensorrt" _TS_FE_AVAIL = os.path.isfile(linked_file_full_path) _TORCHTRT_RT_AVAIL = _TS_FE_AVAIL or os.path.isfile(linked_file_runtime_full_path) _DYNAMO_FE_AVAIL = version.parse(sanitized_torch_version()) >= version.parse("2.1.dev") @@ -64,6 +65,7 @@ _QDP_PLUGIN_AVAIL, _WINDOWS_CROSS_COMPILE, _TENSORRT_RTX, + _TENSORRT, ) T = TypeVar("T") @@ -71,10 +73,11 @@ def _enabled_features_str() -> str: enabled = lambda x: "ENABLED" if x else "DISABLED" - out_str: str = f"Enabled Features:\n - Dynamo Frontend: {enabled(_DYNAMO_FE_AVAIL)}\n - Torch-TensorRT Runtime: {enabled(_TORCHTRT_RT_AVAIL)}\n - FX Frontend: {enabled(_FX_FE_AVAIL)}\n - TorchScript Frontend: {enabled(_TS_FE_AVAIL)}\n - Refit: {enabled(_REFIT_AVAIL)}\n - QDP Plugin: {enabled(_QDP_PLUGIN_AVAIL)} \n - TensorRT-RTX: {enabled(_TENSORRT_RTX)}\n" # type: ignore[no-untyped-call] + out_str: str = f"Enabled Features:\n - Dynamo Frontend: {enabled(_DYNAMO_FE_AVAIL)}\n - Torch-TensorRT Runtime: {enabled(_TORCHTRT_RT_AVAIL)}\n - FX Frontend: {enabled(_FX_FE_AVAIL)}\n - TorchScript Frontend: {enabled(_TS_FE_AVAIL)}\n - Refit: {enabled(_REFIT_AVAIL)}\n - QDP Plugin: {enabled(_QDP_PLUGIN_AVAIL)} \n - TensorRT-RTX: {enabled(_TENSORRT_RTX)}\n - TensorRT: {enabled(_TENSORRT)}\n" # type: ignore[no-untyped-call] return out_str +# this is for tensorrt_rtx only def needs_tensorrt_rtx(f: Callable[..., Any]) -> Callable[..., Any]: def wrapper(*args: List[Any], **kwargs: Dict[str, Any]) -> Any: if ENABLED_FEATURES.tensorrt_rtx: @@ -83,6 +86,17 @@ def wrapper(*args: List[Any], **kwargs: Dict[str, Any]) -> Any: raise NotImplementedError("TensorRT-RTX is not available") +# this is for tensorrt only +def needs_tensorrt(f: Callable[..., Any]) -> Callable[..., Any]: + def wrapper(*args: List[Any], **kwargs: Dict[str, Any]) -> Any: + if ENABLED_FEATURES.tensorrt: + return f(*args, **kwargs) + else: + raise NotImplementedError("TensorRT is not available") + + return wrapper + + def needs_torch_tensorrt_runtime(f: Callable[..., Any]) -> Callable[..., Any]: def wrapper(*args: List[Any], **kwargs: Dict[str, Any]) -> Any: if ENABLED_FEATURES.torch_tensorrt_runtime: diff --git a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py index 2cb9baf727..b3b95764f9 100644 --- a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py +++ b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py @@ -8,7 +8,7 @@ import torch from tensorrt import ITensor as TRTTensor from torch.fx.node import Argument, Node, Target -from torch_tensorrt import ENABLED_FEATURES +from torch_tensorrt._features import needs_tensorrt from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.dynamo._settings import CompilationSettings from torch_tensorrt.dynamo._SourceIR import SourceIR @@ -3594,28 +3594,27 @@ def aten_ops_full( # currently nonzero is not supported for tensorrt_rtx -# TODO: lan to remove this once rtx team has fixed the bug -if not ENABLED_FEATURES.tensorrt_rtx: - - @dynamo_tensorrt_converter( - torch.ops.aten.nonzero.default, - supports_dynamic_shapes=True, - requires_output_allocator=True, +# TODO: lan to add the nonzero support once tensorrt_rtx team has added the support +@needs_tensorrt +@dynamo_tensorrt_converter( + torch.ops.aten.nonzero.default, + supports_dynamic_shapes=True, + requires_output_allocator=True, +) +def aten_ops_nonzero( + ctx: ConversionContext, + target: Target, + args: Tuple[Argument, ...], + kwargs: Dict[str, Argument], + name: str, +) -> Union[TRTTensor, Sequence[TRTTensor]]: + return impl.unary.nonzero( + ctx, + target, + SourceIR.ATEN, + name, + args[0], ) - def aten_ops_nonzero( - ctx: ConversionContext, - target: Target, - args: Tuple[Argument, ...], - kwargs: Dict[str, Argument], - name: str, - ) -> Union[TRTTensor, Sequence[TRTTensor]]: - return impl.unary.nonzero( - ctx, - target, - SourceIR.ATEN, - name, - args[0], - ) @dynamo_tensorrt_converter(torch.ops.aten.linear.default, supports_dynamic_shapes=True) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py index b609385ff8..159983da99 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py @@ -35,6 +35,19 @@ def get_ir(target: Target) -> SourceIR: return SourceIR.UNKNOWN +def validate_int8_activation_quantization(name: str, dtype: trt.DataType) -> None: + if ( + dtype == trt.DataType.INT8 + and ".input_quantizer" in name + and ENABLED_FEATURES.tensorrt_rtx + ): + # RTX does not support int8 activation quantization + # TODO: lan to remove this once rtx team has added the support for int8 activation quantization + raise NotImplementedError( + "TensorRT-RTX does not support int8 activation quantization, only support int8 weight quantization" + ) + + def quantize( ctx: ConversionContext, target: Target, @@ -78,16 +91,7 @@ def quantize( dtype = trt.DataType.FP8 max_bound = 448 - if ( - dtype == trt.DataType.INT8 - and ".input_quantizer" in name - and ENABLED_FEATURES.tensorrt_rtx - ): - # RTX does not support int8 activation quantization - # TODO: lan to remove this once rtx team has added the support for int8 activation quantization - raise NotImplementedError( - "TensorRT-RTX does not support int8 activation quantization, only support int8 weight quantization" - ) + validate_int8_activation_quantization(name, dtype) axis = None # int8 weight quantization is per-channel quantization(it can have one or multiple amax values) From 5b1b2515658bb623cf3a75419b6845d6280716e7 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Wed, 27 Aug 2025 16:08:00 -0700 Subject: [PATCH 28/30] resolve comments --- py/torch_tensorrt/_features.py | 2 ++ .../dynamo/conversion/aten_ops_converters.py | 2 +- .../automatic_plugin/test_automatic_plugin.py | 3 ++- .../py/dynamo/conversion/test_nonzero_aten.py | 20 ++++--------------- 4 files changed, 9 insertions(+), 18 deletions(-) diff --git a/py/torch_tensorrt/_features.py b/py/torch_tensorrt/_features.py index 90bd16f445..1e60abc032 100644 --- a/py/torch_tensorrt/_features.py +++ b/py/torch_tensorrt/_features.py @@ -85,6 +85,8 @@ def wrapper(*args: List[Any], **kwargs: Dict[str, Any]) -> Any: else: raise NotImplementedError("TensorRT-RTX is not available") + return wrapper + # this is for tensorrt only def needs_tensorrt(f: Callable[..., Any]) -> Callable[..., Any]: diff --git a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py index b3b95764f9..54ce3176f1 100644 --- a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py +++ b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py @@ -3595,12 +3595,12 @@ def aten_ops_full( # currently nonzero is not supported for tensorrt_rtx # TODO: lan to add the nonzero support once tensorrt_rtx team has added the support -@needs_tensorrt @dynamo_tensorrt_converter( torch.ops.aten.nonzero.default, supports_dynamic_shapes=True, requires_output_allocator=True, ) +@needs_tensorrt def aten_ops_nonzero( ctx: ConversionContext, target: Target, diff --git a/tests/py/dynamo/automatic_plugin/test_automatic_plugin.py b/tests/py/dynamo/automatic_plugin/test_automatic_plugin.py index 4be532981f..c6d765c7e6 100644 --- a/tests/py/dynamo/automatic_plugin/test_automatic_plugin.py +++ b/tests/py/dynamo/automatic_plugin/test_automatic_plugin.py @@ -1,3 +1,4 @@ +import unittest from typing import Tuple import torch @@ -60,7 +61,7 @@ def elementwise_mul(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: @unittest.skipIf( - torch_trt.ENABLED_FEATURES.tensorrt_rtx, + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "TensorRT RTX does not support plugins", ) class TestAutomaticPlugin(DispatchTestCase): diff --git a/tests/py/dynamo/conversion/test_nonzero_aten.py b/tests/py/dynamo/conversion/test_nonzero_aten.py index 7408f3d221..b81644ed54 100644 --- a/tests/py/dynamo/conversion/test_nonzero_aten.py +++ b/tests/py/dynamo/conversion/test_nonzero_aten.py @@ -10,6 +10,10 @@ from .harness import DispatchTestCase +@unittest.skipIf( + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + "nonzero is not supported for tensorrt_rtx", +) class TestNonZeroConverter(DispatchTestCase): @parameterized.expand( [ @@ -20,10 +24,6 @@ class TestNonZeroConverter(DispatchTestCase): ((2, 3, 4, 5), torch.float), ] ) - @unittest.skipIf( - torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, - "nonzero is not supported for tensorrt_rtx", - ) def test_nonzero_dds(self, input_shape, dtype): class NonZero(nn.Module): # This is a DDS network @@ -46,10 +46,6 @@ def forward(self, input): ((2, 3, 4, 5), torch.float), ] ) - @unittest.skipIf( - torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, - "nonzero is not supported for tensorrt_rtx", - ) def test_nonzero_non_dds(self, input_shape, dtype): class NonZero(nn.Module): # This is a static network @@ -89,10 +85,6 @@ def forward(self, input): ), ] ) - @unittest.skipIf( - torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, - "nonzero is not supported for tensorrt_rtx", - ) def test_nonzero_dynamic_shape_dds(self, _, min_shape, opt_shape, max_shape, dtype): class NonZero(nn.Module): def forward(self, input): @@ -134,10 +126,6 @@ def forward(self, input): ), ] ) - @unittest.skipIf( - torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, - "nonzero is not supported for tensorrt_rtx", - ) def test_nonzero_dynamic_shape_non_dds( self, _, min_shape, opt_shape, max_shape, dtype ): From a30261fd9c032db12a2a4ed31fff57f4c97dd624 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Wed, 27 Aug 2025 17:11:26 -0700 Subject: [PATCH 29/30] fix test error --- .../automatic_plugin/test_automatic_plugin.py | 92 +++++++++---------- .../models/test_weight_stripped_engine.py | 2 +- 2 files changed, 45 insertions(+), 49 deletions(-) diff --git a/tests/py/dynamo/automatic_plugin/test_automatic_plugin.py b/tests/py/dynamo/automatic_plugin/test_automatic_plugin.py index c6d765c7e6..83e367ff5f 100644 --- a/tests/py/dynamo/automatic_plugin/test_automatic_plugin.py +++ b/tests/py/dynamo/automatic_plugin/test_automatic_plugin.py @@ -12,59 +12,55 @@ from ..conversion.harness import DispatchTestCase -@triton.jit -def elementwise_mul_kernel(X, Y, Z, BLOCK_SIZE: tl.constexpr): - # Program ID determines the block of data each thread will process - pid = tl.program_id(0) - # Compute the range of elements that this thread block will work on - block_start = pid * BLOCK_SIZE - # Range of indices this thread will handle - offsets = block_start + tl.arange(0, BLOCK_SIZE) - # Load elements from the X and Y tensors - x_vals = tl.load(X + offsets) - y_vals = tl.load(Y + offsets) - # Perform the element-wise multiplication - z_vals = x_vals * y_vals - # Store the result in Z - tl.store(Z + offsets, z_vals) - - -@torch.library.custom_op("torchtrt_ex::elementwise_mul", mutates_args=()) # type: ignore[misc] -def elementwise_mul(X: torch.Tensor, Y: torch.Tensor) -> torch.Tensor: - # Ensure the tensors are on the GPU - assert X.is_cuda and Y.is_cuda, "Tensors must be on CUDA device." - assert X.shape == Y.shape, "Tensors must have the same shape." - - # Create output tensor - Z = torch.empty_like(X) - - # Define block size - BLOCK_SIZE = 1024 - - # Grid of programs - grid = lambda meta: (X.numel() // meta["BLOCK_SIZE"],) - - # Launch the kernel - elementwise_mul_kernel[grid](X, Y, Z, BLOCK_SIZE=BLOCK_SIZE) - - return Z - - -@torch.library.register_fake("torchtrt_ex::elementwise_mul") -def elementwise_mul(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: - return x - - -torch_tensorrt.dynamo.conversion.plugins.custom_op( - "torchtrt_ex::elementwise_mul", supports_dynamic_shapes=True -) - - @unittest.skipIf( torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "TensorRT RTX does not support plugins", ) class TestAutomaticPlugin(DispatchTestCase): + @triton.jit + def elementwise_mul_kernel(X, Y, Z, BLOCK_SIZE: tl.constexpr): + # Program ID determines the block of data each thread will process + pid = tl.program_id(0) + # Compute the range of elements that this thread block will work on + block_start = pid * BLOCK_SIZE + # Range of indices this thread will handle + offsets = block_start + tl.arange(0, BLOCK_SIZE) + # Load elements from the X and Y tensors + x_vals = tl.load(X + offsets) + y_vals = tl.load(Y + offsets) + # Perform the element-wise multiplication + z_vals = x_vals * y_vals + # Store the result in Z + tl.store(Z + offsets, z_vals) + + @torch.library.custom_op("torchtrt_ex::elementwise_mul", mutates_args=()) # type: ignore[misc] + def elementwise_mul(X: torch.Tensor, Y: torch.Tensor) -> torch.Tensor: + # Ensure the tensors are on the GPU + assert X.is_cuda and Y.is_cuda, "Tensors must be on CUDA device." + assert X.shape == Y.shape, "Tensors must have the same shape." + + # Create output tensor + Z = torch.empty_like(X) + + # Define block size + BLOCK_SIZE = 1024 + + # Grid of programs + grid = lambda meta: (X.numel() // meta["BLOCK_SIZE"],) + + # Launch the kernel + elementwise_mul_kernel[grid](X, Y, Z, BLOCK_SIZE=BLOCK_SIZE) + + return Z + + @torch.library.register_fake("torchtrt_ex::elementwise_mul") + def elementwise_mul(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + return x + + torch_tensorrt.dynamo.conversion.plugins.custom_op( + "torchtrt_ex::elementwise_mul", supports_dynamic_shapes=True + ) + @parameterized.expand( [ ((64, 64), torch.float), diff --git a/tests/py/dynamo/models/test_weight_stripped_engine.py b/tests/py/dynamo/models/test_weight_stripped_engine.py index 2d53b23851..d34d7d1edc 100644 --- a/tests/py/dynamo/models/test_weight_stripped_engine.py +++ b/tests/py/dynamo/models/test_weight_stripped_engine.py @@ -280,7 +280,7 @@ def test_engine_caching_saves_weight_stripped_engine(self): "torchvision is not installed", ) @unittest.skipIf( - not torch_trt.ENABLED_FEATURES.tensorrt_rtx, + torch_trt.ENABLED_FEATURES.tensorrt_rtx, # TODO: need to fix this https://github.com/pytorch/TensorRT/issues/3752 "There is bug in refit, so we skip the test for now", ) From 0d0ab96132e38b3901f2e044bd1c6a9754221c81 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Wed, 27 Aug 2025 21:04:52 -0700 Subject: [PATCH 30/30] fix test error --- py/torch_tensorrt/_features.py | 15 ++- .../dynamo/conversion/aten_ops_converters.py | 4 +- .../test_automatic_plugin_with_attrs.py | 95 ++++++++++--------- .../test_flashinfer_rmsnorm.py | 34 +++---- 4 files changed, 74 insertions(+), 74 deletions(-) diff --git a/py/torch_tensorrt/_features.py b/py/torch_tensorrt/_features.py index 1e60abc032..f7e4e91626 100644 --- a/py/torch_tensorrt/_features.py +++ b/py/torch_tensorrt/_features.py @@ -23,7 +23,6 @@ "qdp_plugin", "windows_cross_compile", "tensorrt_rtx", - "tensorrt", ], ) @@ -43,7 +42,6 @@ linked_file_runtime_full_path = os.path.join(trtorch_dir, linked_file_runtime) _TENSORRT_RTX = tensorrt._package_name == "tensorrt_rtx" -_TENSORRT = tensorrt._package_name == "tensorrt" _TS_FE_AVAIL = os.path.isfile(linked_file_full_path) _TORCHTRT_RT_AVAIL = _TS_FE_AVAIL or os.path.isfile(linked_file_runtime_full_path) _DYNAMO_FE_AVAIL = version.parse(sanitized_torch_version()) >= version.parse("2.1.dev") @@ -65,7 +63,6 @@ _QDP_PLUGIN_AVAIL, _WINDOWS_CROSS_COMPILE, _TENSORRT_RTX, - _TENSORRT, ) T = TypeVar("T") @@ -73,11 +70,10 @@ def _enabled_features_str() -> str: enabled = lambda x: "ENABLED" if x else "DISABLED" - out_str: str = f"Enabled Features:\n - Dynamo Frontend: {enabled(_DYNAMO_FE_AVAIL)}\n - Torch-TensorRT Runtime: {enabled(_TORCHTRT_RT_AVAIL)}\n - FX Frontend: {enabled(_FX_FE_AVAIL)}\n - TorchScript Frontend: {enabled(_TS_FE_AVAIL)}\n - Refit: {enabled(_REFIT_AVAIL)}\n - QDP Plugin: {enabled(_QDP_PLUGIN_AVAIL)} \n - TensorRT-RTX: {enabled(_TENSORRT_RTX)}\n - TensorRT: {enabled(_TENSORRT)}\n" # type: ignore[no-untyped-call] + out_str: str = f"Enabled Features:\n - Dynamo Frontend: {enabled(_DYNAMO_FE_AVAIL)}\n - Torch-TensorRT Runtime: {enabled(_TORCHTRT_RT_AVAIL)}\n - FX Frontend: {enabled(_FX_FE_AVAIL)}\n - TorchScript Frontend: {enabled(_TS_FE_AVAIL)}\n - Refit: {enabled(_REFIT_AVAIL)}\n - QDP Plugin: {enabled(_QDP_PLUGIN_AVAIL)} \n - TensorRT-RTX: {enabled(_TENSORRT_RTX)}\n" # type: ignore[no-untyped-call] return out_str -# this is for tensorrt_rtx only def needs_tensorrt_rtx(f: Callable[..., Any]) -> Callable[..., Any]: def wrapper(*args: List[Any], **kwargs: Dict[str, Any]) -> Any: if ENABLED_FEATURES.tensorrt_rtx: @@ -88,13 +84,14 @@ def wrapper(*args: List[Any], **kwargs: Dict[str, Any]) -> Any: return wrapper -# this is for tensorrt only -def needs_tensorrt(f: Callable[..., Any]) -> Callable[..., Any]: +def needs_not_tensorrt_rtx(f: Callable[..., Any]) -> Callable[..., Any]: def wrapper(*args: List[Any], **kwargs: Dict[str, Any]) -> Any: - if ENABLED_FEATURES.tensorrt: + if not ENABLED_FEATURES.tensorrt_rtx: return f(*args, **kwargs) else: - raise NotImplementedError("TensorRT is not available") + raise NotImplementedError( + "This is only available in non TensorRT-RTX environments, currently running in TensorRT-RTX" + ) return wrapper diff --git a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py index 54ce3176f1..4482a00f79 100644 --- a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py +++ b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py @@ -8,7 +8,7 @@ import torch from tensorrt import ITensor as TRTTensor from torch.fx.node import Argument, Node, Target -from torch_tensorrt._features import needs_tensorrt +from torch_tensorrt._features import needs_not_tensorrt_rtx from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.dynamo._settings import CompilationSettings from torch_tensorrt.dynamo._SourceIR import SourceIR @@ -3600,7 +3600,7 @@ def aten_ops_full( supports_dynamic_shapes=True, requires_output_allocator=True, ) -@needs_tensorrt +@needs_not_tensorrt_rtx def aten_ops_nonzero( ctx: ConversionContext, target: Target, diff --git a/tests/py/dynamo/automatic_plugin/test_automatic_plugin_with_attrs.py b/tests/py/dynamo/automatic_plugin/test_automatic_plugin_with_attrs.py index 6d82c15ea2..5153ead976 100644 --- a/tests/py/dynamo/automatic_plugin/test_automatic_plugin_with_attrs.py +++ b/tests/py/dynamo/automatic_plugin/test_automatic_plugin_with_attrs.py @@ -11,56 +11,57 @@ from ..conversion.harness import DispatchTestCase -@triton.jit -def elementwise_scale_mul_kernel(X, Y, Z, a, b, BLOCK_SIZE: tl.constexpr): - pid = tl.program_id(0) - # Compute the range of elements that this thread block will work on - block_start = pid * BLOCK_SIZE - # Range of indices this thread will handle - offsets = block_start + tl.arange(0, BLOCK_SIZE) - # Load elements from the X and Y tensors - x_vals = tl.load(X + offsets) - y_vals = tl.load(Y + offsets) - # Perform the element-wise multiplication - z_vals = x_vals * y_vals * a + b - # Store the result in Z - tl.store(Z + offsets, z_vals) - - -@torch.library.custom_op("torchtrt_ex::elementwise_scale_mul", mutates_args=()) # type: ignore[misc] -def elementwise_scale_mul( - X: torch.Tensor, Y: torch.Tensor, b: float = 0.2, a: int = 2 -) -> torch.Tensor: - # Ensure the tensors are on the GPU - assert X.is_cuda and Y.is_cuda, "Tensors must be on CUDA device." - assert X.shape == Y.shape, "Tensors must have the same shape." - - # Create output tensor - Z = torch.empty_like(X) - - # Define block size - BLOCK_SIZE = 1024 - - # Grid of programs - grid = lambda meta: (X.numel() // meta["BLOCK_SIZE"],) - - # Launch the kernel with parameters a and b - elementwise_scale_mul_kernel[grid](X, Y, Z, a, b, BLOCK_SIZE=BLOCK_SIZE) - - return Z - - -@torch.library.register_fake("torchtrt_ex::elementwise_scale_mul") -def _(x: torch.Tensor, y: torch.Tensor, b: float = 0.2, a: int = 2) -> torch.Tensor: - return x - - -torch_tensorrt.dynamo.conversion.plugins.custom_op( - "torchtrt_ex::elementwise_scale_mul", supports_dynamic_shapes=True +@unittest.skipIf( + torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + "TensorRT RTX does not support plugins", ) +class TestAutomaticPlugin(DispatchTestCase): + @triton.jit + def elementwise_scale_mul_kernel(X, Y, Z, a, b, BLOCK_SIZE: tl.constexpr): + pid = tl.program_id(0) + # Compute the range of elements that this thread block will work on + block_start = pid * BLOCK_SIZE + # Range of indices this thread will handle + offsets = block_start + tl.arange(0, BLOCK_SIZE) + # Load elements from the X and Y tensors + x_vals = tl.load(X + offsets) + y_vals = tl.load(Y + offsets) + # Perform the element-wise multiplication + z_vals = x_vals * y_vals * a + b + # Store the result in Z + tl.store(Z + offsets, z_vals) + + @torch.library.custom_op("torchtrt_ex::elementwise_scale_mul", mutates_args=()) # type: ignore[misc] + def elementwise_scale_mul( + X: torch.Tensor, Y: torch.Tensor, b: float = 0.2, a: int = 2 + ) -> torch.Tensor: + # Ensure the tensors are on the GPU + assert X.is_cuda and Y.is_cuda, "Tensors must be on CUDA device." + assert X.shape == Y.shape, "Tensors must have the same shape." + + # Create output tensor + Z = torch.empty_like(X) + + # Define block size + BLOCK_SIZE = 1024 + + # Grid of programs + grid = lambda meta: (X.numel() // meta["BLOCK_SIZE"],) + + # Launch the kernel with parameters a and b + elementwise_scale_mul_kernel[grid](X, Y, Z, a, b, BLOCK_SIZE=BLOCK_SIZE) + + return Z + + @torch.library.register_fake("torchtrt_ex::elementwise_scale_mul") + def _(x: torch.Tensor, y: torch.Tensor, b: float = 0.2, a: int = 2) -> torch.Tensor: + return x + + torch_tensorrt.dynamo.conversion.plugins.custom_op( + "torchtrt_ex::elementwise_scale_mul", supports_dynamic_shapes=True + ) -class TestAutomaticPlugin(DispatchTestCase): @parameterized.expand( [ ((64, 64), torch.float), diff --git a/tests/py/dynamo/automatic_plugin/test_flashinfer_rmsnorm.py b/tests/py/dynamo/automatic_plugin/test_flashinfer_rmsnorm.py index 500faee5f9..6068a002d1 100644 --- a/tests/py/dynamo/automatic_plugin/test_flashinfer_rmsnorm.py +++ b/tests/py/dynamo/automatic_plugin/test_flashinfer_rmsnorm.py @@ -17,26 +17,28 @@ # import flashinfer -@torch.library.custom_op("flashinfer::rmsnorm", mutates_args=()) # type: ignore[misc] -def flashinfer_rmsnorm( - input: torch.Tensor, weight: torch.Tensor, eps: float = 1e-6 -) -> torch.Tensor: - return flashinfer.norm.rmsnorm(input, weight) - +@unittest.skip("Not Available") +@unittest.skipIf( + not importlib.util.find_spec("flashinfer") + or torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, + "flashinfer not installed or TensorRT RTX is present", +) +class TestAutomaticPlugin(DispatchTestCase): -@torch.library.register_fake("flashinfer::rmsnorm") -def _(input: torch.Tensor, weight: torch.Tensor, b: float = 1e-6) -> torch.Tensor: - return input + @torch.library.custom_op("flashinfer::rmsnorm", mutates_args=()) # type: ignore[misc] + def flashinfer_rmsnorm( + input: torch.Tensor, weight: torch.Tensor, eps: float = 1e-6 + ) -> torch.Tensor: + return flashinfer.norm.rmsnorm(input, weight) + @torch.library.register_fake("flashinfer::rmsnorm") + def _(input: torch.Tensor, weight: torch.Tensor, b: float = 1e-6) -> torch.Tensor: + return input -torch_tensorrt.dynamo.conversion.plugins.custom_op( - "flashinfer::rmsnorm", supports_dynamic_shapes=True -) - + torch_tensorrt.dynamo.conversion.plugins.custom_op( + "flashinfer::rmsnorm", supports_dynamic_shapes=True + ) -@unittest.skip("Not Available") -@unittest.skipIf(not importlib.util.find_spec("flashinfer"), "flashinfer not installed") -class TestAutomaticPlugin(DispatchTestCase): @parameterized.expand( [ ((64, 64), (64,), torch.float16),