diff --git a/build_rocm_python3 b/build_rocm_python3 index 6f4d977b6b4327..92a16b48032ae8 100755 --- a/build_rocm_python3 +++ b/build_rocm_python3 @@ -20,7 +20,7 @@ while getopts "hrn" opt; do restriction=true ;; n) - nightly=true + nightly=true esac done shift "$((OPTIND-1))" @@ -47,35 +47,22 @@ else fi if [ -f /usertools/rocm.bazelrc ]; then - # Use the bazelrc files in /usertools if available - TF_PKG_LOC=bazel-bin/tensorflow/tools/pip_package/wheel_house - if [[ -n $nightly ]]; then - # Remove any previous builds and build nightly - rm -f $TF_PKG_LOC/tf_nightly_rocm*.whl - export project_name=tf_nightly_rocm - python3 tensorflow/tools/ci_build/update_version.py --nightly --rocm_version && - bazel --bazelrc=/usertools/rocm.bazelrc build $RESOURCE_OPTION --config=rocm --repo_env=WHEEL_NAME=tf_nightly_rocm --action_env=project_name=tf_nightly_rocm --action_env=TF_PYTHON_VERSION=$PYTHON_VERSION tensorflow/tools/pip_package:wheel --verbose_failures && - pip3 install --upgrade $TF_PKG_LOC/tf_nightly_rocm*.whl - else - # Remove any previous builds and build release - rm -f $TF_PKG_LOC/tensorflow*.whl - python3 tensorflow/tools/ci_build/update_version.py --rocm_version && - bazel --bazelrc=/usertools/rocm.bazelrc build $RESOURCE_OPTION --config=rocm --repo_env=WHEEL_NAME=tensorflow_rocm --action_env=project_name=tensorflow_rocm --action_env=TF_PYTHON_VERSION=$PYTHON_VERSION tensorflow/tools/pip_package:wheel --verbose_failures && - pip3 install --upgrade $TF_PKG_LOC/tensorflow*.whl - fi + # Use the bazelrc files in /usertools if available + # Also, this is likely a tensorflow-build container so put the whl in /tf/pkg + TF_PKG_LOC=/tf/pkg + # Remove any previous builds and build nightly + rm -f $TF_PKG_LOC/tensorflow*.whl + python3 tensorflow/tools/ci_build/update_version.py --nightly --rocm_version && + bazel --bazelrc=/usertools/rocm.bazelrc build $RESOURCE_OPTION --config=rocm --action_env=TF_PYTHON_VERSION=$PYTHON_VERSION tensorflow/tools/pip_package:wheel 
--verbose_failures && + mkdir -p $TF_PKG_LOC && cp ./bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow*.whl $TF_PKG_LOC/ && + pip3 install --upgrade $TF_PKG_LOC/tensorflow*.whl else - # Legacy style: run configure then build - TF_PKG_LOC=bazel-bin/tensorflow/tools/pip_package/wheel_house - yes "" | TF_NEED_CLANG=0 ROCM_PATH=$ROCM_INSTALL_DIR TF_NEED_ROCM=1 PYTHON_BIN_PATH=/usr/bin/python3 ./configure && - if [[ -n $nightly ]]; then - # Remove any previous builds and build nightly - rm -f $TF_PKG_LOC/tf_nightly_rocm*.whl - bazel build $RESOURCE_OPTION --config=opt --config=rocm --repo_env=WHEEL_NAME=tf_nightly_rocm --action_env=project_name=tf_nightly_rocm //tensorflow/tools/pip_package:wheel --verbose_failures && - pip3 install --upgrade $TF_PKG_LOC/tf_nightly_rocm*.whl - else - # Remove any previous builds and build release - rm -f $TF_PKG_LOC/tensorflow*.whl - bazel build $RESOURCE_OPTION --config=opt --config=rocm --repo_env=WHEEL_NAME=tensorflow_rocm --action_env=project_name=tensorflow_rocm //tensorflow/tools/pip_package:wheel --verbose_failures && - pip3 install --upgrade $TF_PKG_LOC/tensorflow*.whl - fi + # Legacy style: run configure then build + TF_PKG_LOC=/tmp/tensorflow_pkg + yes "" | TF_NEED_CLANG=0 ROCM_PATH=$ROCM_INSTALL_DIR TF_NEED_ROCM=1 PYTHON_BIN_PATH=/usr/bin/python3 ./configure && + # Remove any previous builds, then build the wheel and stage it in TF_PKG_LOC + rm -f $TF_PKG_LOC/tensorflow*.whl + bazel build $RESOURCE_OPTION --config=opt --config=rocm //tensorflow/tools/pip_package:wheel --verbose_failures && + mkdir -p $TF_PKG_LOC && cp ./bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow*.whl $TF_PKG_LOC/ && + pip3 install --upgrade $TF_PKG_LOC/tensorflow*.whl fi diff --git a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc index 8d4972db51ba7a..d3a5954cb08905 100644 --- a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc +++ b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc @@ -116,7 +116,8 @@ bool 
HasFastFP16Support(const DeviceProperties& props) { #elif TENSORFLOW_USE_ROCM absl::flat_hash_set FP16SupportedDevices = { {"gfx906"}, {"gfx908"}, {"gfx90a"}, {"gfx910"}, {"gfx940"}, {"gfx941"}, - {"gfx942"}, {"gfx1010"}, {"gfx1012"}, {"gfx1030"}, + {"gfx942"}, {"gfx950"}, + {"gfx1010"}, {"gfx1012"}, {"gfx1030"}, {"gfx1100"}, {"gfx1101"}, {"gfx1102"}, {"gfx1200"}, {"gfx1201"} }; diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc index 0617a27869394a..4e26b0198534fc 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc @@ -73,6 +73,7 @@ inline GpuStats GetNumGPUs(const Cluster& cluster) { compute_capability_it->second == "gfx940" || compute_capability_it->second == "gfx941" || compute_capability_it->second == "gfx942" || + compute_capability_it->second == "gfx950" || compute_capability_it->second == "gfx1101" || compute_capability_it->second == "gfx1102" || compute_capability_it->second == "gfx1200" || diff --git a/tensorflow/core/util/gpu_device_functions.h b/tensorflow/core/util/gpu_device_functions.h index 272290d73dacd1..5e9d960d63aed3 100644 --- a/tensorflow/core/util/gpu_device_functions.h +++ b/tensorflow/core/util/gpu_device_functions.h @@ -743,7 +743,7 @@ __device__ inline double GpuAtomicAdd(double* ptr, double value) { } #endif -#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__ +#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx950__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__ #define ADDRSP1 __attribute__((address_space(1))) __device__ float @@ -963,7 +963,7 @@ __device__ inline int64_t GpuAtomicMin(int64_t* ptr, int64_t value) { } #endif -#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || 
__gfx1200__ || __gfx1201__ +#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx950__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__ // Low level instructions don't return. For now, assume that return value // is always unused. __device__ float GpuAtomicAdd(float* dst, float val) { @@ -978,7 +978,7 @@ __device__ inline T GpuAtomicAddShared(T* ptr, T value) { return GpuAtomicAdd(ptr, value); } -#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__ +#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx950__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__ __device__ float GpuAtomicAddShared(float* dst, float val) { atomicAdd(dst, val); return val; diff --git a/tensorflow/tools/ci_build/update_version.py b/tensorflow/tools/ci_build/update_version.py index 3265533f803751..4614aa4faf859a 100755 --- a/tensorflow/tools/ci_build/update_version.py +++ b/tensorflow/tools/ci_build/update_version.py @@ -349,6 +349,7 @@ def main(): check_all_files() old_version = get_current_semver_version() + print(f"{old_version = } {args.nightly = } {args.version = } {args.rocm_version = }") if args.nightly: if args.version: @@ -385,6 +386,7 @@ def main(): # Update Apple Silicon release CI files for release builds only update_m1_builds(old_version, new_version) + print(f"{old_version = } {new_version = }") update_version_h(old_version, new_version) update_setup_dot_py(old_version, new_version) update_readme(old_version, new_version) @@ -397,6 +399,8 @@ def main(): print("Identifier String: %s -> %s\n" % (old_version.identifier_string, new_version.identifier_string)) check_for_old_version(old_version, new_version) + print(f"{new_version.identifier_string = }") + # exit(-1) if __name__ == "__main__": diff --git a/tensorflow/tools/pip_package/build_pip_package.py b/tensorflow/tools/pip_package/build_pip_package.py index 
4809d5ec7a7c50..39d1f0aa026ec4 100644 --- a/tensorflow/tools/pip_package/build_pip_package.py +++ b/tensorflow/tools/pip_package/build_pip_package.py @@ -139,19 +139,20 @@ def prepare_headers(headers: list[str], srcs_dir: str) -> None: "external/local_tsl/": "tensorflow", } - for file in headers: - if file.endswith("cc.inc"): - continue - - if any(i in file for i in path_to_exclude): - continue - - for path, val in path_to_replace.items(): - if path in file: - copy_file(file, os.path.join(srcs_dir, val), path) - break - else: - copy_file(file, srcs_dir) + if headers is not None and len(headers) > 0: + for file in headers: + if file.endswith("cc.inc"): + continue + + if any(i in file for i in path_to_exclude): + continue + + for path, val in path_to_replace.items(): + if path in file: + copy_file(file, os.path.join(srcs_dir, val), path) + break + else: + copy_file(file, srcs_dir) create_local_config_python( os.path.join(srcs_dir, "external/local_config_python") diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014 index bb754568d66c02..2a36d41bbd8a0e 100644 --- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014 +++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014 @@ -8,7 +8,7 @@ COPY setup.packages.rocm.cs7.sh setup.packages.rocm.cs7.sh COPY builder.packages.rocm.cs7.txt builder.packages.rocm.cs7.txt RUN /setup.packages.rocm.cs7.sh /builder.packages.rocm.cs7.txt -ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201" +ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201" ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS} # Install ROCM diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28 index 
2ce772270d375f..cfe2136ce21650 100644 --- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28 +++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28 @@ -17,7 +17,7 @@ COPY setup.packages.rocm.el8.sh setup.packages.rocm.el8.sh COPY builder.packages.rocm.el8.txt builder.packages.rocm.el8.txt RUN /setup.packages.rocm.el8.sh /builder.packages.rocm.el8.txt -ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201" +ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201" ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS} # Install ROCM diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20 index 6dddb73d1b49a6..17a958961ea35d 100644 --- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20 +++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20 @@ -2,7 +2,7 @@ FROM ubuntu:20.04 ################################################################################ -ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201" +ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201" ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS} # Install build dependencies diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22 index a3a8edddd0f909..a80fa6cf0cdded 100644 --- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22 +++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22 @@ -2,7 +2,7 @@ FROM ubuntu:22.04 ################################################################################ -ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201" +ARG 
GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201" ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS} # Install build dependencies diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub24 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub24 index 45c6bbe00030fe..c9cdb6ba051b14 100644 --- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub24 +++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub24 @@ -2,7 +2,7 @@ FROM ubuntu:24.04 ################################################################################ -ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201" +ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201" ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS} # Install build dependencies diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh b/tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh index 7d3a78d5c50dad..a2e6aef89be127 100755 --- a/tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh +++ b/tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh @@ -145,7 +145,7 @@ then echo "build:rocm_base --copt=-fclang-abi-compat=17" >> /etc/bazel.bazelrc fi -GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS:-"gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100"} +GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS:-"gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100"} echo $ROCM_VERSION echo $ROCM_REPO diff --git a/third_party/xla/xla/service/gpu/llvm_gpu_backend/amdgpu_backend.cc b/third_party/xla/xla/service/gpu/llvm_gpu_backend/amdgpu_backend.cc index 4b285b6ba106ab..c8b95f49722526 100644 --- a/third_party/xla/xla/service/gpu/llvm_gpu_backend/amdgpu_backend.cc +++ b/third_party/xla/xla/service/gpu/llvm_gpu_backend/amdgpu_backend.cc @@ -340,7 +340,8 @@ std::string MapGCNArchNameTokenToFeatureStr(const std::string& token, return 
"+sramecc"; } else if (token == "sramecc-") { if (gfx == "gfx90a" || gfx == "gfx940" || gfx == "gfx941" || - gfx == "gfx942" || gfx == "gfx1101" || gfx == "gfx1102" || + gfx == "gfx942" || gfx == "gfx950" || + gfx == "gfx1101" || gfx == "gfx1102" || gfx == "gfx1200" || gfx == "gfx1201") return ""; return "-sramecc"; diff --git a/third_party/xla/xla/service/gpu/transforms/conv_rewriter_test.cc b/third_party/xla/xla/service/gpu/transforms/conv_rewriter_test.cc index 062ae3c9958504..44266f34ab532d 100644 --- a/third_party/xla/xla/service/gpu/transforms/conv_rewriter_test.cc +++ b/third_party/xla/xla/service/gpu/transforms/conv_rewriter_test.cc @@ -791,6 +791,13 @@ TEST_F(ConvRewriterTest, TestInvalidTypes) { ::testing::HasSubstr( "FP8 convolutions are only supported on CUDA GPUs"))); + s = ConvRewriter(se::RocmComputeCapability{"gfx950"}).Run(m.get()).status(); + EXPECT_THAT(s, tsl::testing::StatusIs( + absl::StatusCode::kUnimplemented, + ::testing::HasSubstr( + "FP8 convolutions are only supported on CUDA GPUs"))); + + // Test unsupported FP8 type module_with_type = absl::StrReplaceAll(module_str, {{"TYPE", "f8e4m3fnuz"}}); TF_ASSERT_OK_AND_ASSIGN(m, ParseAndReturnVerifiedModule(module_with_type)); diff --git a/third_party/xla/xla/stream_executor/device_description.h b/third_party/xla/xla/stream_executor/device_description.h index 323a167385c610..e69ece46a6eef1 100644 --- a/third_party/xla/xla/stream_executor/device_description.h +++ b/third_party/xla/xla/stream_executor/device_description.h @@ -78,13 +78,13 @@ class RocmComputeCapability { bool gfx9_mi100_or_later() const { static constexpr absl::string_view kList[] = {"gfx908", "gfx90a", "gfx940", - "gfx941", "gfx942"}; + "gfx941", "gfx942", "gfx950"}; return absl::c_count(kList, gfx_version()) != 0; } bool gfx9_mi200_or_later() const { static constexpr absl::string_view kList[] = {"gfx90a", "gfx940", "gfx941", - "gfx942"}; + "gfx942", "gfx950"}; return absl::c_count(kList, gfx_version()) != 0; } @@ -157,6 +157,7 
@@ class RocmComputeCapability { "gfx908", // MI100 "gfx90a", // MI200 "gfx940", "gfx941", "gfx942", // MI300 + "gfx950", "gfx1030", // RX68xx / RX69xx "gfx1100", "gfx1101", "gfx1102", // RX7900 "gfx1200", "gfx1201", // RX8900