Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 18 additions & 31 deletions build_rocm_python3
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ while getopts "hrn" opt; do
restriction=true
;;
n)
nightly=true
nightly=true
esac
done
shift "$((OPTIND-1))"
Expand All @@ -47,35 +47,22 @@ else
fi

if [ -f /usertools/rocm.bazelrc ]; then
# Use the bazelrc files in /usertools if available

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are these changes needed?

Copy link
Author

@zahiqbal zahiqbal Feb 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverted it. I added this to fix the CI build; however, it is only used for local builds. I may merge it in a separate PR if required.

TF_PKG_LOC=bazel-bin/tensorflow/tools/pip_package/wheel_house
if [[ -n $nightly ]]; then
# Remove any previous builds and build nightly
rm -f $TF_PKG_LOC/tf_nightly_rocm*.whl
export project_name=tf_nightly_rocm
python3 tensorflow/tools/ci_build/update_version.py --nightly --rocm_version &&
bazel --bazelrc=/usertools/rocm.bazelrc build $RESOURCE_OPTION --config=rocm --repo_env=WHEEL_NAME=tf_nightly_rocm --action_env=project_name=tf_nightly_rocm --action_env=TF_PYTHON_VERSION=$PYTHON_VERSION tensorflow/tools/pip_package:wheel --verbose_failures &&
pip3 install --upgrade $TF_PKG_LOC/tf_nightly_rocm*.whl
else
# Remove any previous builds and build release
rm -f $TF_PKG_LOC/tensorflow*.whl
python3 tensorflow/tools/ci_build/update_version.py --rocm_version &&
bazel --bazelrc=/usertools/rocm.bazelrc build $RESOURCE_OPTION --config=rocm --repo_env=WHEEL_NAME=tensorflow_rocm --action_env=project_name=tensorflow_rocm --action_env=TF_PYTHON_VERSION=$PYTHON_VERSION tensorflow/tools/pip_package:wheel --verbose_failures &&
pip3 install --upgrade $TF_PKG_LOC/tensorflow*.whl
fi
# Use the bazelrc files in /usertools if available
# Also, this is likely a tensorflow-build container so put the whl in /tf/pkg
TF_PKG_LOC=/tf/pkg
# Remove any previous builds and build nightly
rm -f $TF_PKG_LOC/tensorflow*.whl
python3 tensorflow/tools/ci_build/update_version.py --nightly --rocm_version &&
bazel --bazelrc=/usertools/rocm.bazelrc build $RESOURCE_OPTION --config=rocm --action_env=TF_PYTHON_VERSION=$PYTHON_VERSION tensorflow/tools/pip_package:wheel --verbose_failures &&
cp ./bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow*.whl $TF_PKG_LOC/ &&
pip3 install --upgrade $TF_PKG_LOC/tensorflow*.whl
else
# Legacy style: run configure then build
TF_PKG_LOC=bazel-bin/tensorflow/tools/pip_package/wheel_house
yes "" | TF_NEED_CLANG=0 ROCM_PATH=$ROCM_INSTALL_DIR TF_NEED_ROCM=1 PYTHON_BIN_PATH=/usr/bin/python3 ./configure &&
if [[ -n $nightly ]]; then
# Remove any previous builds and build nightly
rm -f $TF_PKG_LOC/tf_nightly_rocm*.whl
bazel build $RESOURCE_OPTION --config=opt --config=rocm --repo_env=WHEEL_NAME=tf_nightly_rocm --action_env=project_name=tf_nightly_rocm //tensorflow/tools/pip_package:wheel --verbose_failures &&
pip3 install --upgrade $TF_PKG_LOC/tf_nightly_rocm*.whl
else
# Remove any previous builds and build release
rm -f $TF_PKG_LOC/tensorflow*.whl
bazel build $RESOURCE_OPTION --config=opt --config=rocm --repo_env=WHEEL_NAME=tensorflow_rocm --action_env=project_name=tensorflow_rocm //tensorflow/tools/pip_package:wheel --verbose_failures &&
pip3 install --upgrade $TF_PKG_LOC/tensorflow*.whl
fi
# Legacy style: run configure then build
TF_PKG_LOC=/tmp/tensorflow_pkg
yes "" | TF_NEED_CLANG=0 ROCM_PATH=$ROCM_INSTALL_DIR TF_NEED_ROCM=1 PYTHON_BIN_PATH=/usr/bin/python3 ./configure &&
# Remove any previous builds and build nightly
rm -f $TF_PKG_LOC/tensorflow*.whl
bazel build $RESOURCE_OPTION --config=opt --config=rocm //tensorflow/tools/pip_package:wheel --verbose_failures &&
cp ./bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow*.whl $TF_PKG_LOC/ &&
pip3 install --upgrade $TF_PKG_LOC/tf_nightly_rocm*.whl
fi
3 changes: 2 additions & 1 deletion tensorflow/core/grappler/optimizers/auto_mixed_precision.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ bool HasFastFP16Support(const DeviceProperties& props) {
#elif TENSORFLOW_USE_ROCM
absl::flat_hash_set<std::string> FP16SupportedDevices = {
{"gfx906"}, {"gfx908"}, {"gfx90a"}, {"gfx910"}, {"gfx940"}, {"gfx941"},
{"gfx942"}, {"gfx1010"}, {"gfx1012"}, {"gfx1030"},
{"gfx942"}, {"gfx950"},

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any gfx that is not listed here at this point? Should we just return true?

{"gfx1010"}, {"gfx1012"}, {"gfx1030"},
{"gfx1100"}, {"gfx1101"}, {"gfx1102"},
{"gfx1200"}, {"gfx1201"}
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ inline GpuStats GetNumGPUs(const Cluster& cluster) {
compute_capability_it->second == "gfx940" ||
compute_capability_it->second == "gfx941" ||
compute_capability_it->second == "gfx942" ||
compute_capability_it->second == "gfx950" ||
compute_capability_it->second == "gfx1101" ||
compute_capability_it->second == "gfx1102" ||
compute_capability_it->second == "gfx1200" ||
Expand Down
6 changes: 3 additions & 3 deletions tensorflow/core/util/gpu_device_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,7 @@ __device__ inline double GpuAtomicAdd(double* ptr, double value) {
}
#endif

#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx950__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__

#define ADDRSP1 __attribute__((address_space(1)))
__device__ float
Expand Down Expand Up @@ -963,7 +963,7 @@ __device__ inline int64_t GpuAtomicMin(int64_t* ptr, int64_t value) {
}
#endif

#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx950__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
// Low level instructions don't return. For now, assume that return value
// is always unused.
__device__ float GpuAtomicAdd(float* dst, float val) {
Expand All @@ -978,7 +978,7 @@ __device__ inline T GpuAtomicAddShared(T* ptr, T value) {
return GpuAtomicAdd(ptr, value);
}

#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx950__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
__device__ float GpuAtomicAddShared(float* dst, float val) {
atomicAdd(dst, val);
return val;
Expand Down
5 changes: 5 additions & 0 deletions tensorflow/tools/ci_build/update_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,8 @@ def main():

check_all_files()
old_version = get_current_semver_version()
print(f"{old_version = } {args.nightly = } {args.version = } {args.rocm_version = }")
args.nightly = True

if args.nightly:
if args.version:
Expand Down Expand Up @@ -385,6 +387,7 @@ def main():
# Update Apple Silicon release CI files for release builds only
update_m1_builds(old_version, new_version)

print(f"{old_version = } {new_version = }")
update_version_h(old_version, new_version)
update_setup_dot_py(old_version, new_version)
update_readme(old_version, new_version)
Expand All @@ -397,6 +400,8 @@ def main():
print("Identifier String: %s -> %s\n" % (old_version.identifier_string, new_version.identifier_string))

check_for_old_version(old_version, new_version)
print(f"{new_version.identifier_string = }")
# exit(-1)


if __name__ == "__main__":
Expand Down
27 changes: 14 additions & 13 deletions tensorflow/tools/pip_package/build_pip_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,19 +139,20 @@ def prepare_headers(headers: list[str], srcs_dir: str) -> None:
"external/local_tsl/": "tensorflow",
}

for file in headers:
if file.endswith("cc.inc"):
continue

if any(i in file for i in path_to_exclude):
continue

for path, val in path_to_replace.items():
if path in file:
copy_file(file, os.path.join(srcs_dir, val), path)
break
else:
copy_file(file, srcs_dir)
if headers is not None and len(headers) > 0:
for file in headers:
if file.endswith("cc.inc"):
continue

if any(i in file for i in path_to_exclude):
continue

for path, val in path_to_replace.items():
if path in file:
copy_file(file, os.path.join(srcs_dir, val), path)
break
else:
copy_file(file, srcs_dir)

create_local_config_python(
os.path.join(srcs_dir, "external/local_config_python")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ COPY setup.packages.rocm.cs7.sh setup.packages.rocm.cs7.sh
COPY builder.packages.rocm.cs7.txt builder.packages.rocm.cs7.txt
RUN /setup.packages.rocm.cs7.sh /builder.packages.rocm.cs7.txt

ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

# Install ROCM
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ COPY setup.packages.rocm.el8.sh setup.packages.rocm.el8.sh
COPY builder.packages.rocm.el8.txt builder.packages.rocm.el8.txt
RUN /setup.packages.rocm.el8.sh /builder.packages.rocm.el8.txt

ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

# Install ROCM
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
FROM ubuntu:20.04
################################################################################

ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

# Install build dependencies
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
FROM ubuntu:22.04
################################################################################

ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

# Install build dependencies
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
FROM ubuntu:24.04
################################################################################

ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

# Install build dependencies
Expand Down
2 changes: 1 addition & 1 deletion tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ then
echo "build:rocm_base --copt=-fclang-abi-compat=17" >> /etc/bazel.bazelrc
fi

GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS:-"gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100"}
GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS:-"gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100"}

echo $ROCM_VERSION
echo $ROCM_REPO
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,8 @@ std::string MapGCNArchNameTokenToFeatureStr(const std::string& token,
return "+sramecc";
} else if (token == "sramecc-") {
if (gfx == "gfx90a" || gfx == "gfx940" || gfx == "gfx941" ||

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should remove this? This was a workaround for one specific machine. Alternatively, we might just do it unconditionally.

gfx == "gfx942" || gfx == "gfx1101" || gfx == "gfx1102" ||
gfx == "gfx942" || gfx == "gfx950" ||
gfx == "gfx1101" || gfx == "gfx1102" ||
gfx == "gfx1200" || gfx == "gfx1201")
return "";
return "-sramecc";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,13 @@ TEST_F(ConvRewriterTest, TestInvalidTypes) {
::testing::HasSubstr(
"FP8 convolutions are only supported on CUDA GPUs")));

s = ConvRewriter(se::RocmComputeCapability{"gfx950"}).Run(m.get()).status();
EXPECT_THAT(s, tsl::testing::StatusIs(
absl::StatusCode::kUnimplemented,
::testing::HasSubstr(
"FP8 convolutions are only supported on CUDA GPUs")));


// Test unsupported FP8 type
module_with_type = absl::StrReplaceAll(module_str, {{"TYPE", "f8e4m3fnuz"}});
TF_ASSERT_OK_AND_ASSIGN(m, ParseAndReturnVerifiedModule(module_with_type));
Expand Down
5 changes: 3 additions & 2 deletions third_party/xla/xla/stream_executor/device_description.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,13 @@ class RocmComputeCapability {

bool gfx9_mi100_or_later() const {
static constexpr absl::string_view kList[] = {"gfx908", "gfx90a", "gfx940",
"gfx941", "gfx942"};
"gfx941", "gfx942", "gfx950"};
return absl::c_count(kList, gfx_version()) != 0;
}

bool gfx9_mi200_or_later() const {
static constexpr absl::string_view kList[] = {"gfx90a", "gfx940", "gfx941",
"gfx942"};
"gfx942", "gfx950"};
return absl::c_count(kList, gfx_version()) != 0;
}

Expand Down Expand Up @@ -157,6 +157,7 @@ class RocmComputeCapability {
"gfx908", // MI100
"gfx90a", // MI200
"gfx940", "gfx941", "gfx942", // MI300
"gfx950",
"gfx1030", // RX68xx / RX69xx
"gfx1100", "gfx1101", "gfx1102", // RX7900
"gfx1200", "gfx1201", // RX8900
Expand Down