Skip to content

Updated fbgemm_rocm commit to align with upstream #2398

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: release/2.7
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 89 additions & 13 deletions .ci/pytorch/common_utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,34 @@ function pip_install_whl() {
fi
}

function pip_build_and_install() {
local build_target=$1
local wheel_dir=$2

local found_whl=0
for file in "${wheel_dir}"/*.whl
do
if [[ -f "${file}" ]]; then
found_whl=1
break
fi
done

# Build the wheel if it doesn't exist
if [ "${found_whl}" == "0" ]; then
python3 -m pip wheel \
--no-build-isolation \
--no-deps \
--no-use-pep517 \
-w "${wheel_dir}" \
"${build_target}"
fi

for file in "${wheel_dir}"/*.whl
do
pip_install_whl "${file}"
done
}

function pip_install() {
# retry 3 times
Expand Down Expand Up @@ -174,25 +202,73 @@ function install_torchrec_and_fbgemm() {

if [[ "$BUILD_ENVIRONMENT" == *rocm* ]] ; then
# install torchrec first because it installs fbgemm nightly on top of rocm fbgemm
pip_install --no-use-pep517 "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
pip_build_and_install "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}" dist/torchrec
pip_uninstall fbgemm-gpu-nightly

# Set ROCM_HOME isn't available, use ROCM_PATH if set or /opt/rocm
ROCM_HOME="${ROCM_HOME:-${ROCM_PATH:-/opt/rocm}}"

# Find rocm_version.h header file for ROCm version extract
rocm_version_h="${ROCM_HOME}/include/rocm-core/rocm_version.h"
if [ ! -f "$rocm_version_h" ]; then
rocm_version_h="${ROCM_HOME}/include/rocm_version.h"
fi

# Error out if rocm_version.h not found
if [ ! -f "$rocm_version_h" ]; then
echo "Error: rocm_version.h not found in expected locations." >&2
exit 1
fi

# Extract major, minor and patch ROCm version numbers
MAJOR_VERSION=$(grep 'ROCM_VERSION_MAJOR' "$rocm_version_h" | awk '{print $3}')
MINOR_VERSION=$(grep 'ROCM_VERSION_MINOR' "$rocm_version_h" | awk '{print $3}')
PATCH_VERSION=$(grep 'ROCM_VERSION_PATCH' "$rocm_version_h" | awk '{print $3}')
ROCM_INT=$((MAJOR_VERSION * 10000 + MINOR_VERSION * 100 + PATCH_VERSION))
echo "ROCm version: $ROCM_INT"
export BUILD_ROCM_VERSION="$MAJOR_VERSION.$MINOR_VERSION"

pip_install tabulate # needed for newer fbgemm
pip_install patchelf # needed for rocm fbgemm
git clone --recursive https://github.com/pytorch/fbgemm
pushd fbgemm/fbgemm_gpu
git checkout "${fbgemm_commit}"
python setup.py install \
--package_variant=rocm \
-DHIP_ROOT_DIR="${ROCM_PATH}" \
-DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \
-DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA"
popd
pushd /tmp

local wheel_dir=dist/fbgemm_gpu
local found_whl=0
for file in "${wheel_dir}"/*.whl
do
if [[ -f "${file}" ]]; then
found_whl=1
break
fi
done

# Build the wheel if it doesn't exist
if [ "${found_whl}" == "0" ]; then
git clone --recursive https://github.com/pytorch/fbgemm
pushd fbgemm/fbgemm_gpu
git checkout "${fbgemm_commit}"
python setup.py bdist_wheel \
--build-variant=rocm \
-DHIP_ROOT_DIR="${ROCM_PATH}" \
-DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \
-DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA"
popd

# Save the wheel before cleaning up
mkdir -p dist/fbgemm_gpu
cp fbgemm/fbgemm_gpu/dist/*.whl dist/fbgemm_gpu
fi

for file in "${wheel_dir}"/*.whl
do
pip_install_whl "${file}"
done

rm -rf fbgemm
popd
else
# See https://github.com/pytorch/pytorch/issues/106971
CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu"
pip_install --no-use-pep517 "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
pip_build_and_install "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}" dist/torchrec
pip_build_and_install "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#subdirectory=fbgemm_gpu" dist/fbgemm_gpu
fi
}

Expand Down
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/fbgemm_rocm.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5fb5024118e9bb9decf96c2b0b1a8f0010bf56be
7f1de94a4c2d14f59ad4ca84538c36084ea6b2c8