From 549e9ecbe5437ce5ec7487618544b50b17934aa8 Mon Sep 17 00:00:00 2001 From: vadseshu Date: Thu, 16 Apr 2026 13:10:31 +0000 Subject: [PATCH 1/2] Added gfx950 to pyt_mochi_inference.ubuntu dockerfile to enable fa --- docker/pyt_mochi_inference.ubuntu.amd.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/pyt_mochi_inference.ubuntu.amd.Dockerfile b/docker/pyt_mochi_inference.ubuntu.amd.Dockerfile index 4cbbd1e..fb9426f 100644 --- a/docker/pyt_mochi_inference.ubuntu.amd.Dockerfile +++ b/docker/pyt_mochi_inference.ubuntu.amd.Dockerfile @@ -33,7 +33,7 @@ RUN mkdir -p $WORKSPACE_DIR WORKDIR $WORKSPACE_DIR ARG FA_REPO="https://github.com/Dao-AILab/flash-attention.git" -ARG PYTORCH_ROCM_ARCH=gfx90a;gfx942;gfx1100;gfx1101;gfx1200;gfx1201 +ARG PYTORCH_ROCM_ARCH=gfx950;gfx90a;gfx942;gfx1100;gfx1101;gfx1200;gfx1201 RUN git clone ${FA_REPO} RUN cd flash-attention \ && git submodule update --init \ From 0c40e046b7982e0b04f91d56f3ea7fe62ca4023e Mon Sep 17 00:00:00 2001 From: vadseshu Date: Mon, 20 Apr 2026 14:13:53 +0000 Subject: [PATCH 2/2] fix(docker): Hunyuan video AMD image build and runtime imports Pin transformers to 4.x so diffusers 0.32.2 can import FLAX_WEIGHTS_NAME. Patch rocmProfileData rocpd_python Makefile to avoid pip install --user inside the base image venv. Flash-attention is installed with FLASH_ATTENTION_TRITON_AMD_ENABLE for headless/CI-friendly builds; this replaces the prior wheel build from a pinned SHA. 
--- docker/pyt_hy_video.ubuntu.amd.Dockerfile | 35 ++++++++++++++++------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/docker/pyt_hy_video.ubuntu.amd.Dockerfile b/docker/pyt_hy_video.ubuntu.amd.Dockerfile index 062fc9f..70a7f6a 100644 --- a/docker/pyt_hy_video.ubuntu.amd.Dockerfile +++ b/docker/pyt_hy_video.ubuntu.amd.Dockerfile @@ -6,7 +6,8 @@ ARG work_dir=/hunyuanvideo WORKDIR $work_dir -RUN pip install diffusers==0.32.2 distvae yunchang==0.6.0 opencv-python accelerate +# Pin transformers 4.x for diffusers 0.32.2 (FLAX_WEIGHTS_NAME removed in v5). +RUN pip install diffusers==0.32.2 distvae yunchang==0.6.0 opencv-python accelerate "transformers>=4.44.0,<5.0" RUN pip install imageio imageio-ffmpeg RUN pip install beautifulsoup4==4.12.3 RUN pip install sentencepiece==0.1.99 @@ -18,22 +19,34 @@ RUN git clone https://github.com/xdit-project/xDiT && \ cd xDiT && git checkout 775a5263d95518a733e4f239ad21228b755598bb && \ pip install --no-deps -e . -# flash attn -ARG FA_SHA="22c0358" -ARG FA_REPO="https://github.com/ROCm/flash-attention.git" -ARG PYTORCH_ROCM_ARCH="gfx942" -RUN git clone ${FA_REPO} && \ - cd flash-attention && \ - git checkout ${FA_SHA} && \ - git submodule update --init && \ - GPU_ARCHS=${PYTORCH_ROCM_ARCH} python3 setup.py bdist_wheel --dist-dir=dist && \ - pip install dist/*.whl; +# flash attn (avoid ARG name PYTORCH_ROCM_ARCH: base image ENV can shadow it and expand to "") +# ROCm flash-attention: FA_GPU_ARCH=native needs a visible GPU at compile time and fails in CI/docker. +# Coerce native/empty to gfx942 for headless CI; for MI350 pass --build-arg FA_GPU_ARCH=gfx950 (needs FA_SHA with gfx950 in setup.py). 
+#ARG FA_SHA="b3ae4966b2567811880db10d9e040a775b99c7d7" +#ARG FA_REPO="https://github.com/ROCm/flash-attention.git" +#ARG FA_GPU_ARCH=gfx942 +#RUN git clone ${FA_REPO} && \ +# cd flash-attention && \ +# git checkout ${FA_SHA} && \ +# git submodule update --init && \ +# F='${FA_GPU_ARCH}' && \ +# if [ -z "$F" ]; then F=gfx942; fi && \ +# if [ "$F" = "native" ]; then F=gfx942; fi && \ +# GPU_ARCHS="$F" python3 setup.py bdist_wheel --dist-dir=dist && \ +# pip install dist/*.whl; +RUN git clone https://github.com/ROCm/flash-attention.git \ + && cd flash-attention \ + && FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE" pip install --no-build-isolation . + + # RPD profiling RUN apt update && \ apt install -y sqlite3 libsqlite3-dev libfmt-dev +# Upstream uses pip --user; venv disallows that. RUN git clone https://github.com/ROCmSoftwarePlatform/rocmProfileData && \ cd rocmProfileData && \ + sed -i 's/pip install --user/pip install/g' rocpd_python/Makefile && \ make && make install && \ cd rocpd_python && python setup.py install && cd .. && \ cd rpd_tracer && python setup.py install && cd ..