35 changes: 24 additions & 11 deletions docker/pyt_hy_video.ubuntu.amd.Dockerfile
@@ -6,7 +6,8 @@ ARG work_dir=/hunyuanvideo

WORKDIR $work_dir

RUN pip install diffusers==0.32.2 distvae yunchang==0.6.0 opencv-python accelerate
# Pin transformers 4.x for diffusers 0.32.2 (FLAX_WEIGHTS_NAME removed in v5).
RUN pip install diffusers==0.32.2 distvae yunchang==0.6.0 opencv-python accelerate "transformers>=4.44.0,<5.0"
RUN pip install imageio imageio-ffmpeg
RUN pip install beautifulsoup4==4.12.3
RUN pip install sentencepiece==0.1.99
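
The pin matters because diffusers 0.32.2 imports FLAX_WEIGHTS_NAME from transformers, a constant that transformers v5 removed. An optional build-time sanity check (illustrative only, not part of this diff) could follow the install:

# Sketch: fail the build early if the resolved transformers no longer exports
# the constant that diffusers 0.32.2 imports at startup.
RUN python3 -c "from transformers.utils import FLAX_WEIGHTS_NAME; import diffusers; print('ok', FLAX_WEIGHTS_NAME)"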
@@ -18,22 +19,34 @@ RUN git clone https://github.com/xdit-project/xDiT && \
cd xDiT && git checkout 775a5263d95518a733e4f239ad21228b755598bb && \
pip install --no-deps -e .

# flash attn
ARG FA_SHA="22c0358"
ARG FA_REPO="https://github.com/ROCm/flash-attention.git"
ARG PYTORCH_ROCM_ARCH="gfx942"
RUN git clone ${FA_REPO} && \
cd flash-attention && \
git checkout ${FA_SHA} && \
git submodule update --init && \
GPU_ARCHS=${PYTORCH_ROCM_ARCH} python3 setup.py bdist_wheel --dist-dir=dist && \
pip install dist/*.whl;
# flash attn (avoid ARG name PYTORCH_ROCM_ARCH: base image ENV can shadow it and expand to "")
# ROCm flash-attention: FA_GPU_ARCH=native needs a visible GPU at compile time and fails in CI/docker.
# Coerce native/empty to gfx942 for headless CI; for MI350, pass --build-arg FA_GPU_ARCH=gfx950 (requires an FA_SHA whose setup.py lists gfx950).
#ARG FA_SHA="b3ae4966b2567811880db10d9e040a775b99c7d7"
#ARG FA_REPO="https://github.com/ROCm/flash-attention.git"
#ARG FA_GPU_ARCH=gfx942
#RUN git clone ${FA_REPO} && \
# cd flash-attention && \
# git checkout ${FA_SHA} && \
# git submodule update --init && \
# F="${FA_GPU_ARCH}" && \
# if [ -z "$F" ]; then F=gfx942; fi && \
# if [ "$F" = "native" ]; then F=gfx942; fi && \
# GPU_ARCHS="$F" python3 setup.py bdist_wheel --dist-dir=dist && \
# pip install dist/*.whl;
RUN git clone https://github.com/ROCm/flash-attention.git \
&& cd flash-attention \
&& FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE" pip install --no-build-isolation .
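
A quick import check can surface a broken flash-attention install at build time rather than at first inference; a minimal sketch (not in the PR, and it assumes the Triton backend imports cleanly without a visible GPU):

# Sketch: verify the Triton-backend flash_attn package imports.
RUN FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE" python3 -c "import flash_attn; print(flash_attn.__version__)"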
Comment on lines +25 to +39
gargrahul (Collaborator), Apr 24, 2026:
@vadseshu let us pin FA to a working commit as suggested.
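
If the pinned-SHA build path above is re-enabled, the arch override is passed at build time as the comments describe; a hypothetical invocation (the image tag is a placeholder):

docker build \
  -f docker/pyt_hy_video.ubuntu.amd.Dockerfile \
  --build-arg FA_GPU_ARCH=gfx950 \
  -t hunyuanvideo-rocm:dev .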
# RPD profiling
RUN apt update && \
apt install -y sqlite3 libsqlite3-dev libfmt-dev
# Upstream's Makefile runs pip install --user, which is rejected inside a venv.
RUN git clone https://github.com/ROCmSoftwarePlatform/rocmProfileData && \
cd rocmProfileData && \
sed -i 's/pip install --user/pip install/g' rocpd_python/Makefile && \
make && make install && \
cd rocpd_python && python setup.py install && cd .. && \
cd rpd_tracer && python setup.py install && cd ..
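
Once installed, RPD is normally driven through the tracer's wrapper script; a rough usage sketch (script and converter paths are taken from the rocmProfileData repo and may differ by version; the workload script is a placeholder):

# Record a trace, then convert it for viewing in chrome://tracing.
runTracer.sh -o trace.rpd python3 sample_video.py
python3 rocmProfileData/tools/rpd2tracing.py trace.rpd trace.json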
2 changes: 1 addition & 1 deletion docker/pyt_mochi_inference.ubuntu.amd.Dockerfile
@@ -33,7 +33,7 @@ RUN mkdir -p $WORKSPACE_DIR
WORKDIR $WORKSPACE_DIR

ARG FA_REPO="https://github.com/Dao-AILab/flash-attention.git"
ARG PYTORCH_ROCM_ARCH=gfx90a;gfx942;gfx1100;gfx1101;gfx1200;gfx1201
ARG PYTORCH_ROCM_ARCH=gfx950;gfx90a;gfx942;gfx1100;gfx1101;gfx1200;gfx1201
RUN git clone ${FA_REPO}
RUN cd flash-attention \
&& git submodule update --init \
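
To confirm the runtime GPU is actually covered by that arch list, query the device's gfx target inside the container (a sketch; assumes a ROCm-visible GPU):

# Either line should print an arch that appears in PYTORCH_ROCM_ARCH above.
rocminfo | grep -m1 -o 'gfx[0-9a-f]*'
python3 -c "import torch; print(torch.cuda.get_device_properties(0).gcnArchName)"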