diff --git a/docker/pyt_hy_video.ubuntu.amd.Dockerfile b/docker/pyt_hy_video.ubuntu.amd.Dockerfile
index 062fc9f..70a7f6a 100644
--- a/docker/pyt_hy_video.ubuntu.amd.Dockerfile
+++ b/docker/pyt_hy_video.ubuntu.amd.Dockerfile
@@ -6,7 +6,8 @@
 ARG work_dir=/hunyuanvideo
 WORKDIR $work_dir
 
-RUN pip install diffusers==0.32.2 distvae yunchang==0.6.0 opencv-python accelerate
+# Pin transformers 4.x for diffusers 0.32.2 (FLAX_WEIGHTS_NAME removed in v5).
+RUN pip install diffusers==0.32.2 distvae yunchang==0.6.0 opencv-python accelerate "transformers>=4.44.0,<5.0"
 RUN pip install imageio imageio-ffmpeg
 RUN pip install beautifulsoup4==4.12.3
 RUN pip install sentencepiece==0.1.99
@@ -18,22 +19,35 @@
 RUN git clone https://github.com/xdit-project/xDiT && \
     cd xDiT && git checkout 775a5263d95518a733e4f239ad21228b755598bb && \
     pip install --no-deps -e .
-# flash attn
-ARG FA_SHA="22c0358"
-ARG FA_REPO="https://github.com/ROCm/flash-attention.git"
-ARG PYTORCH_ROCM_ARCH="gfx942"
-RUN git clone ${FA_REPO} && \
-    cd flash-attention && \
-    git checkout ${FA_SHA} && \
-    git submodule update --init && \
-    GPU_ARCHS=${PYTORCH_ROCM_ARCH} python3 setup.py bdist_wheel --dist-dir=dist && \
-    pip install dist/*.whl;
+# flash attn (avoid ARG name PYTORCH_ROCM_ARCH: base image ENV can shadow it and expand to "")
+# ROCm flash-attention: FA_GPU_ARCH=native needs a visible GPU at compile time and fails in CI/docker.
+# Coerce native/empty to gfx942 for headless CI; for MI350 pass --build-arg FA_GPU_ARCH=gfx950 (needs FA_SHA with gfx950 in setup.py).
+#ARG FA_SHA="b3ae4966b2567811880db10d9e040a775b99c7d7"
+#ARG FA_REPO="https://github.com/ROCm/flash-attention.git"
+#ARG FA_GPU_ARCH=gfx942
+#RUN git clone ${FA_REPO} && \
+#    cd flash-attention && \
+#    git checkout ${FA_SHA} && \
+#    git submodule update --init && \
+#    F="${FA_GPU_ARCH}" && \
+#    if [ -z "$F" ]; then F=gfx942; fi && \
+#    if [ "$F" = "native" ]; then F=gfx942; fi && \
+#    GPU_ARCHS="$F" python3 setup.py bdist_wheel --dist-dir=dist && \
+#    pip install dist/*.whl;
+# NOTE(review): clone below is unpinned — check out a known-good commit for reproducible builds.
+RUN git clone https://github.com/ROCm/flash-attention.git \
+    && cd flash-attention \
+    && FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE" pip install --no-build-isolation .
+
+
 # RPD profiling
 RUN apt update && \
     apt install -y sqlite3 libsqlite3-dev libfmt-dev
 
+# Upstream uses pip --user; venv disallows that.
 RUN git clone https://github.com/ROCmSoftwarePlatform/rocmProfileData && \
     cd rocmProfileData && \
+    sed -i 's/pip install --user/pip install/g' rocpd_python/Makefile && \
     make && make install && \
     cd rocpd_python && python setup.py install && cd .. && \
     cd rpd_tracer && python setup.py install && cd ..
diff --git a/docker/pyt_mochi_inference.ubuntu.amd.Dockerfile b/docker/pyt_mochi_inference.ubuntu.amd.Dockerfile
index 4cbbd1e..fb9426f 100644
--- a/docker/pyt_mochi_inference.ubuntu.amd.Dockerfile
+++ b/docker/pyt_mochi_inference.ubuntu.amd.Dockerfile
@@ -33,7 +33,7 @@
 RUN mkdir -p $WORKSPACE_DIR
 WORKDIR $WORKSPACE_DIR
 ARG FA_REPO="https://github.com/Dao-AILab/flash-attention.git"
-ARG PYTORCH_ROCM_ARCH=gfx90a;gfx942;gfx1100;gfx1101;gfx1200;gfx1201
+ARG PYTORCH_ROCM_ARCH=gfx950;gfx90a;gfx942;gfx1100;gfx1101;gfx1200;gfx1201
 RUN git clone ${FA_REPO}
 RUN cd flash-attention \
     && git submodule update --init \