From 90fd1465f04a0b5516fd49b63d2ce3cf84d52cc6 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Mon, 27 Oct 2025 11:10:34 +0100 Subject: [PATCH 01/11] [E2E] Align benchmarks/dynamo configs to match what torch-xpu-ops uses Signed-off-by: Anatoly Myachev --- .github/workflows/e2e-reusable.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/e2e-reusable.yml b/.github/workflows/e2e-reusable.yml index 71446621de..135229e9ff 100644 --- a/.github/workflows/e2e-reusable.yml +++ b/.github/workflows/e2e-reusable.yml @@ -72,6 +72,18 @@ jobs: - name: Checkout repository uses: actions/checkout@v5 + - name: Set torch-xpu-ops commit id + run: | + TORCH_XPU_OPS_COMMIT_ID="$(<.github/pins/e2e_reference_torch-xpu-ops.txt)" + echo "TORCH_XPU_OPS_COMMIT_ID=$TORCH_XPU_OPS_COMMIT_ID" | tee -a "$GITHUB_ENV" + + - name: Clone torch-xpu-ops repository + uses: actions/checkout@v5 + with: + repository: intel/torch-xpu-ops + ref: ${{ env.TORCH_XPU_OPS_COMMIT_ID }} + path: torch-xpu-ops + - name: Load pip cache id: pip-cache uses: ./.github/actions/load @@ -97,6 +109,10 @@ jobs: with: ref: ${{ inputs.pytorch_ref }} + - name: Update PyTorch benchmarks/dynamo configs using torch-xpu-ops custom configs + run: | + rsync -avz torch-xpu-ops/.ci/benchmarks/ pytorch/benchmarks/dynamo/ + - name: Identify pinned versions run: | cd pytorch From b599938674fcf31091d2774dd589a4c517661be3 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Mon, 27 Oct 2025 11:18:51 +0100 Subject: [PATCH 02/11] install rsync Signed-off-by: Anatoly Myachev --- .github/workflows/e2e-reusable.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/e2e-reusable.yml b/.github/workflows/e2e-reusable.yml index 135229e9ff..ab2320b0a2 100644 --- a/.github/workflows/e2e-reusable.yml +++ b/.github/workflows/e2e-reusable.yml @@ -111,6 +111,7 @@ jobs: - name: Update PyTorch benchmarks/dynamo configs using torch-xpu-ops custom configs run: | + sudo apt install -y rsync rsync -avz 
torch-xpu-ops/.ci/benchmarks/ pytorch/benchmarks/dynamo/ - name: Identify pinned versions From 260d5f1b39c9a787ce0ebc49f714d9e657e44b4f Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Mon, 27 Oct 2025 11:29:48 +0100 Subject: [PATCH 03/11] update torch-xpu-ops pin Signed-off-by: Anatoly Myachev --- .github/pins/e2e_reference_torch-xpu-ops.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/pins/e2e_reference_torch-xpu-ops.txt b/.github/pins/e2e_reference_torch-xpu-ops.txt index 47097a86a0..775841cbc7 100644 --- a/.github/pins/e2e_reference_torch-xpu-ops.txt +++ b/.github/pins/e2e_reference_torch-xpu-ops.txt @@ -1 +1 @@ -ce9db15136c5e8ba1b51710aae574ce4791c5d73 +779f89911779b8c7296aaec3cf74945c18acc270 From 21cde65dea0c1fa77693b6378bcb511961a9b2a1 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Mon, 27 Oct 2025 18:29:08 +0100 Subject: [PATCH 04/11] add '--disable-cudagraphs' Signed-off-by: Anatoly Myachev --- scripts/inductor_xpu_test.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/inductor_xpu_test.sh b/scripts/inductor_xpu_test.sh index b039507d8d..3e86ceb012 100755 --- a/scripts/inductor_xpu_test.sh +++ b/scripts/inductor_xpu_test.sh @@ -54,10 +54,10 @@ if (( $EUID == 0 )); then fi if [[ $DT == "amp_bf16" ]]; then - ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --amp -d${DEVICE} -n10 --no-skip --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log + ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --amp --disable-cudagraphs -d${DEVICE} -n10 --no-skip --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log elif [[ $DT == "amp_fp16" ]]; then export INDUCTOR_AMP_DT=float16 - 
ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --amp -d${DEVICE} -n10 --no-skip --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log + ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --amp --disable-cudagraphs -d${DEVICE} -n10 --no-skip --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log else - ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --${DT} -d${DEVICE} -n10 --no-skip --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log + ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --${DT} --disable-cudagraphs -d${DEVICE} -n10 --no-skip --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log fi From 2908c90f3d45ea4a7c0d0b8617ba13177d0214e5 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Mon, 27 Oct 2025 19:01:40 +0100 Subject: [PATCH 05/11] update torchbench installation Signed-off-by: Anatoly Myachev --- .github/workflows/e2e-reusable.yml | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/.github/workflows/e2e-reusable.yml b/.github/workflows/e2e-reusable.yml index ab2320b0a2..ca3dfb1b33 100644 --- a/.github/workflows/e2e-reusable.yml +++ b/.github/workflows/e2e-reusable.yml @@ -154,7 +154,7 @@ jobs: - name: Install python test dependencies run: | - pip install pyyaml pandas scipy 'numpy==1.26.4' psutil pyre_extensions torchrec + pip install pyyaml pandas scipy 'numpy==1.26.4' psutil - name: 
Install transformers package if: ${{ inputs.suite == 'huggingface' }} @@ -203,13 +203,21 @@ jobs: if: ${{ inputs.suite == 'torchbench' }} run: | cd benchmark + sed -i 's/^ *pynvml.*//' requirements.txt + pip install -r requirements.txt + echo "PYTHONPATH=${PWD}:${PYTHONPATH}" >> ${GITHUB_ENV} + # for dlrm + pip install pyre-extensions + curl -fsSL https://raw.githubusercontent.com/facebookresearch/dlrm/refs/heads/torchrec-dlrm/requirements.txt |xargs pip install + # for soft_actor_critic, temp fix + pip install git+https://github.com/nocoding03/gym@fix-np + if [[ "${{ inputs.only_one_model }}" ]]; then python install.py "${{ inputs.only_one_model }}" else # install all models - python install.py + python install.py --continue_on_fail fi - pip install -e . - name: Run e2e ${{ inputs.test_mode }} tests env: From 020a92f0fbb9cdd231912d42642aa6f83f78fcad Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Mon, 27 Oct 2025 20:09:10 +0100 Subject: [PATCH 06/11] return 'pip install -e .' Signed-off-by: Anatoly Myachev --- .github/workflows/e2e-reusable.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/e2e-reusable.yml b/.github/workflows/e2e-reusable.yml index ca3dfb1b33..11f5e05e4e 100644 --- a/.github/workflows/e2e-reusable.yml +++ b/.github/workflows/e2e-reusable.yml @@ -218,6 +218,7 @@ jobs: # install all models python install.py --continue_on_fail fi + pip install -e .
- name: Run e2e ${{ inputs.test_mode }} tests env: From eedf6ab53c2ab83c905b5338ff6d2840617397bd Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Mon, 27 Oct 2025 21:09:43 +0100 Subject: [PATCH 07/11] Apply suggestion from @anmyachev --- .github/workflows/e2e-reusable.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/e2e-reusable.yml b/.github/workflows/e2e-reusable.yml index 11f5e05e4e..5fa7c3c04a 100644 --- a/.github/workflows/e2e-reusable.yml +++ b/.github/workflows/e2e-reusable.yml @@ -205,7 +205,6 @@ jobs: cd benchmark sed -i 's/^ *pynvml.*//' requirements.txt pip install -r requirements.txt - echo "PYTHONPATH=${PWD}:${PYTHONPATH}" >> ${GITHUB_ENV} # for dlrm pip install pyre-extensions curl -fsSL https://raw.githubusercontent.com/facebookresearch/dlrm/refs/heads/torchrec-dlrm/requirements.txt |xargs pip install From 5325768ef8e5a47d7658584deb2e81cf85928955 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 28 Oct 2025 13:17:27 +0000 Subject: [PATCH 08/11] don't use symlink in load action by default Signed-off-by: Anatoly Myachev --- .github/actions/load/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/load/action.yml b/.github/actions/load/action.yml index 2124442724..6bc3b76eb0 100644 --- a/.github/actions/load/action.yml +++ b/.github/actions/load/action.yml @@ -12,7 +12,7 @@ inputs: required: true symlink: description: Create a symlink instead of copying from cache - default: "true" + default: "false" enabled: description: Enable cache default: "true" From f4d5b13f0b49aa0fbbbdea01622f2667addf41b8 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 28 Oct 2025 15:29:17 +0100 Subject: [PATCH 09/11] Apply suggestion from @anmyachev --- .github/workflows/e2e-reusable.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/e2e-reusable.yml b/.github/workflows/e2e-reusable.yml index 5fa7c3c04a..46a851d5b0 100644 --- a/.github/workflows/e2e-reusable.yml +++ 
b/.github/workflows/e2e-reusable.yml @@ -113,6 +113,7 @@ jobs: run: | sudo apt install -y rsync rsync -avz torch-xpu-ops/.ci/benchmarks/ pytorch/benchmarks/dynamo/ + ls pytorch/benchmarks/dynamo/ - name: Identify pinned versions run: | From e1027c28acd61c14cd9c54ff709bcea8275482ca Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 28 Oct 2025 16:24:18 +0000 Subject: [PATCH 10/11] define 'HF_TOKEN' also Signed-off-by: Anatoly Myachev --- .github/workflows/e2e-reusable.yml | 1 + scripts/inductor_xpu_test.sh | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/e2e-reusable.yml b/.github/workflows/e2e-reusable.yml index 46a851d5b0..31bb65d9c8 100644 --- a/.github/workflows/e2e-reusable.yml +++ b/.github/workflows/e2e-reusable.yml @@ -222,6 +222,7 @@ jobs: - name: Run e2e ${{ inputs.test_mode }} tests env: + HF_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} run: | cd pytorch diff --git a/scripts/inductor_xpu_test.sh b/scripts/inductor_xpu_test.sh index 3e86ceb012..18a3687a93 100755 --- a/scripts/inductor_xpu_test.sh +++ b/scripts/inductor_xpu_test.sh @@ -54,10 +54,10 @@ if (( $EUID == 0 )); then fi if [[ $DT == "amp_bf16" ]]; then - ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --amp --disable-cudagraphs -d${DEVICE} -n10 --no-skip --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log + ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --amp --disable-cudagraphs -d${DEVICE} -n10 --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log elif [[ $DT == "amp_fp16" ]]; then export INDUCTOR_AMP_DT=float16 - ZE_AFFINITY_MASK=${CARD} python 
benchmarks/dynamo/${SUITE}.py --${SCENARIO} --amp --disable-cudagraphs -d${DEVICE} -n10 --no-skip --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log + ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --amp --disable-cudagraphs -d${DEVICE} -n10 --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log else - ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --${DT} --disable-cudagraphs -d${DEVICE} -n10 --no-skip --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log + ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --${DT} --disable-cudagraphs -d${DEVICE} -n10 --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log fi From 91e9746843a6c7b58fe28ede7a04e01468240ea2 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 28 Oct 2025 17:45:36 +0000 Subject: [PATCH 11/11] move HF tokens to global scope Signed-off-by: Anatoly Myachev --- .github/workflows/e2e-reusable.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/e2e-reusable.yml b/.github/workflows/e2e-reusable.yml index 31bb65d9c8..4fbe569021 100644 --- a/.github/workflows/e2e-reusable.yml +++ b/.github/workflows/e2e-reusable.yml @@ -50,6 +50,8 @@ env: TRITON_DISABLE_LINE_INFO: 1 PYTHON_VERSION: "3.10" BENCHMARK_REPO: pytorch/benchmark + HF_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} jobs: run_tests: @@ 
-221,9 +223,6 @@ jobs: pip install -e . - name: Run e2e ${{ inputs.test_mode }} tests - env: - HF_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} run: | cd pytorch