diff --git a/.github/actions/load/action.yml b/.github/actions/load/action.yml
index 2124442724..6bc3b76eb0 100644
--- a/.github/actions/load/action.yml
+++ b/.github/actions/load/action.yml
@@ -12,7 +12,7 @@ inputs:
     required: true
   symlink:
     description: Create a symlink instead of copying from cache
-    default: "true"
+    default: "false"
   enabled:
     description: Enable cache
     default: "true"
diff --git a/.github/pins/e2e_reference_torch-xpu-ops.txt b/.github/pins/e2e_reference_torch-xpu-ops.txt
index 47097a86a0..775841cbc7 100644
--- a/.github/pins/e2e_reference_torch-xpu-ops.txt
+++ b/.github/pins/e2e_reference_torch-xpu-ops.txt
@@ -1 +1 @@
-ce9db15136c5e8ba1b51710aae574ce4791c5d73
+779f89911779b8c7296aaec3cf74945c18acc270
diff --git a/.github/workflows/e2e-reusable.yml b/.github/workflows/e2e-reusable.yml
index 71446621de..4fbe569021 100644
--- a/.github/workflows/e2e-reusable.yml
+++ b/.github/workflows/e2e-reusable.yml
@@ -50,6 +50,8 @@ env:
   TRITON_DISABLE_LINE_INFO: 1
   PYTHON_VERSION: "3.10"
   BENCHMARK_REPO: pytorch/benchmark
+  HF_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+  HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

 jobs:
   run_tests:
@@ -72,6 +74,18 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@v5

+      - name: Set torch-xpu-ops commit id
+        run: |
+          TORCH_XPU_OPS_COMMIT_ID="$(<.github/pins/e2e_reference_torch-xpu-ops.txt)"
+          echo "TORCH_XPU_OPS_COMMIT_ID=$TORCH_XPU_OPS_COMMIT_ID" | tee -a "$GITHUB_ENV"
+
+      - name: Clone torch-xpu-ops repository
+        uses: actions/checkout@v5
+        with:
+          repository: intel/torch-xpu-ops
+          ref: ${{ env.TORCH_XPU_OPS_COMMIT_ID }}
+          path: torch-xpu-ops
+
       - name: Load pip cache
         id: pip-cache
         uses: ./.github/actions/load
@@ -97,6 +111,12 @@ jobs:
         with:
           ref: ${{ inputs.pytorch_ref }}

+      - name: Update PyTorch benchmarks/dynamo configs using torch-xpu-ops custom configs
+        run: |
+          sudo apt install -y rsync
+          rsync -avz torch-xpu-ops/.ci/benchmarks/ pytorch/benchmarks/dynamo/
+          ls pytorch/benchmarks/dynamo/
+
       - name: Identify pinned versions
         run: |
           cd pytorch
@@ -137,7 +157,7 @@ jobs:

       - name: Install python test dependencies
         run: |
-          pip install pyyaml pandas scipy 'numpy==1.26.4' psutil pyre_extensions torchrec
+          pip install pyyaml pandas scipy 'numpy==1.26.4' psutil

       - name: Install transformers package
         if: ${{ inputs.suite == 'huggingface' }}
@@ -186,17 +206,23 @@ jobs:
         if: ${{ inputs.suite == 'torchbench' }}
         run: |
           cd benchmark
+          sed -i 's/^ *pynvml.*//' requirements.txt
+          pip install -r requirements.txt
+          # for dlrm
+          pip install pyre-extensions
+          curl -fsSL https://raw.githubusercontent.com/facebookresearch/dlrm/refs/heads/torchrec-dlrm/requirements.txt |xargs pip install
+          # for soft_actor_critic, temp fix
+          pip install git+https://github.com/nocoding03/gym@fix-np
+
           if [[ "${{ inputs.only_one_model }}" ]]; then
             python install.py "${{ inputs.only_one_model }}"
           else
             # install all models
-            python install.py
+            python install.py --continue_on_fail
           fi
           pip install -e .

       - name: Run e2e ${{ inputs.test_mode }} tests
-        env:
-          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
         run: |
           cd pytorch
diff --git a/scripts/inductor_xpu_test.sh b/scripts/inductor_xpu_test.sh
index b039507d8d..18a3687a93 100755
--- a/scripts/inductor_xpu_test.sh
+++ b/scripts/inductor_xpu_test.sh
@@ -54,10 +54,10 @@ if (( $EUID == 0 )); then
 fi

 if [[ $DT == "amp_bf16" ]]; then
-    ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --amp -d${DEVICE} -n10 --no-skip --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log
+    ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --amp --disable-cudagraphs -d${DEVICE} -n10 --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log
 elif [[ $DT == "amp_fp16" ]]; then
     export INDUCTOR_AMP_DT=float16
-    ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --amp -d${DEVICE} -n10 --no-skip --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log
+    ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --amp --disable-cudagraphs -d${DEVICE} -n10 --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log
 else
-    ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --${DT} -d${DEVICE} -n10 --no-skip --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log
+    ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --${DT} --disable-cudagraphs -d${DEVICE} -n10 --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log
 fi