diff --git a/.github/workflows/ce_daily_dev.yml b/.github/workflows/ce_daily_dev.yml
index c53d7f1b6..6745d55f5 100644
--- a/.github/workflows/ce_daily_dev.yml
+++ b/.github/workflows/ce_daily_dev.yml
@@ -1,6 +1,9 @@
 name: CE Daily Dev
 
 on:
+  pull_request:  # NOTE(review): PR trigger on a cron-scheduled workflow; presumably temporary, confirm before merge
+    types: [opened, synchronize]  # run when a PR is opened or its head receives new commits
+    branches: [develop]  # only for PRs whose base branch is develop
   schedule:
     - cron: '0 19 * * *'
   workflow_dispatch: # 可选：允许手动点 Run
@@ -44,1083 +47,28 @@ defaults:
     shell: bash
 
 jobs:
-  single_card_test:
-    name: Unit test (single card)
-    runs-on:
-      group: Fleet-H-single-card
-    strategy:
-      fail-fast: false
-      max-parallel: 2
-      matrix:
-        include:
-          - cuda: "12.6"
-            python: "3.10"
-          - cuda: "12.9"
-            python: "3.10"
-          - cuda: "13.0"
-            python: "3.10"
-          - cuda: "12.9"
-            python: "3.11"
-          - cuda: "12.9"
-            python: "3.12"
-          - cuda: "12.9"
-            python: "3.13"
-    env:
-      PIP_CACHE_DIR: /home/.cache/pip
-      CACHE_DIR: /home/.cache
-      TASK: paddlefleet-CE-${{ matrix.cuda }}-${{ matrix.python }}-DEV-single-card-test
-    steps:
-      - name: Determine the runner
-        run: |
-          gpu_id=$(( $(echo $PWD | awk -F'/' '{print $3}' | awk -F'-' '{print $2}') + 3 ))
-          cuda=${{ matrix.cuda }}
-          python_version=${{ matrix.python }}
-          echo GPU_DEVICES="$gpu_id" >> $GITHUB_ENV
-          if [ "${cuda}" == "12.9" ]; then
-            docker_image=${docker_image_cu129}
-            cuda_version="cu129"
-            paddle_url=${paddle_url_cu129}
-          elif [ "${cuda}" == "13.0" ]; then
-            docker_image=${docker_image_cu130}
-            cuda_version="cu130"
-            paddle_url=${paddle_url_cu130}
-          else
-            docker_image=${docker_image_cu126}
-            cuda_version="cu126"
-            paddle_url=${paddle_url_cu126}
-          fi
-          echo "DOCKER_IMAGE=${docker_image}" >> $GITHUB_ENV
-          echo "CUDA_VERSION=${cuda_version}" >> $GITHUB_ENV
-          echo "PYTHON_VERSION=${python_version}" >> $GITHUB_ENV
-          echo "PADDLE_URL=${paddle_url}" >> $GITHUB_ENV
-      - name: Check docker image and run container
-        env:
-          GPU_DEVICES: ${{ env.GPU_DEVICES }}
-          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE }}
-          CUDA_VERSION: ${{ env.CUDA_VERSION }}
-          PYTHON_VERSION: ${{ env.PYTHON_VERSION }}
-          PADDLE_URL: ${{ env.PADDLE_URL }}
-
-        run: |
-          container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
-          echo "container_name=${container_name}" >> ${{ github.env }}
-          docker pull ${DOCKER_IMAGE}
-          docker run -d -t --name ${container_name} --gpus "\"device=${GPU_DEVICES}\"" --shm-size=32G \
-            -v "/dev/shm:/dev/shm"  \
-            -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
-            -v ${{ github.workspace }}/../../..:/root \
-            -v /ssd1/paddle-1/action_cache:/home/.cache \
-            -v ${{ github.workspace }}:/paddle \
-            -e BRANCH \
-            -e PR_ID \
-            -e COMMIT_ID \
-            -e PADDLE_ROOT \
-            -e ci_scripts \
-            -e CACHE_DIR \
-            -e no_proxy \
-            -e use_release \
-            -e repo_flag="paddlefleet" \
-            -e PIP_CACHE_DIR \
-            -e work_dir \
-            -e CUDA_VERSION \
-            -e PYTHON_VERSION \
-            -e PADDLE_URL \
-            -e GITHUB_HEAD_REF="${{ github.head_ref }}" \
-            -e GITHUB_REPO_NAME="${{ github.repository }}" \
-            -e GITHUB_TOKEN="${{ secrets.GITHUB_TOKEN }}" \
-            -e GITHUB_RUN_ID="${{ github.run_id }}" \
-            -w /paddle --network host ${DOCKER_IMAGE}
-
-
-      - name: Single card test
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -xce '
-          pwd
-          find . -maxdepth 1 -name "--*" -delete
-          rm -rf * .[^.]*
-          source /root/proxy
-          mkdir -p /home/.cache/pip
-          pip cache dir
-          if [ ${PYTHON_VERSION} != "3.10" ]; then
-            wget https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
-            bash miniconda.sh -b -p $HOME/miniconda -u
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main
-            conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
-            conda init bash
-            conda create -n python${PYTHON_VERSION} python=${PYTHON_VERSION} -y
-            conda activate python${PYTHON_VERSION}
-            paddle_whl=paddlepaddle_gpu-0.0.0-cp${PYTHON_VERSION//./}-cp${PYTHON_VERSION//./}-linux_x86_64.whl
-            paddle_url="${PADDLE_URL}${paddle_whl}"
-          else
-            paddle_url=${PADDLE_URL}
-          fi
-          if [ ${CUDA_VERSION} == "cu129" ]; then
-            paddle_whl=paddlepaddle_gpu-0.0.0-cp${PYTHON_VERSION//./}-cp${PYTHON_VERSION//./}-linux_x86_64.whl
-            paddle_url="${PADDLE_URL}${paddle_whl}"
-          fi
-          echo "Install uv"
-          export WITH_COVERAGE=ON
-          pip install --upgrade pip
-          git clone https://github.com/PaddlePaddle/PaddleFleet.git
-          git config --global --add safe.directory /paddle/PaddleFleet
-          cd PaddleFleet
-          git config user.name "PaddleCI"
-          git config user.email "paddle_ci@example.com"
-          git config pull.rebase false
-          pip install colorlog>=6.10.1
-          python -m pip install --pre  paddlefleet --index-url https://www.paddlepaddle.org.cn/packages/nightly/${CUDA_VERSION}/ --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/${CUDA_VERSION}/ --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple
-          if [ "${use_release}" == "true" ]; then
-            echo "Using pre-built paddle package from develop branch."
-            pip install ${paddle_url} --index-url=https://www.paddlepaddle.org.cn/packages/nightly/${CUDA_VERSION}/ --force-reinstall --no-cache-dir
-          fi
-          pip install uv coverage==7.6.1 bce-python-sdk==0.8.74 wrapt matplotlib==3.10.8 pytest parameterized
-          echo "Paddle Commit"
-          python -c "import paddle; print(paddle.version.commit)"
-          echo "PaddleFleet Commit"
-          python -c "import paddlefleet; print(paddlefleet.version.commit)"
-          export PYTHONPATH=/paddle/PaddleFleet:$PYTHONPATH
-          bash ci/single_card_test.sh
-          single_card_exit_code=$?
-          if [[ "$single_card_exit_code" != "0" ]]; then
-            echo -e "::error:: \033[31mSingle card test failed.\033[0m"
-            exit 1
-          else
-            echo -e "\033[32mSingle card test succeeded.\033[0m"
-          fi
-          '
-
-      - name: Single card sonic moe test
-        if: matrix.python == '3.12' && matrix.cuda == '12.9' && always()
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -xce '
-          pwd
-          source /root/proxy
-          source "$HOME/miniconda/etc/profile.d/conda.sh"
-          conda activate python${{ matrix.python }}
-          echo "Paddle Commit"
-          python -c "import paddle; print(paddle.version.commit)"
-          echo "PaddleFleet Commit"
-          python -c "import paddlefleet; print(paddlefleet.version.commit)"
-          cd PaddleFleet
-          bash ci/single_card_sonic.sh
-          single_card_exit_code=$?
-          if [[ "$single_card_exit_code" != "0" ]]; then
-            echo -e "::error:: \033[31mSingle card test failed.\033[0m"
-            exit 1
-          else
-            echo -e "\033[32mSingle card test succeeded.\033[0m"
-          fi
-          '
-
-      - name: Terminate and delete the container
-        if: ${{ always() }}
-        run: |
-          set +e
-          docker exec -t ${{ env.container_name }} /bin/bash -c 'bash ci/clean_uv_cache.sh; rm -rf * .[^.]*'
-          docker rm -f ${{ env.container_name }}
-
-  multi-card_test:
-    name: Unit test (multi-card)
-    runs-on:
-      group: Fleet-H-multi-card
-    strategy:
-      fail-fast: false
-      max-parallel: 1
-      matrix:
-        include:
-          - cuda: "12.6"
-            python: "3.10"
-          - cuda: "12.9"
-            python: "3.10"
-          - cuda: "13.0"
-            python: "3.10"
-          - cuda: "12.9"
-            python: "3.11"
-          - cuda: "12.9"
-            python: "3.12"
-          - cuda: "12.9"
-            python: "3.13"
-    env:
-      PIP_CACHE_DIR: /home/.cache/pip
-      TASK: paddlefleet-CE-${{ matrix.cuda }}-${{ matrix.python }}-multi-card_test-dev
-    steps:
-      - name: setup cuda and python
-        run: |
-          cuda=${{ matrix.cuda }}
-          python_version=${{ matrix.python }}
-          if [ "${cuda}" == "12.9" ]; then
-            docker_image=${docker_image_cu129}
-            cuda_version="cu129"
-            paddle_url=${paddle_url_cu129}
-          elif [ "${cuda}" == "13.0" ]; then
-            docker_image=${docker_image_cu130}
-            cuda_version="cu130"
-            paddle_url=${paddle_url_cu130}
-          else
-            docker_image=${docker_image_cu126}
-            cuda_version="cu126"
-            paddle_url=${paddle_url_cu126}
-          fi
-          echo "DOCKER_IMAGE=${docker_image}" >> $GITHUB_ENV
-          echo "CUDA_VERSION=${cuda_version}" >> $GITHUB_ENV
-          echo "PYTHON_VERSION=${python_version}" >> $GITHUB_ENV
-          echo "PADDLE_URL=${paddle_url}" >> $GITHUB_ENV
-      - name: Check docker image and run container
-        env:
-          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE }}
-          CUDA_VERSION: ${{ env.CUDA_VERSION }}
-          PYTHON_VERSION: ${{ env.PYTHON_VERSION }}
-          PADDLE_URL: ${{ env.PADDLE_URL }}
-        run: |
-          container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
-          echo "container_name=${container_name}" >> ${{ github.env }}
-          docker pull ${DOCKER_IMAGE}
-          docker run -d -t --gpus all --name ${container_name} \
-            -v "/dev/shm:/dev/shm"  \
-            -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
-            -v ${{ github.workspace }}/../../..:/root \
-            -v /ssd1/paddle-1/action_cache:/home/.cache \
-            -v ${{ github.workspace }}:/paddle \
-            -e BRANCH \
-            -e PR_ID \
-            -e COMMIT_ID \
-            -e PADDLE_ROOT \
-            -e ci_scripts \
-            -e CACHE_DIR \
-            -e no_proxy \
-            -e PIP_CACHE_DIR \
-            -e repo_flag="paddlefleet" \
-            -e use_release \
-            -e work_dir \
-            -e CUDA_VERSION \
-            -e PYTHON_VERSION \
-            -e PADDLE_URL \
-            -e GITHUB_SHA="${{ github.event.pull_request.head.sha }}" \
-            -e GITHUB_HEAD_REF="${{ github.head_ref }}" \
-            -e GITHUB_REPO_NAME="${{ github.repository }}" \
-            -e GITHUB_EVENT_NAME="${{ github.event_name }}" \
-            -e GITHUB_TOKEN="${{ secrets.GITHUB_TOKEN }}" \
-            -e GITHUB_RUN_ID="${{ github.run_id }}" \
-            -w /paddle --network host ${DOCKER_IMAGE}
-
-      - name: Install PaddleFleet
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -ce '
-          find . -maxdepth 1 -name "--*" -delete
-          rm -rf * .[^.]*
-          source /root/proxy
-          pip install --upgrade pip
-          if [ ${PYTHON_VERSION} != "3.10" ]; then
-            wget https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
-            bash miniconda.sh -b -p $HOME/miniconda -u
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main
-            conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
-            conda init bash
-            conda create -n python${PYTHON_VERSION} python=${PYTHON_VERSION} -y
-            conda activate python${PYTHON_VERSION}
-            paddle_whl=paddlepaddle_gpu-0.0.0-cp${PYTHON_VERSION//./}-cp${PYTHON_VERSION//./}-linux_x86_64.whl
-            paddle_url="${PADDLE_URL}${paddle_whl}"
-          else
-            paddle_url=${PADDLE_URL}
-          fi
-          if [ ${CUDA_VERSION} == "cu129" ]; then
-            paddle_whl=paddlepaddle_gpu-0.0.0-cp${PYTHON_VERSION//./}-cp${PYTHON_VERSION//./}-linux_x86_64.whl
-            paddle_url="${PADDLE_URL}${paddle_whl}"
-          fi
-          git clone https://github.com/PaddlePaddle/PaddleFleet.git
-          cd PaddleFleet
-          git config --global --add safe.directory /paddle/PaddleFleet
-          git config user.name "PaddleCI"
-          git config user.email "paddle_ci@example.com"
-          git config pull.rebase false
-          git submodule update --init --recursive
-          pip install colorlog>=6.10.1
-          pip install nvidia-cutlass-dsl==4.2.1
-          python -m pip install --pre  paddlefleet --index-url https://www.paddlepaddle.org.cn/packages/nightly/${CUDA_VERSION}/ --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/${CUDA_VERSION}/ --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple
-          if [ "${use_release}" == "true" ]; then
-            echo "Using pre-built paddle package from develop branch."
-            pip install ${paddle_url} --index-url=https://www.paddlepaddle.org.cn/packages/nightly/${CUDA_VERSION}/ --force-reinstall --no-cache-dir
-          fi
-          pip install uv coverage==7.6.1 bce-python-sdk==0.8.74 wrapt pytest matplotlib==3.10.8
-          wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
-          chmod +x /usr/local/bin/yq
-          '
-
-      - name: Multi-card test
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -ce '
-          if [ ${PYTHON_VERSION} != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          echo "Paddle Commit"
-          python -c "import paddle; print(paddle.version.commit)"
-          echo "PaddleFleet Commit"
-          python -c "import paddlefleet; print(paddlefleet.version.commit)"
-          export PYTHONPATH=/paddle/PaddleFleet:$PYTHONPATH
-          bash PaddleFleet/ci/multi-card_test.sh
-          multi_card_exit_code=$?
-          if [[ "$multi_card_exit_code" != "0" ]]; then
-            echo -e "::error:: \033[31mMulti card test failed.\033[0m"
-            exit 1
-          else
-            echo -e "\033[32mMulti card test succeeded.\033[0m"
-          fi
-          '
-
-      - name: Terminate and delete the container
-        if: ${{ always() }}
-        run: |
-          set +e
-          docker exec -t ${{ env.container_name }} /bin/bash -c 'bash ci/clean_uv_cache.sh; rm -rf * .[^.]*'
-          docker rm -f ${{ env.container_name }}
-
-
-  integration-test-H20-single-card:
-    name: Integration test (H20, single card)
-    runs-on:
-      group: Fleet-H-single-card
-    strategy:
-      fail-fast: false
-      max-parallel: 2
-      matrix:
-        include:
-          - cuda: "12.6"
-            python: "3.10"
-          - cuda: "12.9"
-            python: "3.10"
-          - cuda: "13.0"
-            python: "3.10"
-          - cuda: "12.9"
-            python: "3.11"
-          - cuda: "12.9"
-            python: "3.12"
-          - cuda: "12.9"
-            python: "3.13"
-    env:
-      PIP_CACHE_DIR: /home/.cache/pip
-      CACHE_DIR: /home/.cache
-      TASK: paddlefleet-CE-${{ matrix.cuda }}-${{ matrix.python }}-DEV-integration-test-single-card-release
-    steps:
-      - name: Determine the runner
-        run: |
-          gpu_id=$(( $(echo $PWD | awk -F'/' '{print $3}' | awk -F'-' '{print $2}') + 3 ))
-          cuda=${{ matrix.cuda }}
-          python_version=${{ matrix.python }}
-          echo GPU_DEVICES="$gpu_id" >> $GITHUB_ENV
-          if [ "${cuda}" == "12.9" ]; then
-            docker_image=${docker_image_cu129}
-            cuda_version="cu129"
-            paddle_url=${paddle_url_cu129}
-          elif [ "${cuda}" == "13.0" ]; then
-            docker_image=${docker_image_cu130}
-            cuda_version="cu130"
-            paddle_url=${paddle_url_cu130}
-          else
-            docker_image=${docker_image_cu126}
-            cuda_version="cu126"
-            paddle_url=${paddle_url_cu126}
-          fi
-          echo "DOCKER_IMAGE=${docker_image}" >> $GITHUB_ENV
-          echo "CUDA_VERSION=${cuda_version}" >> $GITHUB_ENV
-          echo "PYTHON_VERSION=${python_version}" >> $GITHUB_ENV
-          echo "PADDLE_URL=${paddle_url}" >> $GITHUB_ENV
-          echo "BASE_NAME=${cuda_version}-${python_version}-H20" >> $GITHUB_ENV
-      - name: Check docker image and run container
-        env:
-          GPU_DEVICES: ${{ env.GPU_DEVICES }}
-          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE }}
-          CUDA_VERSION: ${{ env.CUDA_VERSION }}
-          PYTHON_VERSION: ${{ env.PYTHON_VERSION }}
-          PADDLE_URL: ${{ env.PADDLE_URL }}
-          BASE_NAME: ${{ env.BASE_NAME }}
-        run: |
-          container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
-          echo "container_name=${container_name}" >> ${{ github.env }}
-          docker pull ${DOCKER_IMAGE}
-          set -x
-          docker run -d -t --name ${container_name} --gpus "\"device=${GPU_DEVICES}\"" --shm-size=32G \
-            -v "/dev/shm:/dev/shm"  \
-            -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
-            -v ${{ github.workspace }}/../../..:/root \
-            -v /ssd1/paddle-1/action_cache:/home/.cache \
-            -v ${{ github.workspace }}:/workspace \
-            -e BRANCH \
-            -e PR_ID \
-            -e COMMIT_ID \
-            -e PADDLE_ROOT \
-            -e ci_scripts \
-            -e CACHE_DIR \
-            -e no_proxy \
-            -e use_release \
-            -e PIP_CACHE_DIR \
-            -e CUDA_VERSION \
-            -e PYTHON_VERSION \
-            -e PADDLE_URL \
-            -e BASE_NAME \
-            -e GITHUB_SHA="${{ github.event.pull_request.head.sha }}" \
-            -e GITHUB_HEAD_REF="${{ github.head_ref }}" \
-            -e GITHUB_REPO_NAME="${{ github.repository }}" \
-            -e GITHUB_EVENT_NAME="${{ github.event_name }}" \
-            -e GITHUB_TOKEN="${{ secrets.GITHUB_TOKEN }}" \
-            -e GITHUB_RUN_ID="${{ github.run_id }}" \
-            -w /workspace --network host ${DOCKER_IMAGE}
-
-      - name: Install PaddleFleet
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -ce '
-          nvidia-smi
-          find . -maxdepth 1 -name "--*" -delete
-          rm -rf * .[^.]*
-          source /root/proxy
-          mkdir -p /home/.cache/pip
-          pip cache dir
-          pip install --upgrade pip
-          if [ ${PYTHON_VERSION} != "3.10" ]; then
-            wget https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
-            bash miniconda.sh -b -p $HOME/miniconda -u
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main
-            conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
-            conda init bash
-            conda create -n python${PYTHON_VERSION} python=${PYTHON_VERSION} -y
-            conda activate python${PYTHON_VERSION}
-          fi
-          git clone https://github.com/PaddlePaddle/PaddleFleet.git
-          cd PaddleFleet
-          git config --global --add safe.directory /workspace/PaddleFleet
-          git config user.name "PaddleCI"
-          git config user.email "paddle_ci@example.com"
-          git config pull.rebase false
-          pip install colorlog>=6.10.1
-          pip install nvidia-cutlass-dsl==4.2.1
-          python -m pip install --pre  paddlefleet --index-url https://www.paddlepaddle.org.cn/packages/nightly/${CUDA_VERSION}/ --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/${CUDA_VERSION}/ --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple
-          wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
-          chmod +x /usr/local/bin/yq
-          '
-
-      - name: Install PaddleFormers
-        id: formers_install
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -ce '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-            paddle_whl=paddlepaddle_gpu-0.0.0-cp${PYTHON_VERSION//./}-cp${PYTHON_VERSION//./}-linux_x86_64.whl
-            paddle_url="${PADDLE_URL}${paddle_whl}"
-          else
-            paddle_url=${PADDLE_URL}
-          fi
-          if [ ${CUDA_VERSION} == "cu129" ]; then
-            paddle_whl=paddlepaddle_gpu-0.0.0-cp${PYTHON_VERSION//./}-cp${PYTHON_VERSION//./}-linux_x86_64.whl
-            paddle_url="${PADDLE_URL}${paddle_whl}"
-          fi
-          if [ ${CUDA_VERSION} == "cu130" ]; then
-            pip install blinker==1.9.0 --ignore-installed
-          fi
-          git clone -b develop https://github.com/PaddlePaddle/PaddleFormers.git
-          cd PaddleFormers
-          git config --global --add safe.directory /workspace/PaddleFormers
-          git config user.name "PaddleCI"
-          git config user.email "paddle_ci@example.com"
-          git config pull.rebase false
-          git log -1
-          sed -i "s/from gpt_provider import GPTModelProvider/from paddleformers.transformers.gpt_provider import GPTModelProvider/g" examples/experiments/paddlefleet/glm45_provider.py
-          sed -i "s/from gpt_provider import GPTModelProvider/from paddleformers.transformers.gpt_provider import GPTModelProvider/g" examples/experiments/paddlefleet/qwen_provider.py
-          pip install -e . --extra-index-url=https://www.paddlepaddle.org.cn/packages/nightly/${CUDA_VERSION}/
-          if [ "${use_release}" == "true" ]; then
-            echo "Using pre-built paddle package from develop branch."
-            pip install ${paddle_url} --index-url=https://www.paddlepaddle.org.cn/packages/nightly/${CUDA_VERSION}/ --force-reinstall --no-cache-dir
-          fi
-          pip install uv coverage==7.6.1 bce-python-sdk==0.8.74 wrapt matplotlib==3.10.8 pytest parameterized
-          pip install librosa==0.11.0
-          echo "Paddle Commit"
-          python -c "import paddle; print(paddle.version.commit)"
-          echo "PaddleFleet Commit"
-          python -c "import paddlefleet; print(paddlefleet.version.commit)"
-          echo "paddleformers Commit"
-          python -c "import paddleformers; print(paddleformers.version.commit)"
-          '
-
-      - name: Proprocess for integration test
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -ce '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          bash -x PaddleFormers/tests/integration_test/preprocess.sh
-          preprocess_exit_code=$?
-          if [[ "$preprocess_exit_code" != "0" ]]; then
-            echo -e "::error:: \033[31mPreprocess failed.\033[0m"
-            exit 1
-          else
-            echo -e "\033[32mPreprocess succeeded.\033[0m"
-          fi
-          '
-
-      - name: Integration test (GLM4.5 single-card)
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          bash -x PaddleFormers/tests/integration_test/glm45_pt_single_card.sh
-          glm45_single_card_exit_code=$?
-          if [[ "$glm45_single_card_exit_code" != "0" ]]; then
-            export case_name="glm45_single_card"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: GLM4.5 single-card.\033[0m"
-          fi
-          '
-
-      - name: Integration test (Qwen3-30B-A3B single-card)
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          bash -x PaddleFormers/tests/integration_test/qwen3_single_card.sh
-          qwen3_single_card_exit_code=$?
-          if [[ "$qwen3_single_card_exit_code" != "0" ]]; then
-            export case_name="qwen3_single_card"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: Qwen3-30B-A3B single-card.\033[0m"
-          fi
-          '
-
-      - name: Qwen3-vl-8k-single-card
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          timeout 5m bash -x PaddleFormers/tests/integration_test/qwen3vl_sft_single_card.sh single
-          exit_code=$?
-          if [[ "$exit_code" != "0" ]]; then
-            export case_name="qwen3vl_sft_single_card"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: Qwen3-vl-8k-single-card.\033[0m"
-          fi
-          '
-
-
-      - name: Terminate and delete the container
-        if: ${{ always() }}
-        run: |
-          set +e
-          docker exec -t ${{ env.container_name }} /bin/bash -c 'bash PaddleFleet/ci/clean_uv_cache.sh; rm -rf * .[^.]*'
-          docker rm -f ${{ env.container_name }}
-
-  integration-test-H20-multi-card:
-    name: Integration test (H20, multi-card)
-    runs-on:
-      group: Fleet-H-multi-card
-    strategy:
-      fail-fast: false
-      max-parallel: 1
-      matrix:
-        include:
-          - cuda: "12.6"
-            python: "3.10"
-          - cuda: "12.9"
-            python: "3.10"
-          - cuda: "13.0"
-            python: "3.10"
-          - cuda: "12.9"
-            python: "3.11"
-          - cuda: "12.9"
-            python: "3.12"
-          - cuda: "12.9"
-            python: "3.13"
-    env:
-      PIP_CACHE_DIR: /home/.cache/pip
-      CACHE_DIR: /home/.cache
-      TASK: paddlefleet-CE-${{ matrix.cuda }}-${{ matrix.python }}-DEV-integration-test-multi-card-release
-    steps:
-      - name: setup cuda and python
-        run: |
-          cuda=${{ matrix.cuda }}
-          python_version=${{ matrix.python }}
-          if [ "${cuda}" == "12.9" ]; then
-            docker_image=${docker_image_cu129}
-            cuda_version="cu129"
-            paddle_url=${paddle_url_cu129}
-          elif [ "${cuda}" == "13.0" ]; then
-            docker_image=${docker_image_cu130}
-            cuda_version="cu130"
-            paddle_url=${paddle_url_cu130}
-          else
-            docker_image=${docker_image_cu126}
-            cuda_version="cu126"
-            paddle_url=${paddle_url_cu126}
-          fi
-          echo "DOCKER_IMAGE=${docker_image}" >> $GITHUB_ENV
-          echo "CUDA_VERSION=${cuda_version}" >> $GITHUB_ENV
-          echo "PYTHON_VERSION=${python_version}" >> $GITHUB_ENV
-          echo "PADDLE_URL=${paddle_url}" >> $GITHUB_ENV
-          echo "BASE_NAME=${cuda_version}-${python_version}-H20" >> $GITHUB_ENV
-      - name: Check docker image and run container
-        env:
-          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE }}
-          CUDA_VERSION: ${{ env.CUDA_VERSION }}
-          PYTHON_VERSION: ${{ env.PYTHON_VERSION }}
-          PADDLE_URL: ${{ env.PADDLE_URL }}
-          BASE_NAME: ${{ env.BASE_NAME }}
-        run: |
-          container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
-          echo "container_name=${container_name}" >> ${{ github.env }}
-          docker pull ${DOCKER_IMAGE}
-          docker run -d -t --name ${container_name} --gpus all --shm-size=32G \
-            -v "/dev/shm:/dev/shm"  \
-            -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
-            -v ${{ github.workspace }}/../../..:/root \
-            -v /ssd1/paddle-1/action_cache:/home/.cache \
-            -v ${{ github.workspace }}:/workspace \
-            -e BRANCH \
-            -e PR_ID \
-            -e COMMIT_ID \
-            -e PADDLE_ROOT \
-            -e ci_scripts \
-            -e CACHE_DIR \
-            -e no_proxy \
-            -e use_release \
-            -e PIP_CACHE_DIR \
-            -e CUDA_VERSION \
-            -e PYTHON_VERSION \
-            -e PADDLE_URL \
-            -e BASE_NAME \
-            -e GITHUB_SHA="${{ github.event.pull_request.head.sha }}" \
-            -e GITHUB_HEAD_REF="${{ github.head_ref }}" \
-            -e GITHUB_REPO_NAME="${{ github.repository }}" \
-            -e GITHUB_EVENT_NAME="${{ github.event_name }}" \
-            -e GITHUB_TOKEN="${{ secrets.GITHUB_TOKEN }}" \
-            -e GITHUB_RUN_ID="${{ github.run_id }}" \
-            -w /workspace --network host ${DOCKER_IMAGE}
-
-      - name: Install PaddleFleet
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -ce '
-          find . -maxdepth 1 -name "--*" -delete
-          rm -rf * .[^.]*
-          source /root/proxy
-          mkdir -p /home/.cache/pip
-          pip cache dir
-          pip install --upgrade pip
-          if [ ${PYTHON_VERSION} != "3.10" ]; then
-            wget https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
-            bash miniconda.sh -b -p $HOME/miniconda -u
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main
-            conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
-            conda init bash
-            conda create -n python${PYTHON_VERSION} python=${PYTHON_VERSION} -y
-            conda activate python${PYTHON_VERSION}
-          fi
-          git clone https://github.com/PaddlePaddle/PaddleFleet.git
-          cd PaddleFleet
-          git config --global --add safe.directory /workspace/PaddleFleet
-          git config user.name "PaddleCI"
-          git config user.email "paddle_ci@example.com"
-          git config pull.rebase false
-          pip install colorlog>=6.10.1
-          python -m pip install --pre  paddlefleet --index-url https://www.paddlepaddle.org.cn/packages/nightly/$CUDA_VERSION/ --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/${CUDA_VERSION}/ --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple
-          wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
-          chmod +x /usr/local/bin/yq
-          '
-
-      - name: Install PaddleFormers
-        id: formers_install
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -ce '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-            paddle_whl=paddlepaddle_gpu-0.0.0-cp${PYTHON_VERSION//./}-cp${PYTHON_VERSION//./}-linux_x86_64.whl
-            paddle_url="${PADDLE_URL}${paddle_whl}"
-          else
-            paddle_url=${PADDLE_URL}
-          fi
-          if [ ${CUDA_VERSION} == "cu129" ]; then
-            paddle_whl=paddlepaddle_gpu-0.0.0-cp${PYTHON_VERSION//./}-cp${PYTHON_VERSION//./}-linux_x86_64.whl
-            paddle_url="${PADDLE_URL}${paddle_whl}"
-          fi
-          if [ ${CUDA_VERSION} == "cu130" ]; then
-            pip install blinker==1.9.0 --ignore-installed
-          fi
-          git clone -b develop https://github.com/PaddlePaddle/PaddleFormers.git
-          cd PaddleFormers
-          cp examples/experiments/paddlefleet/glm45.json examples/experiments/paddlefleet/glm45_fp8.json
-          git config --global --add safe.directory /workspace/PaddleFormers
-          git config user.name "PaddleCI"
-          git config user.email "paddle_ci@example.com"
-          git config pull.rebase false
-          git log -1
-          pip install -e . --extra-index-url=https://www.paddlepaddle.org.cn/packages/nightly/${CUDA_VERSION}/
-          if [ "${use_release}" == "true" ]; then
-            echo "Using pre-built paddle package from develop branch."
-            pip install ${paddle_url} --index-url=https://www.paddlepaddle.org.cn/packages/nightly/${CUDA_VERSION}/ --force-reinstall --no-cache-dir
-          fi
-          pip install uv coverage==7.6.1 bce-python-sdk==0.8.74 wrapt
-          pip install bce-python-sdk==0.8.74
-          pip install coverage==7.6.1
-          pip install librosa==0.11.0
-          echo "Paddle Commit"
-          python -c "import paddle; print(paddle.version.commit)"
-          echo "PaddleFleet Commit"
-          python -c "import paddlefleet; print(paddlefleet.version.commit)"
-          echo "paddleformers Commit"
-          python -c "import paddleformers; print(paddleformers.version.commit)"
-          '
-
-      - name: GLM4.5 pre-train
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          bash -x PaddleFormers/tests/integration_test/glm45_pt.sh
-          glm45_exit_code=$?
-          if [[ "$glm45_exit_code" != "0" ]]; then
-            export case_name="glm45_pt"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: GLM4.5.\033[0m"
-          fi
-          '
-
-      - name: GLM4.5 sft
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          bash -x PaddleFormers/tests/integration_test/glm45_sft.sh
-          glm45_exit_code=$?
-          if [[ "$glm45_exit_code" != "0" ]]; then
-            export case_name="glm45_sft"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: GLM4.5.\033[0m"
-          fi
-          '
-
-      - name: GLM4.5 lora
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          bash -x PaddleFormers/tests/integration_test/glm45_lora.sh
-          glm45_exit_code=$?
-          if [[ "$glm45_exit_code" != "0" ]]; then
-            export case_name="glm45_lora"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: GLM4.5.\033[0m"
-          fi
-          '
-
-      - name: GLM4.5 dpo
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          bash -x PaddleFormers/tests/integration_test/glm45_dpo.sh
-          glm45_exit_code=$?
-          if [[ "$glm45_exit_code" != "0" ]]; then
-            export case_name="glm45_dpo"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: GLM4.5 dpo.\033[0m"
-          fi
-          '
-
-      - name: GLM4.5 dpo_lora
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          timeout 5m bash -x PaddleFormers/tests/integration_test/glm45_dpo_lora.sh
-          exit_code=$?
-          if [[ "$exit_code" != "0" ]]; then
-            export case_name="glm45_dpo_lora"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: GLM4.5 dpo_lora.\033[0m"
-          fi
-          '
-
-
-      - name: GLM4.5 pre-train (Grouped GEMM)
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${CUDA_VERSION}" == "cu126" ]; then
-            exit 0
-          fi
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          bash -x PaddleFormers/tests/integration_test/glm45_pt_grouped_gemm.sh
-          glm45_exit_code=$?
-          if [[ "$glm45_exit_code" != "0" ]]; then
-            export case_name="glm45_pt_grouped_gemm"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: GLM4.5 Grouped GEMM.\033[0m"
-          fi
-          '
-
-      - name: Integration test (GLM4.5 multi-card FP8)
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${CUDA_VERSION}" == "cu126" ]; then
-            exit 0
-          fi
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          bash -x PaddleFormers/tests/integration_test/glm45_pt_fp8.sh
-          glm45_exit_code=$?
-          if [[ "$glm45_exit_code" != "0" ]]; then
-            export case_name="glm45_pt_fp8"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: GLM4.5 FP8.\033[0m"
-          fi
-          '
-
-      - name: GLM4.5 pre-train (EP4)
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          bash -x PaddleFormers/tests/integration_test/glm45_pt_ep4.sh
-          glm45_exit_code=$?
-          if [[ "$glm45_exit_code" != "0" ]]; then
-            export case_name="glm45_pt_ep4"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: GLM4.5 EP4.\033[0m"
-          fi
-          '
-
-      - name: Qwen pre-train
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          bash -x PaddleFormers/tests/integration_test/qwen.sh pt
-          exit_code=$?
-          if [[ "$exit_code" != "0" ]]; then
-            export case_name="qwen_pt"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: Qwen pre-train.\033[0m"
-          fi
-          '
-
-      - name: Qwen sft
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          bash -x PaddleFormers/tests/integration_test/qwen.sh sft
-          exit_code=$?
-          if [[ "$exit_code" != "0" ]]; then
-            export case_name="qwen_sft"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: Qwen sft.\033[0m"
-          fi
-          '
-
-      - name: Qwen lora
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          bash -x PaddleFormers/tests/integration_test/qwen.sh lora
-          exit_code=$?
-          if [[ "$exit_code" != "0" ]]; then
-            export case_name="qwen_lora"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: Qwen lora.\033[0m"
-          fi
-          '
-
-      - name: Qwen vl sft
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          timeout 5m bash -x PaddleFormers/tests/integration_test/qwen3vl_sft.sh tp8 h20
-          exit_code=$?
-          if [[ "$exit_code" != "0" ]]; then
-            export case_name="qwen3vl_sft_h20_tp8_multi_card"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: Qwen vl sft.\033[0m"
-          fi
-          '
-
-      - name: Qwen vl lora
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          timeout 5m bash -x PaddleFormers/tests/integration_test/qwen3vl_lora.sh h20
-          exit_code=$?
-          if [[ "$exit_code" != "0" ]]; then
-            export case_name="qwen3vl_lora_h20_multi_card"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: Qwen vl lora.\033[0m"
-          fi
-          '
-
-      - name: Qwen vl moe
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          timeout 10m bash -x PaddleFormers/tests/integration_test/qwen3vl_sft.sh moe h20
-          exit_code=$?
-          if [[ "$exit_code" != "0" ]]; then
-            export case_name="qwen3vl_sft_h20_moe_multi_card"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: Qwen vl moe.\033[0m"
-          fi
-          '
-
-      - name: Qwen3-vl-8k-fsdp
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          timeout 10m bash -x PaddleFormers/tests/integration_test/qwen3vl_sft.sh fsdp h20
-          exit_code=$?
-          if [[ "$exit_code" != "0" ]]; then
-            export case_name="qwen3vl_sft_h20_fsdp_multi_card"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: Qwen3-vl-8k-fsdp.\033[0m"
-          fi
-          '
-
-      - name: Terminate and delete the container
-        if: ${{ always() }}
-        run: |
-          set +e
-          docker exec -t ${{ env.container_name }} /bin/bash -c 'bash PaddleFleet/ci/clean_uv_cache.sh; rm -rf * .[^.]*'
-          docker rm -f ${{ env.container_name }}
-
-
-  integration-test-a100:
-    name: Integration test (A100)
+  integration-test-H20-single-card:
+    name: Integration test (H20, single card)
     runs-on:
-      group: Distribute
+      group: Fleet-H-single-card
     strategy:
       fail-fast: false
       max-parallel: 2
       matrix:
         include:
-          - cuda: "12.6"
-            python: "3.10"
-          - cuda: "12.9"
-            python: "3.10"
-          - cuda: "13.0"
-            python: "3.10"
-          - cuda: "12.9"
-            python: "3.11"
           - cuda: "12.9"
             python: "3.12"
-          - cuda: "12.9"
-            python: "3.13"
     env:
       PIP_CACHE_DIR: /home/.cache/pip
       CACHE_DIR: /home/.cache
-      TASK: paddlefleet-CE-${{ matrix.cuda }}-${{ matrix.python }}-DEV-integration-test-A100
+      TASK: paddlefleet-CE-${{ matrix.cuda }}-${{ matrix.python }}-DEV-integration-test-single-card-release
     steps:
-      - name: setup cuda and python
+      - name: Determine the runner
         run: |
+          gpu_id=$(( $(echo $PWD | awk -F'/' '{print $3}' | awk -F'-' '{print $2}') + 3 ))
           cuda=${{ matrix.cuda }}
           python_version=${{ matrix.python }}
+          echo GPU_DEVICES="$gpu_id" >> $GITHUB_ENV
           if [ "${cuda}" == "12.9" ]; then
             docker_image=${docker_image_cu129}
             cuda_version="cu129"
@@ -1138,9 +86,10 @@ jobs:
           echo "CUDA_VERSION=${cuda_version}" >> $GITHUB_ENV
           echo "PYTHON_VERSION=${python_version}" >> $GITHUB_ENV
           echo "PADDLE_URL=${paddle_url}" >> $GITHUB_ENV
-          echo "BASE_NAME=${cuda_version}-${python_version}-A100" >> $GITHUB_ENV
+          echo "BASE_NAME=${cuda_version}-${python_version}-H20" >> $GITHUB_ENV
       - name: Check docker image and run container
         env:
+          GPU_DEVICES: ${{ env.GPU_DEVICES }}
           DOCKER_IMAGE: ${{ env.DOCKER_IMAGE }}
           CUDA_VERSION: ${{ env.CUDA_VERSION }}
           PYTHON_VERSION: ${{ env.PYTHON_VERSION }}
@@ -1150,7 +99,8 @@ jobs:
           container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
           echo "container_name=${container_name}" >> ${{ github.env }}
           docker pull ${DOCKER_IMAGE}
-          docker run -d -t --name ${container_name} --gpus all --shm-size=32G \
+          set -x
+          docker run -d -t --name ${container_name} --gpus "\"device=${GPU_DEVICES}\"" --shm-size=32G \
             -v "/dev/shm:/dev/shm"  \
             -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
             -v ${{ github.workspace }}/../../..:/root \
@@ -1180,6 +130,7 @@ jobs:
       - name: Install PaddleFleet
         run: |
           docker exec -t ${{ env.container_name }} /bin/bash -ce '
+          nvidia-smi
           find . -maxdepth 1 -name "--*" -delete
           rm -rf * .[^.]*
           source /root/proxy
@@ -1203,8 +154,6 @@ jobs:
           git config user.email "paddle_ci@example.com"
           git config pull.rebase false
-          pip install colorlog>=6.10.1
-          pip uninstall paddlefleet -y
-          pip install colorlog>=6.10.1
+          pip install "colorlog>=6.10.1"  # quoted so the shell does not parse >= as an output redirect
           pip install nvidia-cutlass-dsl==4.2.1
           python -m pip install --pre  paddlefleet --index-url https://www.paddlepaddle.org.cn/packages/nightly/${CUDA_VERSION}/ --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/${CUDA_VERSION}/ --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple
           wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
@@ -1238,14 +187,14 @@ jobs:
           git config user.email "paddle_ci@example.com"
           git config pull.rebase false
           git log -1
+          sed -i "s/from gpt_provider import GPTModelProvider/from paddleformers.transformers.gpt_provider import GPTModelProvider/g" examples/experiments/paddlefleet/glm45_provider.py
+          sed -i "s/from gpt_provider import GPTModelProvider/from paddleformers.transformers.gpt_provider import GPTModelProvider/g" examples/experiments/paddlefleet/qwen_provider.py
           pip install -e . --extra-index-url=https://www.paddlepaddle.org.cn/packages/nightly/${CUDA_VERSION}/
           if [ "${use_release}" == "true" ]; then
             echo "Using pre-built paddle package from develop branch."
             pip install ${paddle_url} --index-url=https://www.paddlepaddle.org.cn/packages/nightly/${CUDA_VERSION}/ --force-reinstall --no-cache-dir
           fi
-          pip install uv coverage==7.6.1 bce-python-sdk==0.8.74 wrapt
-          pip install bce-python-sdk==0.8.74
-          pip install coverage==7.6.1
+          pip install uv coverage==7.6.1 bce-python-sdk==0.8.74 wrapt matplotlib==3.10.8 pytest parameterized
           pip install librosa==0.11.0
           echo "Paddle Commit"
           python -c "import paddle; print(paddle.version.commit)"
@@ -1255,139 +204,26 @@ jobs:
           python -c "import paddleformers; print(paddleformers.version.commit)"
           '
 
-      - name: GLM4.5 pre-train
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          bash -x PaddleFormers/tests/integration_test/glm45_a100.sh pt
-          exit_code=$?
-          if [ ${exit_code} -ne 0 ]; then
-            export case_name="glm45_pt_multi_card_a100"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: GLM4.5 pre-train.\033[0m"
-          fi
-          '
-
-      - name: GLM4.5 sft
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          timeout 5m bash -x PaddleFormers/tests/integration_test/glm45_a100.sh sft
-          exit_code=$?
-          if [ ${exit_code} -ne 0 ]; then
-            export case_name="glm45_sft_multi_card_a100"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: GLM4.5 sft.\033[0m"
-          fi
-          '
-
-      - name: GLM4.5 lora
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          timeout 5m bash -x PaddleFormers/tests/integration_test/glm45_a100.sh lora
-          exit_code=$?
-          if [ ${exit_code} -ne 0 ]; then
-            export case_name="glm45_lora_multi_card_a100"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: GLM4.5 lora.\033[0m"
-          fi
-          '
-
-      - name: GLM4.5 dpo
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          timeout 5m bash -x PaddleFormers/tests/integration_test/glm45_a100.sh dpo
-          exit_code=$?
-          if [ ${exit_code} -ne 0 ]; then
-            export case_name="glm45_dpo_multi_card_a100"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: GLM4.5 dpo.\033[0m"
-          fi
-          '
-
-      - name: GLM4.5 dpo_lora
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          timeout 5m bash -x PaddleFormers/tests/integration_test/glm45_a100.sh dpo_lora
-          exit_code=$?
-          if [ ${exit_code} -ne 0 ]; then
-            export case_name="glm45_dpo_lora_multi_card_a100"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: GLM4.5 dpo_lora.\033[0m"
-          fi
-          '
-
-      - name: Qwen pre-train
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          timeout 5m bash -x PaddleFormers/tests/integration_test/qwen3_a100.sh pt
-          exit_code=$?
-          if [ ${exit_code} -ne 0 ]; then
-            export case_name="qwen_pt_multi_card_a100"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: Qwen pre-train.\033[0m"
-          fi
-          '
-
-      - name: Qwen sft
+      - name: Preprocess for integration test
         if: (success() || failure()) && steps.formers_install.conclusion == 'success'
         run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
+          docker exec -t ${{ env.container_name }} /bin/bash -ce '
           source /root/proxy
           if [ "${PYTHON_VERSION}" != "3.10" ]; then
             source "$HOME/miniconda/etc/profile.d/conda.sh"
             conda activate python${PYTHON_VERSION}
           fi
-          timeout 5m bash -x PaddleFormers/tests/integration_test/qwen3_a100.sh sft
-          exit_code=$?
-          if [ ${exit_code} -ne 0 ]; then
-            export case_name="qwen_sft_multi_card_a100"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
+          preprocess_exit_code=0  # status is captured via || below so errexit (-e) cannot skip the error report
+          bash -x PaddleFormers/tests/integration_test/preprocess.sh || preprocess_exit_code=$?
+          if [[ "$preprocess_exit_code" != "0" ]]; then
+            echo -e "::error:: \033[31mPreprocess failed.\033[0m"
+            exit 1
           else
-            echo -e "\033[32mIntegration test succeeded: Qwen sft.\033[0m"
+            echo -e "\033[32mPreprocess succeeded.\033[0m"
           fi
           '
 
-      - name: Qwen lora
+      - name: Integration test (GLM4.5 single-card)
         if: (success() || failure()) && steps.formers_install.conclusion == 'success'
         run: |
           docker exec -t ${{ env.container_name }} /bin/bash -c '
@@ -1396,17 +232,17 @@ jobs:
             source "$HOME/miniconda/etc/profile.d/conda.sh"
             conda activate python${PYTHON_VERSION}
           fi
-          timeout 5m bash -x PaddleFormers/tests/integration_test/qwen3_a100.sh lora
-          exit_code=$?
-          if [ ${exit_code} -ne 0 ]; then
-            export case_name="qwen_lora_multi_card_a100"
+          bash -x PaddleFormers/tests/integration_test/glm45_pt_single_card.sh
+          glm45_single_card_exit_code=$?
+          if [[ "$glm45_single_card_exit_code" != "0" ]]; then
+            export case_name="glm45_single_card"
             bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
           else
-            echo -e "\033[32mIntegration test succeeded: Qwen lora.\033[0m"
+            echo -e "\033[32mIntegration test succeeded: GLM4.5 single-card.\033[0m"
           fi
           '
 
-      - name: Qwen vl sft
+      - name: Integration test (Qwen3-30B-A3B single-card)
         if: (success() || failure()) && steps.formers_install.conclusion == 'success'
         run: |
           docker exec -t ${{ env.container_name }} /bin/bash -c '
@@ -1415,53 +251,35 @@ jobs:
             source "$HOME/miniconda/etc/profile.d/conda.sh"
             conda activate python${PYTHON_VERSION}
           fi
-          timeout 5m bash -x PaddleFormers/tests/integration_test/qwen3vl_sft.sh tp8 a100
-          exit_code=$?
-          if [ ${exit_code} -ne 0 ]; then
-            export case_name="qwen3vl_sft_a100_tp8_multi_card"
+          bash -x PaddleFormers/tests/integration_test/qwen3_single_card.sh
+          qwen3_single_card_exit_code=$?
+          if [[ "$qwen3_single_card_exit_code" != "0" ]]; then
+            export case_name="qwen3_single_card"
             bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
           else
-            echo -e "\033[32mIntegration test succeeded: Qwen vl sft.\033[0m"
+            echo -e "\033[32mIntegration test succeeded: Qwen3-30B-A3B single-card.\033[0m"
           fi
           '
 
-      - name: Qwen vl lora
+      - name: Qwen3-vl-8k-single-card
         if: (success() || failure()) && steps.formers_install.conclusion == 'success'
         run: |
           docker exec -t ${{ env.container_name }} /bin/bash -c '
           source /root/proxy
           if [ "${PYTHON_VERSION}" != "3.10" ]; then
             source "$HOME/miniconda/etc/profile.d/conda.sh"
             conda activate python${PYTHON_VERSION}
           fi
-          timeout 5m bash -x PaddleFormers/tests/integration_test/qwen3vl_lora.sh a100
+          timeout 5m bash -x PaddleFormers/tests/integration_test/qwen3vl_sft_single_card.sh single
           exit_code=$?
-          if [ ${exit_code} -ne 0 ]; then
-            export case_name="qwen3vl_lora_a100_multi_card"
+          if [[ "$exit_code" != "0" ]]; then
+            export case_name="qwen3vl_sft_single_card"
             bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
           else
-            echo -e "\033[32mIntegration test succeeded: Qwen vl lora.\033[0m"
+            echo -e "\033[32mIntegration test succeeded: Qwen3-vl-8k-single-card.\033[0m"
           fi
           '
 
-      - name: Qwen vl moe
-        if: (success() || failure()) && steps.formers_install.conclusion == 'success'
-        run: |
-          docker exec -t ${{ env.container_name }} /bin/bash -c '
-          source /root/proxy
-          if [ "${PYTHON_VERSION}" != "3.10" ]; then
-            source "$HOME/miniconda/etc/profile.d/conda.sh"
-            conda activate python${PYTHON_VERSION}
-          fi
-          timeout 10m bash -x PaddleFormers/tests/integration_test/qwen3vl_sft.sh moe a100
-          exit_code=$?
-          if [ ${exit_code} -ne 0 ]; then
-            export case_name="qwen3vl_sft_a100_moe_multi_card"
-            bash PaddleFleet/ci/check_ce_precision.sh $case_name $BASE_NAME
-          else
-            echo -e "\033[32mIntegration test succeeded: Qwen vl moe.\033[0m"
-          fi
-          '
 
       - name: Terminate and delete the container
         if: ${{ always() }}