diff --git a/.github/workflows/_build-tutorials-base.yml b/.github/workflows/_build-tutorials-base.yml
new file mode 100644
index 00000000000..e33c8c4bd67
--- /dev/null
+++ b/.github/workflows/_build-tutorials-base.yml
@@ -0,0 +1,200 @@
+name: Build tutorials
+
+# Reusable workflow: called through workflow_call by build-tutorials.yml and
+# build-tutorials-nightly.yml. Both inputs are 0/1 flags; the worker job
+# forwards them to .jenkins/build.sh, and UPLOAD also gates the manager job.
+on:
+  workflow_call:
+    inputs:
+      USE_NIGHTLY:
+        description: "Use nightly builds inside build.sh"
+        required: false
+        type: number
+        default: 0
+      UPLOAD:
+        description: "Upload built docs to PR preview and main site"
+        required: false
+        type: number
+        default: 0
+
+jobs:
+  worker:
+    name: pytorch_tutorial_build_worker
+    strategy:
+      matrix:
+        include:
+          - { shard: 1, num_shards: 15, runner: "linux.g5.12xlarge.nvidia.gpu" }
+          - { shard: 2, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
+          - { shard: 3, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
+          - { shard: 4, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
+          - { shard: 5, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
+          - { shard: 6, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
+          - { shard: 7, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
+          - { shard: 8, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
+          - { shard: 9, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
+          - { shard: 10, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
+          - { shard: 11, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
+          - { shard: 12, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
+          - { shard: 13, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
+          - { shard: 14, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
+          - { shard: 15, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
+      fail-fast: false
+    runs-on: ${{ matrix.runner }}
+    steps:
+      - name: Setup SSH (Click me for login details)
+        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        with:
+          github-secret: ${{ secrets.GITHUB_TOKEN }}
+          instructions: |
+            All testing is done inside the container, to start an interactive session run:
+              docker exec -it $(docker container ps --format '{{.ID}}') bash
+
+      - name: Checkout Tutorials
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Setup Linux
+        uses: pytorch/pytorch/.github/actions/setup-linux@main
+
+      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
+        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
+
+      - name: Calculate/build docker image
+        id: calculate-docker-image
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        with:
+          docker-image-name: tutorials
+
+      - name: Pull docker image
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        with:
+          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
+
+      - name: Build
+        shell: bash
+        env:
+          DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
+          NUM_WORKERS: ${{ matrix.num_shards }}
+          WORKER_ID: ${{ matrix.shard }}
+          COMMIT_ID: ${{ github.sha }}
+          JOB_TYPE: worker
+          COMMIT_SOURCE: ${{ github.ref }}
+          USE_NIGHTLY: ${{ inputs.USE_NIGHTLY }}
+          UPLOAD: ${{ inputs.UPLOAD }}
+        run: |
+          set -ex
+
+          chmod +x ".jenkins/build.sh"
+
+          # `docker run --detach` prints the ID of the container it starts;
+          # capture it so the build script can be exec'd inside it below.
+          container_name=$(docker run \
+            ${GPU_FLAG:-} \
+            -e WORKER_ID \
+            -e NUM_WORKERS \
+            -e COMMIT_ID \
+            -e JOB_TYPE \
+            -e COMMIT_SOURCE \
+            -e USE_NIGHTLY \
+            -e UPLOAD \
+            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
+            --tty \
+            --detach \
+            --shm-size=2gb \
+            -v "${GITHUB_WORKSPACE}:/var/lib/workspace" \
+            -w /var/lib/workspace \
+            "${DOCKER_IMAGE}"
+          )
+
+          docker exec -u ci-user -t "${container_name}" sh -c ".jenkins/build.sh"
+
+      - name: Teardown Linux
+        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        if: always()
+
+  manager:
+    name: pytorch_tutorial_build_manager
+    needs: worker
+    runs-on: [self-hosted, linux.2xlarge]
+    # Skipped unless the calling workflow sets UPLOAD to 1.
+    if: ${{ inputs.UPLOAD == 1 }}
+    environment: ${{ github.ref == 'refs/heads/main' && 'pytorchbot-env' || '' }}
+    steps:
+      - name: Setup SSH (Click me for login details)
+        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        with:
+          github-secret: ${{ secrets.GITHUB_TOKEN }}
+          instructions: |
+            All testing is done inside the container, to start an interactive session run:
+              docker exec -it $(docker container ps --format '{{.ID}}') bash
+
+      - name: Checkout Tutorials
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Setup Linux
+        uses: pytorch/pytorch/.github/actions/setup-linux@main
+
+      - name: Calculate/build docker image
+        id: calculate-docker-image
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        with:
+          docker-image-name: tutorials
+
+      - name: Pull docker image
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        with:
+          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
+
+      - name: Build
+        shell: bash
+        env:
+          DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
+          NUM_WORKERS: 15
+          WORKER_ID: 0
+          COMMIT_ID: ${{ github.sha }}
+          JOB_TYPE: manager
+          COMMIT_SOURCE: ${{ github.ref }}
+          GITHUB_PYTORCHBOT_TOKEN: ${{ secrets.PYTORCHBOT_TOKEN }}
+          USE_NIGHTLY: ${{ inputs.USE_NIGHTLY }}
+        run: |
+          set -ex
+
+          chmod +x ".jenkins/build.sh"
+
+          # `docker run --detach` prints the ID of the container it starts;
+          # capture it so the build script can be exec'd inside it below.
+          container_name=$(docker run \
+            ${GPU_FLAG:-} \
+            -e WORKER_ID \
+            -e NUM_WORKERS \
+            -e COMMIT_ID \
+            -e JOB_TYPE \
+            -e COMMIT_SOURCE \
+            -e GITHUB_PYTORCHBOT_TOKEN \
+            -e USE_NIGHTLY \
+            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
+            --tty \
+            --detach \
+            -v "${GITHUB_WORKSPACE}:/var/lib/workspace" \
+            -w /var/lib/workspace \
+            "${DOCKER_IMAGE}"
+          )
+
+          docker exec -u ci-user -t "${container_name}" sh -c ".jenkins/build.sh"
+
+      - name: Upload docs preview
+        uses: seemethere/upload-artifact-s3@v5
+        if: ${{ github.event_name == 'pull_request' }}
+        with:
+          retention-days: 14
+          s3-bucket: doc-previews
+          if-no-files-found: error
+          path: docs
+          s3-prefix: pytorch/tutorials/${{ github.event.pull_request.number }}
+
+      - name: Teardown Linux
+        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        if: always()
diff --git a/.github/workflows/build-tutorials-nightly.yml b/.github/workflows/build-tutorials-nightly.yml
new file mode 100644
index 00000000000..60f8d3a1fdc
--- /dev/null
+++ b/.github/workflows/build-tutorials-nightly.yml
@@ -0,0 +1,35 @@
+name: Build tutorials (nightly/test)
+# This is a workflow to build tutorials using nightly or the test/release
+# candidate builds for pytorch libraries. It downloads torch and other torch
+# related libraries from the nightly or test channel and checks that the
+# tutorials can run. This workflow will not upload the built docs anywhere in
+# order to prevent polluting the official documentation.
+
+# During releases, this workflow should be run on PRs to verify that the
+# tutorials work with the test/rc builds before the official release is made.
+# When there is no release candidate, this workflow should only be run on the
+# main branch since nightly can be unstable and we do not want to block PRs due
+# to failures in this workflow.
+
+# To change the channel between nightly and test/rc, change the index used to
+# download the binaries in .jenkins/build.sh.
+on:
+  # Only main branch for now. Uncomment the below line to enable it on PRs.
+  # pull_request:
+
+  # Comment out the below line to disable on the main branch
+  push:
+    branches:
+      - main
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    uses: ./.github/workflows/_build-tutorials-base.yml
+    secrets: inherit
+    with:
+      USE_NIGHTLY: 1
+      UPLOAD: 0
diff --git a/.github/workflows/build-tutorials.yml b/.github/workflows/build-tutorials.yml
index 94cfd5843a0..58372d557e6 100644
--- a/.github/workflows/build-tutorials.yml
+++ b/.github/workflows/build-tutorials.yml
@@ -11,173 +11,9 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  worker:
-    name: pytorch_tutorial_build_worker
-    strategy:
-      matrix:
-        include:
-          - { shard: 1, num_shards: 15, runner: "linux.g5.12xlarge.nvidia.gpu" }
-          - { shard: 2, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
-          - { shard: 3, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
-          - { shard: 4, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
-          - { shard: 5, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
-          - { shard: 6, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
-          - { shard: 7, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
-          - { shard: 8, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
-          - { shard: 9, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
-          - { shard: 10, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
-          - { shard: 11, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
-          - { shard: 12, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
-          - { shard: 13, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
-          - { shard: 14, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
-          - { shard: 15, num_shards: 15, runner: "linux.g5.4xlarge.nvidia.gpu" }
-      fail-fast: false
-    runs-on: ${{ matrix.runner }}
-    steps:
-      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
-        with:
-          github-secret: ${{ secrets.GITHUB_TOKEN }}
-          instructions: |
-            All testing is done inside the container, to start an interactive session run:
-              docker exec -it $(docker container ps --format '{{.ID}}') bash
-
-      - name: Checkout Tutorials
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-
-      - name: Setup Linux
-        uses: pytorch/pytorch/.github/actions/setup-linux@main
-
-      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
-        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
-
-      - name: Calculate/build docker image
-        id: calculate-docker-image
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
-        with:
-          docker-image-name: tutorials
-
-      - name: Pull docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
-        with:
-          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
-
-      - name: Build
-        shell: bash
-        env:
-          DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
-          NUM_WORKERS: ${{ matrix.num_shards }}
-          WORKER_ID: ${{ matrix.shard }}
-          COMMIT_ID: ${{ github.sha }}
-          JOB_TYPE: worker
-          COMMIT_SOURCE: ${{ github.ref }}
-        run: |
-          set -ex
-
-          chmod +x ".jenkins/build.sh"
-
-          container_name=$(docker run \
-            ${GPU_FLAG:-} \
-            -e WORKER_ID \
-            -e NUM_WORKERS \
-            -e COMMIT_ID \
-            -e JOB_TYPE \
-            -e COMMIT_SOURCE \
-            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
-            --tty \
-            --detach \
-            --shm-size=2gb \
-            --name="${container_name}" \
-            -v "${GITHUB_WORKSPACE}:/var/lib/workspace" \
-            -w /var/lib/workspace \
-            "${DOCKER_IMAGE}"
-          )
-
-          docker exec -u ci-user -t "${container_name}" sh -c ".jenkins/build.sh"
-
-      - name: Teardown Linux
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
-        if: always()
-
-  manager:
-    name: pytorch_tutorial_build_manager
-    needs: worker
-    runs-on: [self-hosted, linux.2xlarge]
-    environment: ${{ github.ref == 'refs/heads/main' && 'pytorchbot-env' || '' }}
-    steps:
-      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
-        with:
-          github-secret: ${{ secrets.GITHUB_TOKEN }}
-          instructions: |
-            All testing is done inside the container, to start an interactive session run:
-              docker exec -it $(docker container ps --format '{{.ID}}') bash
-
-      - name: Checkout Tutorials
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-
-      - name: Setup Linux
-        uses: pytorch/pytorch/.github/actions/setup-linux@main
-
-      - name: Calculate/build docker image
-        id: calculate-docker-image
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
-        with:
-          docker-image-name: tutorials
-
-      - name: Pull docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
-        with:
-          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
-
-      - name: Build
-        shell: bash
-        env:
-          DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
-          NUM_WORKERS: 15
-          WORKER_ID: ${{ matrix.shard }}
-          COMMIT_ID: ${{ github.sha }}
-          JOB_TYPE: manager
-          COMMIT_SOURCE: ${{ github.ref }}
-          GITHUB_PYTORCHBOT_TOKEN: ${{ secrets.PYTORCHBOT_TOKEN }}
-        run: |
-          set -ex
-
-          chmod +x ".jenkins/build.sh"
-
-          container_name=$(docker run \
-            ${GPU_FLAG:-} \
-            -e WORKER_ID \
-            -e NUM_WORKERS \
-            -e COMMIT_ID \
-            -e JOB_TYPE \
-            -e COMMIT_SOURCE \
-            -e GITHUB_PYTORCHBOT_TOKEN \
-            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
-            --tty \
-            --detach \
-            --name="${container_name}" \
-            -v "${GITHUB_WORKSPACE}:/var/lib/workspace" \
-            -w /var/lib/workspace \
-            "${DOCKER_IMAGE}"
-          )
-
-          docker exec -u ci-user -t "${container_name}" sh -c ".jenkins/build.sh"
-
-      - name: Upload docs preview
-        uses: seemethere/upload-artifact-s3@v5
-        if: ${{ github.event_name == 'pull_request' }}
-        with:
-          retention-days: 14
-          s3-bucket: doc-previews
-          if-no-files-found: error
-          path: docs
-          s3-prefix: pytorch/tutorials/${{ github.event.pull_request.number }}
-
-      - name: Teardown Linux
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
-        if: always()
+  build:
+    uses: ./.github/workflows/_build-tutorials-base.yml
+    secrets: inherit
+    with:
+      USE_NIGHTLY: 0
+      UPLOAD: 1
diff --git a/.jenkins/build.sh b/.jenkins/build.sh
index 72d30655db8..32ceec660da 100755
--- a/.jenkins/build.sh
+++ b/.jenkins/build.sh
@@ -19,7 +19,13 @@ sudo apt-get install -y pandoc
 # NS: Path to python runtime should already be part of docker container
 # export PATH=/opt/conda/bin:$PATH
 
-#Install PyTorch Nightly for test.
+# Install PyTorch Nightly for test.
+if [ "${USE_NIGHTLY:-0}" -eq 1 ]; then
+  sudo pip uninstall -y torch torchvision torchaudio
+  pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu130
+  pip show torch
+fi
+
 # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
 # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
 # sudo pip uninstall -y fbgemm-gpu torchrec
@@ -114,8 +120,10 @@ if [[ "${JOB_TYPE}" == "worker" ]]; then
   python .jenkins/validate_tutorials_built.py
 
   # Step 6: Copy generated files to S3, tag with commit ID
-  7z a worker_${WORKER_ID}.7z docs
-  awsv2 s3 cp worker_${WORKER_ID}.7z s3://${BUCKET_NAME}/${COMMIT_ID}/worker_${WORKER_ID}.7z
+  if [ "${UPLOAD:-0}" -eq 1 ]; then
+    7z a worker_${WORKER_ID}.7z docs
+    awsv2 s3 cp worker_${WORKER_ID}.7z s3://${BUCKET_NAME}/${COMMIT_ID}/worker_${WORKER_ID}.7z
+  fi
 elif [[ "${JOB_TYPE}" == "manager" ]]; then
   # Step 1: Generate no-plot HTML pages for all tutorials
   pip3 install -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme