diff --git a/.github/actions/setup-python-tools/action.yml b/.github/actions/setup-python-tools/action.yml index 9a7572a95..36e1b914c 100644 --- a/.github/actions/setup-python-tools/action.yml +++ b/.github/actions/setup-python-tools/action.yml @@ -37,28 +37,16 @@ runs: using: "composite" # needs: comment-link-to-workflow # Ensure that a comment is posted with workflow id steps: - # Step 1: Set up Python environment (Python 3.9.13). - - name: Set up Python - uses: actions/setup-python@v4 + # Step 1: Set up Python environment + - name: "Set up Python" + uses: actions/setup-python@v6 with: - # Available versions: https://raw.githubusercontent.com/actions/python-versions/main/versions-manifest.json - # Ensure to use a version that has support for arm64-darwin so we can build for Apple Silicon (macOS 14). - python-version: '3.9.13' + python-version-file: ".python-version" - # Step 2: Install pip-tools, which is used to generate hashed requirements. - # Note_1: pip 25.1 has a bug that causes pip-tools to fail with the following error: - # File ".../python3.9/site-packages/piptools/repositories/pypi.py", line 452, in allow_all_wheels - # self.finder.find_all_candidates.cache_clear() - # AttributeError: 'function' object has no attribute 'cache_clear' - # Note_2: Even though some wheels are guarded behind conditionals i.e. only use this if platform = linux; - # pip-tools 7.5.0 fails with the following error: - # pip._internal.exceptions.UnsupportedWheel: pyg_lib-0.4....linux_x86_64.whl is not a supported wheel on this platform. - # Thus, we fix the pip version to 25.0.1 and pip-tools version to 7.4.1. - - name: Install pip-tools - shell: bash - run: | - python -m pip install "pip==25.0.1" - python -m pip install "pip-tools==7.4.1" + - name: Install uv + uses: astral-sh/setup-uv@v6 + with: + version: "0.9.5" # Matches the version in install_py_deps.sh # Step 3: Set up Gcloud AUTH using Workload Identity Federation # See following for context: https://cloud.google.com/blog/products/identity-security/enabling-keyless-authentication-from-github-actions diff --git a/.github/cloud_builder/run_command_on_active_checkout.yaml b/.github/cloud_builder/run_command_on_active_checkout.yaml index 63ea9be26..91135a88e 100644 --- a/.github/cloud_builder/run_command_on_active_checkout.yaml +++ b/.github/cloud_builder/run_command_on_active_checkout.yaml @@ -3,7 +3,7 @@ substitutions: options: logging: CLOUD_LOGGING_ONLY steps: - - name: us-central1-docker.pkg.dev/external-snap-ci-github-gigl/gigl-base-images/gigl-builder:6a94ae7cad3ec0c633246b0c9340a5095527deb9.63.2 + - name: us-central1-docker.pkg.dev/external-snap-ci-github-gigl/gigl-base-images/gigl-builder:51af343c1c298ab465a96ecffd4e50ea6dffacb7.88.1 entrypoint: /bin/bash args: - -c @@ -18,15 +18,18 @@ steps: echo "Setting up environment..." # gcloud runner will run as a non-root user, but all paths/profiles, etc are set up for root + mkdir -p /builder/home/.local/bin + cp -r /root/.local/bin/ /builder/home/.local/ echo "source /root/.bashrc" >> ~/.bashrc echo "source /root/.profile" >> ~/.profile source ~/.profile + docker version docker buildx create --driver=docker-container --use docker run --rm --privileged multiarch/qemu-user-static --reset -p yes gcloud auth configure-docker us-central1-docker.pkg.dev # Install GiGL - pip install -e ./python/ + uv pip install -e . # The builder operates in its own user dir, usually /workspace, # so we need to copy the gigl tools dir to the current cloud_builder's user dir. # See: containers/Dockerfile.builder. 
diff --git a/.github/scripts/update_docker_image_refs.sh b/.github/scripts/update_docker_image_refs.sh new file mode 100644 index 000000000..815b1eee2 --- /dev/null +++ b/.github/scripts/update_docker_image_refs.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# Script to update dep_vars.env and cloud builder config with new Docker image references + +set -e + +echo "Writing new image names to dep_vars.env:" +echo " DOCKER_LATEST_BASE_CUDA_IMAGE_NAME_WITH_TAG=${GIGL_BASE_CUDA_IMAGE}" +echo " DOCKER_LATEST_BASE_CPU_IMAGE_NAME_WITH_TAG=${GIGL_BASE_CPU_IMAGE}" +echo " DOCKER_LATEST_BASE_DATAFLOW_IMAGE_NAME_WITH_TAG=${GIGL_BASE_DATAFLOW_IMAGE}" +echo " DOCKER_LATEST_BUILDER_IMAGE_NAME_WITH_TAG=${GIGL_BUILDER_IMAGE}" + +sed -i "s|^DOCKER_LATEST_BASE_CUDA_IMAGE_NAME_WITH_TAG=.*|DOCKER_LATEST_BASE_CUDA_IMAGE_NAME_WITH_TAG=${GIGL_BASE_CUDA_IMAGE}|" dep_vars.env +sed -i "s|^DOCKER_LATEST_BASE_CPU_IMAGE_NAME_WITH_TAG=.*|DOCKER_LATEST_BASE_CPU_IMAGE_NAME_WITH_TAG=${GIGL_BASE_CPU_IMAGE}|" dep_vars.env +sed -i "s|^DOCKER_LATEST_BASE_DATAFLOW_IMAGE_NAME_WITH_TAG=.*|DOCKER_LATEST_BASE_DATAFLOW_IMAGE_NAME_WITH_TAG=${GIGL_BASE_DATAFLOW_IMAGE}|" dep_vars.env +sed -i "s|name: us-central1-docker\.pkg\.dev.*|name: ${GIGL_BUILDER_IMAGE}|" .github/cloud_builder/run_command_on_active_checkout.yaml diff --git a/.github/workflows/build-base-docker-images.yml b/.github/workflows/build-base-docker-images.yml index 9d848ecad..faec56e8e 100644 --- a/.github/workflows/build-base-docker-images.yml +++ b/.github/workflows/build-base-docker-images.yml @@ -6,6 +6,7 @@ on: pr_number: description: 'PR to run the workflow on' required: true + env: DOCKER_BUILDKIT: 1 GIGL_BASE_CUDA_IMAGE: us-central1-docker.pkg.dev/${{ vars.GCP_PROJECT_ID }}/public-gigl/gigl-cuda-base:${{ github.sha }}.${{ github.run_number }}.${{ github.run_attempt }} @@ -16,6 +17,7 @@ env: jobs: comment-workflow-started: + runs-on: ubuntu-latest steps: - name: Comment on PR @@ -29,7 +31,7 @@ jobs: Once done, the workflow will update the `dep_vars.env` file with the new image names. 
build-cuda-base-image: - runs-on: gigl-large-instances # x64 Ubuntu:latest w/ 4 cores, 16GB RAM, 150 GB SSD + runs-on: gigl-large-instances # x64 Ubuntu:latest w/ 8-cores, 32GB RAM, 300 GB SSD permissions: # Needed for gcloud auth: https://github.com/google-github-actions/auth contents: 'read' @@ -41,7 +43,7 @@ jobs: github-token: ${{ secrets.GITHUB_TOKEN }} pr_number: ${{ inputs.pr_number }} - name: Setup Machine for building Docker images - uses: snapchat/gigl/.github/actions/setup-python-tools@main + uses: ./.github/actions/setup-python-tools with: setup_gcloud: "true" try_cleaning_disk_space: "true" @@ -56,8 +58,8 @@ jobs: docker push ${GIGL_BASE_CUDA_IMAGE} echo "Pushed CUDA base image to ${GIGL_BASE_CUDA_IMAGE}" - build-cpu-base-images: - runs-on: gigl-large-instances # x64 Ubuntu:latest w/ 4 cores, 16GB RAM, 150 GB SSD + build-cpu-base-image: + runs-on: ubuntu-latest permissions: # Needed for gcloud auth: https://github.com/google-github-actions/auth contents: 'read' @@ -69,14 +71,13 @@ jobs: github-token: ${{ secrets.GITHUB_TOKEN }} pr_number: ${{ inputs.pr_number }} - name: Setup Machine for building Docker images - uses: snapchat/gigl/.github/actions/setup-python-tools@main + uses: ./.github/actions/setup-python-tools with: setup_gcloud: "true" try_cleaning_disk_space: "true" gcp_project_id: ${{ vars.GCP_PROJECT_ID }} workload_identity_provider: ${{ secrets.WORKLOAD_IDENTITY_PROVIDER }} gcp_service_account_email: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }} - - name: Build and Push CPU Base Image and Docker CPU Image run: | gcloud auth configure-docker us-central1-docker.pkg.dev @@ -85,8 +86,30 @@ jobs: docker push ${GIGL_BASE_CPU_IMAGE} echo "Pushed CPU base image to ${GIGL_BASE_CPU_IMAGE}" - echo "Will use CPU image ${GIGL_BASE_CPU_IMAGE} as base image for Dataflow image." - docker build -f ./containers/Dockerfile.dataflow.base --build-arg BASE_IMAGE=${GIGL_BASE_CPU_IMAGE} -t ${GIGL_BASE_DATAFLOW_IMAGE} . + build-dataflow-base-image: + runs-on: ubuntu-latest + permissions: + # Needed for gcloud auth: https://github.com/google-github-actions/auth + contents: 'read' + id-token: 'write' + steps: + - name: Checkout PR Branch + uses: snapchat/gigl/.github/actions/checkout-pr-branch@main + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + pr_number: ${{ inputs.pr_number }} + - name: Setup Machine for building Docker images + uses: ./.github/actions/setup-python-tools + with: + setup_gcloud: "true" + try_cleaning_disk_space: "true" + gcp_project_id: ${{ vars.GCP_PROJECT_ID }} + workload_identity_provider: ${{ secrets.WORKLOAD_IDENTITY_PROVIDER }} + gcp_service_account_email: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }} + - name: Build and Push Dataflow Base Image + run: | + gcloud auth configure-docker us-central1-docker.pkg.dev + docker build -f ./containers/Dockerfile.dataflow.base -t ${GIGL_BASE_DATAFLOW_IMAGE} . 
docker push ${GIGL_BASE_DATAFLOW_IMAGE} echo "Pushed Dataflow base image to ${GIGL_BASE_DATAFLOW_IMAGE}" @@ -103,7 +126,7 @@ jobs: github-token: ${{ secrets.GITHUB_TOKEN }} pr_number: ${{ inputs.pr_number }} - name: Setup Machine for building Docker images - uses: snapchat/gigl/.github/actions/setup-python-tools@main + uses: ./.github/actions/setup-python-tools with: setup_gcloud: "true" try_cleaning_disk_space: "true" @@ -124,7 +147,8 @@ jobs: build-and-commit-base-images: needs: - build-cuda-base-image - - build-cpu-base-images + - build-cpu-base-image + - build-dataflow-base-image - build-builder-image runs-on: ubuntu-latest steps: @@ -134,23 +158,12 @@ jobs: github-token: ${{ secrets.GITHUB_TOKEN }} pr_number: ${{ inputs.pr_number }} should_leave_progress_comments: "false" - command: | - echo "Writing new image names to dep_vars.env:" - echo " DOCKER_LATEST_BASE_CUDA_IMAGE_NAME_WITH_TAG=${GIGL_BASE_CUDA_IMAGE}" - echo " DOCKER_LATEST_BASE_CPU_IMAGE_NAME_WITH_TAG=${GIGL_BASE_CPU_IMAGE}" - echo " DOCKER_LATEST_BASE_DATAFLOW_IMAGE_NAME_WITH_TAG=${GIGL_BASE_DATAFLOW_IMAGE}" - echo " DOCKER_LATEST_BUILDER_IMAGE_NAME_WITH_TAG=${GIGL_BUILDER_IMAGE}" - sed -i "s|^DOCKER_LATEST_BASE_CUDA_IMAGE_NAME_WITH_TAG=.*|DOCKER_LATEST_BASE_CUDA_IMAGE_NAME_WITH_TAG=${GIGL_BASE_CUDA_IMAGE}|" dep_vars.env - sed -i "s|^DOCKER_LATEST_BASE_CPU_IMAGE_NAME_WITH_TAG=.*|DOCKER_LATEST_BASE_CPU_IMAGE_NAME_WITH_TAG=${GIGL_BASE_CPU_IMAGE}|" dep_vars.env - sed -i "s|^DOCKER_LATEST_BASE_DATAFLOW_IMAGE_NAME_WITH_TAG=.*|DOCKER_LATEST_BASE_DATAFLOW_IMAGE_NAME_WITH_TAG=${GIGL_BASE_DATAFLOW_IMAGE}|" dep_vars.env - sed -i "s|name: us-central1-docker\.pkg\.dev.*|name: ${GIGL_BUILDER_IMAGE}|" .github/cloud_builder/run_command_on_active_checkout.yaml - + command: bash .github/scripts/update_docker_image_refs.sh - name: Commit and Push Dep Vars uses: snapchat/gigl/.github/actions/commit-and-push@main with: commit_message: "[AUTOMATED] Update dep.vars, and other relevant files with new image names" github_token: ${{ secrets.GITHUB_TOKEN }} - - uses: snapchat/gigl/.github/actions/comment-on-pr@main with: pr_number: ${{ inputs.pr_number }} diff --git a/.github/workflows/on-pr-comment.yml b/.github/workflows/on-pr-comment.yml index 4d6032747..cc57aed64 100644 --- a/.github/workflows/on-pr-comment.yml +++ b/.github/workflows/on-pr-comment.yml @@ -24,7 +24,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v4 with: - python-version: '3.x' + python-version-file: ".python-version" - name: Install PyYAML run: pip install PyYAML @@ -164,6 +164,7 @@ jobs: workload_identity_provider: ${{ secrets.WORKLOAD_IDENTITY_PROVIDER }} gcp_service_account_email: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }} command: | + # sourcing .profile is important to resolve paths for java, sbt, et al. + # It is setup in the setup-python-tools action. source ~/.profile - make check_format - make assert_yaml_configs_parse + make lint_test diff --git a/.github/workflows/on-pr-merge.yml b/.github/workflows/on-pr-merge.yml index 75bfccd9d..0e1f9ddd0 100644 --- a/.github/workflows/on-pr-merge.yml +++ b/.github/workflows/on-pr-merge.yml @@ -26,7 +26,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Setup development environment - uses: snapchat/gigl/.github/actions/setup-python-tools@main + uses: ./.github/actions/setup-python-tools with: setup_gcloud: "true" gcp_project_id: ${{ vars.GCP_PROJECT_ID }} @@ -37,7 +37,7 @@ jobs: # using GFile library (a.k.a anything that does IO w/ Tensorflow). 
GFile does not understand # how to leverage Workload Identity Federation to read assets from GCS, et al. See: # https://github.com/tensorflow/tensorflow/issues/57104 - uses: snapchat/gigl/.github/actions/run-cloud-run-command-on-active-checkout@main + uses: ./.github/actions/run-cloud-run-command-on-active-checkout with: cmd: "make unit_test_py" service_account: ${{ secrets.gcp_service_account_email }} @@ -53,14 +53,18 @@ jobs: steps: - uses: actions/checkout@v4 - name: Setup development environment - uses: snapchat/gigl/.github/actions/setup-python-tools@main + uses: ./.github/actions/setup-python-tools with: setup_gcloud: "true" gcp_project_id: ${{ vars.GCP_PROJECT_ID }} workload_identity_provider: ${{ secrets.workload_identity_provider }} gcp_service_account_email: ${{ secrets.gcp_service_account_email }} - name: Run Scala Unit Tests - uses: snapchat/gigl/.github/actions/run-cloud-run-command-on-active-checkout@main + # We use cloud run here instead of using github hosted runners because of limitation of tests + # using GFile library (a.k.a anything that does IO w/ Tensorflow). GFile does not understand + # how to leverage Workload Identity Federation to read assets from GCS, et al. See: + # https://github.com/tensorflow/tensorflow/issues/57104 + uses: ./.github/actions/run-cloud-run-command-on-active-checkout with: cmd: "make unit_test_scala" service_account: ${{ secrets.gcp_service_account_email }} @@ -72,14 +76,14 @@ jobs: steps: - uses: actions/checkout@v4 - name: Setup development environment - uses: snapchat/gigl/.github/actions/setup-python-tools@main + uses: ./.github/actions/setup-python-tools with: setup_gcloud: "true" gcp_project_id: ${{ vars.GCP_PROJECT_ID }} workload_identity_provider: ${{ secrets.workload_identity_provider }} gcp_service_account_email: ${{ secrets.gcp_service_account_email }} - name: Run Integration Tests - uses: snapchat/gigl/.github/actions/run-cloud-run-command-on-active-checkout@main + uses: ./.github/actions/run-cloud-run-command-on-active-checkout with: cmd: "make integration_test" service_account: ${{ secrets.gcp_service_account_email }} @@ -91,14 +95,14 @@ jobs: steps: - uses: actions/checkout@v4 - name: Setup development environment - uses: snapchat/gigl/.github/actions/setup-python-tools@main + uses: ./.github/actions/setup-python-tools with: setup_gcloud: "true" gcp_project_id: ${{ vars.GCP_PROJECT_ID }} workload_identity_provider: ${{ secrets.workload_identity_provider }} gcp_service_account_email: ${{ secrets.gcp_service_account_email }} - name: Run E2E Tests - uses: snapchat/gigl/.github/actions/run-cloud-run-command-on-active-checkout@main + uses: ./.github/actions/run-cloud-run-command-on-active-checkout with: cmd: "make run_all_e2e_tests" service_account: ${{ secrets.gcp_service_account_email }} @@ -111,14 +115,14 @@ jobs: # steps: # - uses: actions/checkout@v4 # - name: Setup development environment - # uses: snapchat/gigl/.github/actions/setup-python-tools@main + # uses: ./.github/actions/setup-python-tools # with: # setup_gcloud: "true" # gcp_project_id: ${{ vars.GCP_PROJECT_ID }} # workload_identity_provider: ${{ secrets.workload_identity_provider }} # gcp_service_account_email: ${{ secrets.gcp_service_account_email }} # - name: Run Example Notebook E2E Tests - # uses: snapchat/gigl/.github/actions/run-cloud-run-command-on-active-checkout@main + # uses: ./.github/actions/run-cloud-run-command-on-active-checkout # with: # cmd: "make notebooks_test" # service_account: ${{ secrets.gcp_service_account_email }} @@ -130,7 +134,7 @@ jobs: 
steps: - uses: actions/checkout@v4 - name: Setup development environment - uses: snapchat/gigl/.github/actions/setup-python-tools@main + uses: ./.github/actions/setup-python-tools with: install_dev_deps: "true" setup_gcloud: "true" @@ -138,7 +142,9 @@ workload_identity_provider: ${{ secrets.workload_identity_provider }} gcp_service_account_email: ${{ secrets.gcp_service_account_email }} - name: Run Lint Tests + shell: bash run: | + # sourcing .profile is important to resolve paths for java, sbt, et al. + # It is setup in the setup-python-tools action. source ~/.profile - make check_format - make assert_yaml_configs_parse + make lint_test diff --git a/.github/workflows/release-documentation.yml b/.github/workflows/release-documentation.yml index 5e9345b61..bc25d47ba 100644 --- a/.github/workflows/release-documentation.yml +++ b/.github/workflows/release-documentation.yml @@ -44,7 +44,7 @@ jobs: # We also make gigl available w/ editable install `-e` so that autodoc can find it. - name: Install necessary doc dependencies run: | - pip install -e "./python[docs]" + uv sync --extra docs - name: Sphinx build run: | make build_docs diff --git a/Makefile b/Makefile index aa06a1155..8d11a4e50 100644 --- a/Makefile +++ b/Makefile @@ -151,7 +151,7 @@ format: format_py format_scala format_md type_check: uv run mypy ${PYTHON_DIRS} --check-untyped-defs -lint_test: check_format assert_yaml_config_parse +lint_test: check_format assert_yaml_configs_parse @echo "Lint checks pass!" # compiles current working state of scala projects to local jars diff --git a/containers/Dockerfile.builder b/containers/Dockerfile.builder index 884d7afe5..90820fa6f 100644 --- a/containers/Dockerfile.builder +++ b/containers/Dockerfile.builder @@ -3,7 +3,7 @@ # This dockerfile is contains all Dev dependencies, and is used by gcloud # builders for running tests, et al. -FROM condaforge/miniforge3:25.3.0-1 +FROM ubuntu:noble-20251001 SHELL ["/bin/bash", "-c"] @@ -21,12 +21,20 @@ RUN apt-get update && apt-get install && apt-get install -y \ cmake \ sudo \ build-essential \ + curl \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* - +# Dec 1, 2025 (svij-sc): +# GCP Cloud build agents run an older version of docker daemon +# with max Docker API version support of 1.41. https://docs.cloud.google.com/build/docs/overview#docker +# At the time of writing Docker Client > v28 has deprecated support for < v1.44. +# https://docs.docker.com/engine/release-notes/29/#breaking-changes +# Thus we use v28.5.2, and also manually set the API version to 1.41 to ensure compatibility.
+ENV DOCKER_CLIENT_VERSION=28.5.2 +ENV DOCKER_API_VERSION=1.41 RUN curl -fsSL https://get.docker.com -o get-docker.sh && \ - sh get-docker.sh && \ + sh get-docker.sh --version ${DOCKER_CLIENT_VERSION} && \ rm get-docker.sh # Install Google Cloud CLI @@ -39,26 +47,34 @@ RUN mkdir -p /tools && \ ENV PATH="/tools/google-cloud-sdk/bin:/usr/lib/jvm/java-1.11.0-openjdk-amd64/bin:$PATH" ENV JAVA_HOME="/usr/lib/jvm/java-1.11.0-openjdk-amd64" -# Create the environment: -# TODO: (svij) Build env using single entrypoint `make initialize_environment` for better maintainability -RUN conda create -y --override-channels --channel conda-forge --name gigl python=3.9 pip - -# Update path so any call for python executables in the built image defaults to using the gnn conda environment -ENV PATH=/opt/conda/envs/gigl/bin:$PATH -# For debugging purposes, we also initialize respective conda env in bashrc -RUN conda init bash -RUN echo "conda activate gigl" >> ~/.bashrc - +WORKDIR /gigl_deps # We copy the tools directory from the host machine to the container # to avoid re-downloading the dependencies as some of them require GCP credentials. # and, mounting GCP credentials to build time can be a pain and more prone to # accidental leaking of credentials. -COPY tools gigl_deps/tools -COPY dep_vars.env gigl_deps/dep_vars.env -COPY requirements gigl_deps/requirements -COPY python/gigl/scripts gigl_deps/python/gigl/scripts -RUN pip install --upgrade pip -RUN cd gigl_deps && bash ./requirements/install_py_deps.sh --dev -RUN cd gigl_deps && bash ./requirements/install_scala_deps.sh +COPY tools tools +COPY pyproject.toml pyproject.toml +COPY uv.lock uv.lock +COPY dep_vars.env dep_vars.env +COPY requirements requirements +# Needed to install GLT +COPY python/gigl/scripts python/gigl/scripts + + +COPY .python-version tmp/.python-version +RUN bash ./requirements/install_py_deps.sh --dev + +# The UV_PROJECT_ENVIRONMENT environment variable can be used to configure the project virtual environment path +# Since the above command should have created the .venv, we activate by default for any future uv commands. +# We also need to set VIRTUAL_ENV so pip invocations can find the virtual environment. +ENV UV_PROJECT_ENVIRONMENT=/gigl_deps/.venv +ENV VIRTUAL_ENV="${UV_PROJECT_ENVIRONMENT}" +# We just created a virtual environment, let's add the bin to the path +ENV PATH="${UV_PROJECT_ENVIRONMENT}/bin:${PATH}" +# We also need to make UV detectable by the system +ENV PATH="/root/.local/bin:${PATH}" +RUN bash ./requirements/install_scala_deps.sh + +WORKDIR / CMD [ "/bin/bash" ] diff --git a/containers/Dockerfile.cpu.base b/containers/Dockerfile.cpu.base index bbd9cd4f5..d548fd69b 100644 --- a/containers/Dockerfile.cpu.base +++ b/containers/Dockerfile.cpu.base @@ -1,9 +1,11 @@ # syntax=docker/dockerfile:1 -FROM condaforge/miniforge3:25.3.0-1 +FROM ubuntu:noble-20251001 SHELL ["/bin/bash", "-c"] +ENV DEBIAN_FRONTEND=noninteractive + # TODO(mkolodner-sc): iputils-ping temporarily needed to setup inter-job VAI communication for GLT Inference. # Once VAI natively supports this communication, we can remove this requirement.
RUN apt-get update && apt-get install -y \ @@ -12,23 +14,31 @@ RUN apt-get update && apt-get install -y \ wget \ cmake \ iputils-ping \ + curl \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Create the environment: -# TODO: (svij) Build env using single entrypoint `make initialize_environment` for better maintainability -RUN conda create -y --override-channels --channel conda-forge --name gnn python=3.9 pip -# Update path so any call for python executables in the built image defaults to using the gnn conda environment -ENV PATH=/opt/conda/envs/gnn/bin:$PATH +WORKDIR /gigl_deps + +COPY pyproject.toml pyproject.toml +COPY uv.lock uv.lock +COPY requirements requirements +COPY python/gigl/scripts python/gigl/scripts +COPY .python-version .python-version + +RUN bash ./requirements/install_py_deps.sh -# For debugging purposes, we also initialize respective conda env in bashrc -RUN conda init bash -RUN echo "conda activate gnn" >> ~/.bashrc +# The UV_PROJECT_ENVIRONMENT environment variable can be used to configure the project virtual environment path +# Since the above command should have created the .venv, we activate by default for any future uv commands. +# We also need to set VIRTUAL_ENV so pip invocations can find the virtual environment. +ENV UV_PROJECT_ENVIRONMENT=/gigl_deps/.venv +ENV VIRTUAL_ENV="${UV_PROJECT_ENVIRONMENT}" +# We just created a virtual environment, let's add the bin to the path +ENV PATH="${UV_PROJECT_ENVIRONMENT}/bin:${PATH}" +# We also need to make UV detectable by the system +ENV PATH="/root/.local/bin:${PATH}" -COPY requirements tmp/requirements -COPY python/gigl/scripts tmp/python/gigl/scripts -RUN pip install --upgrade pip -RUN cd tmp && bash ./requirements/install_py_deps.sh +WORKDIR / CMD [ "/bin/bash" ] diff --git a/containers/Dockerfile.cuda.base b/containers/Dockerfile.cuda.base index e9bbf4869..df09e88bf 100644 --- a/containers/Dockerfile.cuda.base +++ b/containers/Dockerfile.cuda.base @@ -1,39 +1,42 @@ # syntax=docker/dockerfile:1 -# Used to generate hashed requirements.txt -FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04 + +FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-devel SHELL ["/bin/bash", "-c"] +ENV DEBIAN_FRONTEND=noninteractive + +# Already has python 3.11 installed - no need to install it again. +# We use system python since it has packages pre-installed for us. +ENV UV_SYSTEM_PYTHON=true +ENV UV_PROJECT_ENVIRONMENT=/opt/conda/ # Install basic dependencies # TODO(mkolodner-sc): iputils-ping temporarily needed to setup inter-job VAI communication for GLT Inference. # Once VAI natively supports this communication, we can remove this requirement.
-RUN apt-get update \ - && apt-get upgrade -y \ - && apt-get install -y build-essential git wget cmake iputils-ping \ +RUN apt-get update && apt-get install -y \ + build-essential \ + git \ + wget \ + cmake \ + iputils-ping \ + curl \ + unzip \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Install Miniconda -ENV CONDA_DIR=/opt/conda -RUN wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" && bash Miniforge3.sh -b -p ${CONDA_DIR} -ENV PATH=${CONDA_DIR}/bin:$PATH +WORKDIR /gigl_deps -# Create the conda env environment: -# TODO: (svij) Build env using single entrypoint `make initialize_environment` for better maintainability -RUN conda create -y --override-channels --channel conda-forge --name gnn python=3.9 pip +COPY pyproject.toml pyproject.toml +COPY uv.lock uv.lock +COPY requirements requirements +COPY python/gigl/scripts python/gigl/scripts -# Update path so any call for python executables in the built image defaults to using the gnn conda environment -ENV PATH=${CONDA_DIR}/envs/gnn/bin:$PATH +RUN bash ./requirements/install_py_deps.sh -# The default bashrc exists early and is mainly for local dev niceties - we delete so we can -# Setup Conda for debugging purposes -RUN rm ~/.bashrc && touch ~/.bashrc && conda init bash -RUN echo "conda activate gnn" >> ~/.bashrc +# We also need to make UV detectable by the system +ENV PATH="/root/.local/bin:${PATH}" -COPY requirements tmp/requirements -COPY python/gigl/scripts tmp/python/gigl/scripts -RUN pip install --upgrade pip -RUN cd tmp && bash ./requirements/install_py_deps.sh +WORKDIR / CMD [ "/bin/bash" ] diff --git a/containers/Dockerfile.dataflow.base b/containers/Dockerfile.dataflow.base index 5e135092e..434985331 100644 --- a/containers/Dockerfile.dataflow.base +++ b/containers/Dockerfile.dataflow.base @@ -1,5 +1,34 @@ -# Use the main Dockerfile.cpu.base as the base -ARG BASE_IMAGE -FROM $BASE_IMAGE +FROM apache/beam_python3.11_sdk:2.56.0 -COPY --from=apache/beam_python3.9_sdk:2.53.0 /opt/apache/beam /opt/apache/beam +ENV DEBIAN_FRONTEND=noninteractive + +# We use system python for dataflow images since it has python and apache beam pre-installed. +ENV UV_SYSTEM_PYTHON=true +ENV UV_PROJECT_ENVIRONMENT=/usr/local + +# TODO(mkolodner-sc): iputils-ping temporarily needed to setup inter-job VAI communication for GLT Inference. +# Once VAI natively supports this communication, we can remove this requirement. +RUN apt-get update && apt-get install -y \ + build-essential \ + git \ + wget \ + cmake \ + iputils-ping \ + curl \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + + +WORKDIR /gigl_deps + +COPY pyproject.toml pyproject.toml +COPY uv.lock uv.lock +COPY requirements requirements +COPY python/gigl/scripts python/gigl/scripts + +RUN bash ./requirements/install_py_deps.sh --skip-glt-post-install + +# We also need to make UV detectable by the system +ENV PATH="/root/.local/bin:${PATH}" + +WORKDIR / diff --git a/containers/Dockerfile.dataflow.src b/containers/Dockerfile.dataflow.src index b4d39e706..78fb6213e 100644 --- a/containers/Dockerfile.dataflow.src +++ b/containers/Dockerfile.dataflow.src @@ -2,30 +2,17 @@ ARG BASE_IMAGE FROM $BASE_IMAGE -# Ensure same as deployment/containers/Dockerfile.dataflow.src ================================================== # Copy the source WORKDIR /gigl -RUN touch __init__.py - -# Note: main package files must live in root of the repo for the python package to be built correctly for Dataflow workers. 
-# See https://beam.apache.org/documentation/sdks/python-pipeline-dependencies/#create-reproducible-environments. COPY MANIFEST.in MANIFEST.in -COPY python/setup.py setup.py COPY pyproject.toml pyproject.toml +COPY uv.lock uv.lock COPY dep_vars.env dep_vars.env COPY deployment deployment -COPY python/snapchat snapchat +COPY python python COPY examples examples -COPY python/gigl gigl - -# enables usage of tcm as the memory allocator instead of default C memory allocators. Mainly, advantageous for CPU training jobs -# Either boosts performance or does not make any improvement compared to default settings. -# PyTorch recommendation: https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html#switch-memory-allocator -# Replace `libtcmalloc` with `libjemalloc` if you want to try jem memory allocator -ENV LD_PRELOAD /opt/conda/envs/gnn/lib/libtcmalloc.so:$LD_PRELOAD -# ================================================================================================================= +RUN uv pip install -e . -# Set the entrypoint to Apache Beam SDK launcher. -ENTRYPOINT ["/opt/apache/beam/boot"] +WORKDIR / diff --git a/containers/Dockerfile.src b/containers/Dockerfile.src index 089a58996..911c3d6b7 100644 --- a/containers/Dockerfile.src +++ b/containers/Dockerfile.src @@ -4,22 +4,17 @@ FROM $BASE_IMAGE # Copy the source WORKDIR /gigl -RUN touch __init__.py # Note: main package files must live in root of the repo for the python package to be built correctly for Dataflow workers. -# See https://beam.apache.org/documentation/sdks/python-pipeline-dependencies/#create-reproducible-environments. +# See https://beam.apache.org/documentation/sdks/python-pipeline-dependencies/#create-reproducible-environments. +WORKDIR /gigl + COPY MANIFEST.in MANIFEST.in -COPY python/setup.py setup.py COPY pyproject.toml pyproject.toml +COPY uv.lock uv.lock COPY dep_vars.env dep_vars.env COPY deployment deployment -COPY python/snapchat snapchat +COPY python python COPY examples examples -COPY python/gigl gigl -# enables usage of tcm as the memory allocator instead of default C memory allocators. Mainly, advantageous for CPU training jobs -# Either boosts performance or does not make any improvement compared to default settings. -# PyTorch recommendation: https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html#switch-memory-allocator -# Replace `libtcmalloc` with `libjemalloc` if you want to try jem memory allocator -# nshah: This creates huge memory overhead with graphlearn-torch usage. Disabling this memory allocator. -# ENV LD_PRELOAD /opt/conda/envs/gnn/lib/libtcmalloc.so:$LD_PRELOAD +RUN uv pip install -e . diff --git a/dep_vars.env b/dep_vars.env index 09af0820d..784ed142f 100644 --- a/dep_vars.env +++ b/dep_vars.env @@ -1,7 +1,7 @@ # Note this file only supports static key value pairs so it can be loaded by make, bash, python, and sbt without any additional parsing.
-DOCKER_LATEST_BASE_CUDA_IMAGE_NAME_WITH_TAG=us-central1-docker.pkg.dev/external-snap-ci-github-gigl/public-gigl/gigl-cuda-base:6a94ae7cad3ec0c633246b0c9340a5095527deb9.63.2 -DOCKER_LATEST_BASE_CPU_IMAGE_NAME_WITH_TAG=us-central1-docker.pkg.dev/external-snap-ci-github-gigl/public-gigl/gigl-cpu-base:6a94ae7cad3ec0c633246b0c9340a5095527deb9.63.2 -DOCKER_LATEST_BASE_DATAFLOW_IMAGE_NAME_WITH_TAG=us-central1-docker.pkg.dev/external-snap-ci-github-gigl/public-gigl/gigl-dataflow-base:6a94ae7cad3ec0c633246b0c9340a5095527deb9.63.2 +DOCKER_LATEST_BASE_CUDA_IMAGE_NAME_WITH_TAG=us-central1-docker.pkg.dev/external-snap-ci-github-gigl/public-gigl/gigl-cuda-base:51af343c1c298ab465a96ecffd4e50ea6dffacb7.88.1 +DOCKER_LATEST_BASE_CPU_IMAGE_NAME_WITH_TAG=us-central1-docker.pkg.dev/external-snap-ci-github-gigl/public-gigl/gigl-cpu-base:51af343c1c298ab465a96ecffd4e50ea6dffacb7.88.1 +DOCKER_LATEST_BASE_DATAFLOW_IMAGE_NAME_WITH_TAG=us-central1-docker.pkg.dev/external-snap-ci-github-gigl/public-gigl/gigl-dataflow-base:51af343c1c298ab465a96ecffd4e50ea6dffacb7.88.1 DEFAULT_GIGL_RELEASE_SRC_IMAGE_CUDA=us-central1-docker.pkg.dev/external-snap-ci-github-gigl/public-gigl/src-cuda:0.0.11 DEFAULT_GIGL_RELEASE_SRC_IMAGE_CPU=us-central1-docker.pkg.dev/external-snap-ci-github-gigl/public-gigl/src-cpu:0.0.11 diff --git a/deployment/configs/e2e_cicd_resource_config.yaml b/deployment/configs/e2e_cicd_resource_config.yaml index ec6fa9540..21fe6ab0a 100644 --- a/deployment/configs/e2e_cicd_resource_config.yaml +++ b/deployment/configs/e2e_cicd_resource_config.yaml @@ -44,7 +44,7 @@ split_generator_config: trainer_config: vertex_ai_trainer_config: machine_type: "n1-highmem-8" # set to `ACCELERATOR_TYPE_UNSPECIFIED` for cpu training - gpu_type: nvidia-tesla-p100 + gpu_type: NVIDIA_TESLA_T4 gpu_limit: 1 # set to 0 for cpu training num_replicas: 2 inferencer_config: diff --git a/deployment/configs/unittest_resource_config.yaml b/deployment/configs/unittest_resource_config.yaml index 2b6666ec3..c9335d81d 100644 --- a/deployment/configs/unittest_resource_config.yaml +++ b/deployment/configs/unittest_resource_config.yaml @@ -46,7 +46,7 @@ split_generator_config: trainer_config: vertex_ai_trainer_config: machine_type: "n1-highmem-8" - gpu_type: nvidia-tesla-p100 # set to `ACCELERATOR_TYPE_UNSPECIFIED` for cpu training + gpu_type: NVIDIA_TESLA_T4 # set to `ACCELERATOR_TYPE_UNSPECIFIED` for cpu training gpu_limit: 1 # set to 0 for cpu training num_replicas: 2 inferencer_config: diff --git a/docs/examples/configs/template_resource_config.yaml b/docs/examples/configs/template_resource_config.yaml index f8335b501..f87d315f8 100644 --- a/docs/examples/configs/template_resource_config.yaml +++ b/docs/examples/configs/template_resource_config.yaml @@ -31,11 +31,11 @@ split_generator_config: # Dataproc config trainer_config: vertex_ai_trainer_config: # or local_trainer_config machine_type: "n1-highmem-8" - gpu_type: nvidia-tesla-p100 + gpu_type: NVIDIA_TESLA_T4 gpu_limit: 1 num_replicas: 2 inferencer_config: num_workers: 1 max_num_workers: 256 machine_type: "c3-standard-22" - disk_size_gb: 100 \ No newline at end of file + disk_size_gb: 100 diff --git a/docs/user_guide/config_guides/resource_config_guide.md b/docs/user_guide/config_guides/resource_config_guide.md index d909650c4..decc23df1 100644 --- a/docs/user_guide/config_guides/resource_config_guide.md +++ b/docs/user_guide/config_guides/resource_config_guide.md @@ -54,7 +54,7 @@ split_generator_config: trainer_config: vertex_ai_trainer_config: machine_type: "" # e.g. 
n1-highmem-16 - gpu_type: "" # e.g. nvidia-tesla-p100 + gpu_type: "" # e.g. NVIDIA_TESLA_T4 gpu_limit: 1 num_replicas: 1 inferencer_config: diff --git a/mypy.ini b/mypy.ini index 7259770b1..d488c2a83 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,6 +1,6 @@ # Global options: [mypy] -python_version = 3.9 +python_version = 3.11 # Ignore modules that don't have any existing stubs diff --git a/python/gigl/common/utils/compute/serialization/serialize_protos.py b/python/gigl/common/utils/compute/serialization/serialize_protos.py index dac0ed7d2..97bfb09cb 100644 --- a/python/gigl/common/utils/compute/serialization/serialize_protos.py +++ b/python/gigl/common/utils/compute/serialization/serialize_protos.py @@ -8,10 +8,10 @@ from snapchat.research.gbml import graph_schema_pb2 """ -In dataflow, we use wrapper object as key, value beam DoFn outputs and also for shuffle. We only -need to serialize the proto itself and not the wrapper. The proto objects also do not contain Map, -therefore can be deterministic. Which is specially important when shuffling with proto wrapper -objects as key. +In dataflow, we use wrapper object as key, value beam DoFn outputs and also for shuffle. We only +need to serialize the proto itself and not the wrapper. The proto objects also do not contain Map, +therefore can be deterministic. Which is specially important when shuffling with proto wrapper +objects as key. """ diff --git a/python/gigl/distributed/utils/networking.py b/python/gigl/distributed/utils/networking.py index cf733c4e3..7d2ba46b9 100644 --- a/python/gigl/distributed/utils/networking.py +++ b/python/gigl/distributed/utils/networking.py @@ -155,7 +155,7 @@ def get_internal_ip_from_node( # Other nodes will receive the master's IP via broadcast ip_list = [None] - device = "cuda" if torch.cuda.is_available() else "cpu" + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") torch.distributed.broadcast_object_list(ip_list, src=node_rank, device=device) node_ip = ip_list[0] logger.info(f"Rank {rank} received master node's internal IP: {node_ip}") diff --git a/python/gigl/nn/models.py b/python/gigl/nn/models.py index 9fa29f62e..15adde632 100644 --- a/python/gigl/nn/models.py +++ b/python/gigl/nn/models.py @@ -397,16 +397,16 @@ def _weighted_layer_sum( Returns: torch.Tensor: Weighted sum of all layer embeddings, shape [N, D]. """ - if len(all_layer_embeddings) != len(self._layer_weights): + if len(all_layer_embeddings) != len(self._layer_weights): # type: ignore # https://github.com/Snapchat/GiGL/issues/408 raise ValueError( - f"Got {len(all_layer_embeddings)} layer tensors but {len(self._layer_weights)} weights." + f"Got {len(all_layer_embeddings)} layer tensors but {len(self._layer_weights)} weights." # type: ignore # https://github.com/Snapchat/GiGL/issues/408 ) # Stack all layer embeddings and compute weighted sum # _layer_weights is already a tensor buffer registered in __init__ stacked = torch.stack(all_layer_embeddings, dim=0) # shape [K+1, N, D] w = self._layer_weights.to(stacked.device) # shape [K+1], ensure on same device - out = (stacked * w.view(-1, 1, 1)).sum( + out = (stacked * w.view(-1, 1, 1)).sum( # type: ignore # https://github.com/Snapchat/GiGL/issues/408 dim=0 ) # shape [N, D], w_0*X_0 + w_1*X_1 + ... 
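Note (illustrative, not part of the diff): the `# type: ignore` markers above track https://github.com/Snapchat/GiGL/issues/408; the underlying computation is the weighted layer sum shown in the hunk. A minimal standalone sketch, assuming K+1 layer tensors of shape [N, D] and a weight vector of shape [K+1], mirroring the `stacked`/`w` names from the diff:

    import torch

    def weighted_layer_sum(all_layer_embeddings: list[torch.Tensor], layer_weights: torch.Tensor) -> torch.Tensor:
        # all_layer_embeddings: K+1 tensors of shape [N, D]; layer_weights: shape [K+1]
        stacked = torch.stack(all_layer_embeddings, dim=0)  # [K+1, N, D]
        w = layer_weights.to(stacked.device)                 # [K+1], ensure same device as embeddings
        return (stacked * w.view(-1, 1, 1)).sum(dim=0)       # [N, D] = w_0*X_0 + w_1*X_1 + ...

    # e.g. two layers with uniform weights simply averages the layer embeddings
    out = weighted_layer_sum([torch.ones(3, 4), torch.zeros(3, 4)], torch.tensor([0.5, 0.5]))
    assert out.shape == (3, 4)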
diff --git a/python/gigl/src/common/modeling_task_specs/graphsage_template_modeling_spec.py b/python/gigl/src/common/modeling_task_specs/graphsage_template_modeling_spec.py index c91532cb2..9cae41eb8 100644 --- a/python/gigl/src/common/modeling_task_specs/graphsage_template_modeling_spec.py +++ b/python/gigl/src/common/modeling_task_specs/graphsage_template_modeling_spec.py @@ -174,9 +174,12 @@ def train( early_stop_counter = 0 best_val_loss = float("inf") + assert hasattr(self.model, "graph_backend") + assert isinstance(self.model.graph_backend, GraphBackend) + graph_backend = self.model.graph_backend data_loaders: Dataloaders = self._dataloaders.get_training_dataloaders( gbml_config_pb_wrapper=gbml_config_pb_wrapper, - graph_backend=self.model.graph_backend, + graph_backend=graph_backend, device=device, ) @@ -411,9 +414,12 @@ def eval( logger.info("Start testing...") + assert hasattr(self.model, "graph_backend") + assert isinstance(self.model.graph_backend, GraphBackend) + graph_backend = self.model.graph_backend data_loaders: Dataloaders = self._dataloaders.get_test_dataloaders( gbml_config_pb_wrapper=gbml_config_pb_wrapper, - graph_backend=self.model.graph_backend, + graph_backend=graph_backend, device=device, ) diff --git a/python/gigl/src/common/modeling_task_specs/node_classification_modeling_task_spec.py b/python/gigl/src/common/modeling_task_specs/node_classification_modeling_task_spec.py index 5fa98ca95..e8c8c54d2 100644 --- a/python/gigl/src/common/modeling_task_specs/node_classification_modeling_task_spec.py +++ b/python/gigl/src/common/modeling_task_specs/node_classification_modeling_task_spec.py @@ -200,7 +200,7 @@ def score( assert root_node_labels is not None results: InferBatchResults = self.infer_batch(batch=batch, device=device) - num_correct_in_batch = int((results.predictions == root_node_labels).sum()) + num_correct_in_batch = int((results.predictions == root_node_labels).sum()) # type: ignore # https://github.com/Snapchat/GiGL/issues/408 num_correct += num_correct_in_batch num_evaluated += len(batch.root_node_labels) diff --git a/python/gigl/src/common/modeling_task_specs/utils/infer.py b/python/gigl/src/common/modeling_task_specs/utils/infer.py index 0222feb28..13804bea8 100644 --- a/python/gigl/src/common/modeling_task_specs/utils/infer.py +++ b/python/gigl/src/common/modeling_task_specs/utils/infer.py @@ -139,8 +139,8 @@ def infer_task_inputs( decoder = model.module.decode batch_result_types = model.module.tasks.result_types else: - decoder = model.decode - batch_result_types = model.tasks.result_types + decoder = model.decode # type: ignore # https://github.com/Snapchat/GiGL/issues/408 + batch_result_types = model.tasks.result_types # type: ignore # https://github.com/Snapchat/GiGL/issues/408 # If we only have losses which only require the input batch, don't forward here and return the # input batch immediately to minimize computation we don't need, such as encoding and decoding. 
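Note (illustrative, not part of the diff): the `model.module` vs `model` branching patched in infer.py above follows the usual pattern for reaching attributes on a model that may be wrapped by DistributedDataParallel; a minimal sketch of that unwrapping, assuming the custom attributes (e.g. `decode`, `tasks`) live on the inner module:

    import torch

    def unwrap_model(model: torch.nn.Module) -> torch.nn.Module:
        # DDP and DataParallel wrap the user model and expose it via `.module`;
        # custom attributes such as `decode` or `tasks` live on the wrapped (inner) model.
        if isinstance(model, (torch.nn.parallel.DistributedDataParallel, torch.nn.DataParallel)):
            return model.module
        return model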
diff --git a/python/gigl/src/common/models/layers/feature_interaction.py b/python/gigl/src/common/models/layers/feature_interaction.py index aa7ad737f..afa025365 100644 --- a/python/gigl/src/common/models/layers/feature_interaction.py +++ b/python/gigl/src/common/models/layers/feature_interaction.py @@ -149,7 +149,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: def reset_parameters(self): for layer in self._layers: - layer.reset_parameters() + if hasattr(layer, "reset_parameters") and callable(layer.reset_parameters): + layer.reset_parameters() def __repr__(self) -> str: return f"{self.__class__.__name__}(in_dim={self._in_dim}, num_layers={self._num_layers}, projection_dim={self._projection_dim}, diag_scale={self._diag_scale}, use_bias={self._use_bias})" diff --git a/python/gigl/src/common/models/layers/loss.py b/python/gigl/src/common/models/layers/loss.py index 958e0304a..b03c150e5 100644 --- a/python/gigl/src/common/models/layers/loss.py +++ b/python/gigl/src/common/models/layers/loss.py @@ -142,7 +142,7 @@ def _calculate_softmax_loss( ) # shape=[num_pos_nodes] loss = F.cross_entropy( - input=all_scores / self.softmax_temperature, + input=all_scores / self.softmax_temperature, # type: ignore # https://github.com/Snapchat/GiGL/issues/408 target=ys, reduction="sum", ) diff --git a/python/gigl/src/common/models/layers/task.py b/python/gigl/src/common/models/layers/task.py index 35f00aec2..b82dee44b 100644 --- a/python/gigl/src/common/models/layers/task.py +++ b/python/gigl/src/common/models/layers/task.py @@ -709,7 +709,7 @@ def _get_all_tasks( for task in list(self._task_to_weights_map.keys()): fn = self._task_to_fn_map[task] weight = self._task_to_weights_map[task] - tasks_list.append((fn, weight)) + tasks_list.append((fn, weight)) # type: ignore # https://github.com/Snapchat/GiGL/issues/408 return tasks_list def add_task( diff --git a/python/gigl/src/mocking/lib/pyg_datasets_forks.py b/python/gigl/src/mocking/lib/pyg_datasets_forks.py index de026b61d..e83abfe0c 100644 --- a/python/gigl/src/mocking/lib/pyg_datasets_forks.py +++ b/python/gigl/src/mocking/lib/pyg_datasets_forks.py @@ -1,7 +1,7 @@ """ Our mocking logic uses public datasets like Cora and DBLP from PyG. PyG datasets are -downloaded from public sources which may not be available or rate-limit us. We thus -override the dataset classes to download the datasets from GCS buckets to avoid issues. +downloaded from public sources which may not be available or rate-limit us. We thus +override the dataset classes to download the datasets from GCS buckets to avoid issues. 
""" from torch_geometric.data import extract_zip diff --git a/python/tests/unit/src/common/modeling_task_spec_utils/early_stop_test.py b/python/tests/unit/src/common/modeling_task_spec_utils/early_stop_test.py index 1f38ac487..880821876 100644 --- a/python/tests/unit/src/common/modeling_task_spec_utils/early_stop_test.py +++ b/python/tests/unit/src/common/modeling_task_spec_utils/early_stop_test.py @@ -94,7 +94,7 @@ def test_early_stopping( for step_num, value in enumerate(mocked_criteria_values): has_metric_improved, should_early_stop = early_stopper.step(value=value) if model is not None: - model.foo += 1 + model.foo += 1 # type: ignore # https://github.com/Snapchat/GiGL/issues/408 if step_num in improvement_steps: self.assertTrue(has_metric_improved) else: diff --git a/testing/e2e_tests/e2e_tests.yaml b/testing/e2e_tests/e2e_tests.yaml index b084b9479..44b4445f0 100644 --- a/testing/e2e_tests/e2e_tests.yaml +++ b/testing/e2e_tests/e2e_tests.yaml @@ -2,16 +2,16 @@ # This file contains all the test specifications that can be run via the e2e test script tests: cora_nalp_test: - task_config_uri: "gigl/src/mocking/configs/e2e_node_anchor_based_link_prediction_template_gbml_config.yaml" + task_config_uri: "python/gigl/src/mocking/configs/e2e_node_anchor_based_link_prediction_template_gbml_config.yaml" resource_config_uri: "${oc.env:GIGL_TEST_DEFAULT_RESOURCE_CONFIG,deployment/configs/e2e_cicd_resource_config.yaml}" cora_snc_test: - task_config_uri: "gigl/src/mocking/configs/e2e_supervised_node_classification_template_gbml_config.yaml" + task_config_uri: "python/gigl/src/mocking/configs/e2e_supervised_node_classification_template_gbml_config.yaml" resource_config_uri: "${oc.env:GIGL_TEST_DEFAULT_RESOURCE_CONFIG,deployment/configs/e2e_cicd_resource_config.yaml}" cora_udl_test: - task_config_uri: "gigl/src/mocking/configs/e2e_udl_node_anchor_based_link_prediction_template_gbml_config.yaml" + task_config_uri: "python/gigl/src/mocking/configs/e2e_udl_node_anchor_based_link_prediction_template_gbml_config.yaml" resource_config_uri: "${oc.env:GIGL_TEST_DEFAULT_RESOURCE_CONFIG,deployment/configs/e2e_cicd_resource_config.yaml}" dblp_nalp_test: - task_config_uri: "gigl/src/mocking/configs/dblp_node_anchor_based_link_prediction_template_gbml_config.yaml" + task_config_uri: "python/gigl/src/mocking/configs/dblp_node_anchor_based_link_prediction_template_gbml_config.yaml" resource_config_uri: "${oc.env:GIGL_TEST_DEFAULT_RESOURCE_CONFIG,deployment/configs/e2e_cicd_resource_config.yaml}" hom_cora_sup_test: task_config_uri: "examples/link_prediction/configs/e2e_hom_cora_sup_task_config.yaml" diff --git a/uv.lock b/uv.lock index e3df8684b..10f25efd8 100644 --- a/uv.lock +++ b/uv.lock @@ -893,7 +893,7 @@ requires-dist = [ { name = "numpy" }, { name = "omegaconf", specifier = ">=2.3.0,<3.0.0" }, { name = "pandas" }, - { name = "pip" }, + { name = "pip", specifier = "~=25.3" }, { name = "protobuf" }, { name = "pyarrow", marker = "extra == 'transform'", specifier = "==10.0.1" }, { name = "pyg-lib", marker = "sys_platform != 'darwin' and extra == 'pyg27-torch28-cpu'", index = "https://data.pyg.org/whl/torch-2.8.0+cpu.html", conflict = { package = "gigl", extra = "pyg27-torch28-cpu" } },