From d68a7059ea8741333f8ac327d7000befc7768ddf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= Date: Tue, 8 Jul 2025 10:57:30 +0000 Subject: [PATCH 1/7] GitHub Actions build refactor Co-authored-by: Ihor Indyk --- .bazelrc | 10 -- .../workflows/build_and_publish_template.yml | 87 ++++++++++++ .github/workflows/publish_release.yml | 13 ++ .github/workflows/python-tests.yml | 2 +- .gitignore | 12 ++ cpp/array_record_reader.cc | 2 +- oss/{build.Dockerfile => Dockerfile} | 10 +- oss/build_whl.sh | 35 ++--- oss/runner_common.sh | 126 +++++------------- oss/test_with_grain.py | 5 + oss/test_with_tf.py | 6 + python/array_record_data_source_test.py | 2 +- setup.py | 3 +- 13 files changed, 190 insertions(+), 123 deletions(-) delete mode 100644 .bazelrc create mode 100644 .github/workflows/build_and_publish_template.yml create mode 100644 .github/workflows/publish_release.yml create mode 100644 .gitignore rename oss/{build.Dockerfile => Dockerfile} (78%) create mode 100644 oss/test_with_grain.py create mode 100644 oss/test_with_tf.py diff --git a/.bazelrc b/.bazelrc deleted file mode 100644 index d60866a..0000000 --- a/.bazelrc +++ /dev/null @@ -1,10 +0,0 @@ -build -c opt -build --cxxopt=-std=c++17 -build --host_cxxopt=-std=c++17 -build --experimental_repo_remote_exec - -# TODO(fchern): Use non-hardcode path. -build --action_env=PYTHON_BIN_PATH="/usr/bin/python3" -build --action_env=PYTHON_LIB_PATH="/usr/lib/python3" -build --repo_env=PYTHON_BIN_PATH="/usr/bin/python3" -build --python_path="/usr/bin/python3" diff --git a/.github/workflows/build_and_publish_template.yml b/.github/workflows/build_and_publish_template.yml new file mode 100644 index 0000000..d9ebf6c --- /dev/null +++ b/.github/workflows/build_and_publish_template.yml @@ -0,0 +1,87 @@ +# This workflow builds ArrayRecord wheels and uploads them as artifacts. + +name: Build & Publish Template + +on: + workflow_call: + inputs: + pypi_project_url: + required: true + type: string + is_nightly: + required: true + type: boolean + +defaults: + run: + shell: bash + +permissions: + contents: read + +jobs: + build-and-test: + name: "Python ${{ matrix.python-version }} on ${{ matrix.os }}" + runs-on: "${{ matrix.os }}" + + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12", "3.13"] + os: [ubuntu-22.04, ubuntu-22.04-arm, macos-14] + + env: + USE_BAZEL_VERSION: "7.2.1" + steps: + - name: Set up Bazel + uses: bazel-contrib/setup-bazel@0.15.0 + - name: Check Bazel installation + run: | + which bazel + bazel version + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - uses: "actions/checkout@v3" + - name: Create directory + run: | + mkdir -p /tmp/array_record + cp -r . /tmp/array_record + - name: Build package + run: | + set -xe + export PYTHON_VERSION=${{ matrix.python-version }} + export PYTHON_MAJOR_VERSION=$(echo $PYTHON_VERSION | cut -d. -f1) + export PYTHON_MINOR_VERSION=$(echo $PYTHON_VERSION | cut -d. -f2) + export BAZEL_VERSION="7.2.1" + export OUTPUT_DIR="/tmp/array_record" + export SOURCE_DIR="/tmp/array_record" + . "${SOURCE_DIR}"'/oss/runner_common.sh' + build_and_test_array_record + - name: Upload ArrayRecord artifacts + uses: actions/upload-artifact@v4 + with: + name: built-array-record-wheels-${{ matrix.os }}-${{ matrix.python-version }} + path: /tmp/array_record/all_dist/*.whl + + publish-wheel: + runs-on: ubuntu-22.04 + needs: build-and-test + permissions: + id-token: write + environment: + name: pypi + url: ${{ inputs.pypi_project_url }} + steps: + - name: Download ArrayRecord artifacts + uses: actions/download-artifact@v4 + with: + pattern: built-array-record-wheels-* + path: dist/ + merge-multiple: true + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + attestations: false + verbose: true diff --git a/.github/workflows/publish_release.yml b/.github/workflows/publish_release.yml new file mode 100644 index 0000000..7981472 --- /dev/null +++ b/.github/workflows/publish_release.yml @@ -0,0 +1,13 @@ +name: Build and Publish Release + +on: workflow_dispatch + +jobs: + call-workflow: + uses: ./.github/workflows/build_and_publish_template.yml + permissions: + contents: read + id-token: write + with: + pypi_project_url: https://pypi.org/project/array-record + is_nightly: false diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 6289979..88f53a5 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -19,7 +19,7 @@ jobs: run: | docker build --progress=plain --no-cache \ --build-arg PYTHON_VERSION=${{ matrix.python-version }} \ - -t array_record:latest - < oss/build.Dockerfile + -t array_record:latest - < oss/Dockerfile - name: Build wheels and test run: | docker run --rm -a stdin -a stdout -a stderr \ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4c6107b --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# Bazel outputs +bazel-array_record +bazel-bin +bazel-out +bazel-testlogs + +MODULE.bazel.lock diff --git a/cpp/array_record_reader.cc b/cpp/array_record_reader.cc index cf0f5c0..b6c5026 100644 --- a/cpp/array_record_reader.cc +++ b/cpp/array_record_reader.cc @@ -80,7 +80,7 @@ class ChunkOffset { public: virtual ~ChunkOffset() {} virtual uint64_t operator[](size_t idx) const = 0; - virtual size_t size() const = 0; + virtual uint64_t size() const = 0; bool empty() const { return size() == 0; } }; diff --git a/oss/build.Dockerfile b/oss/Dockerfile similarity index 78% rename from oss/build.Dockerfile rename to oss/Dockerfile index 17fd296..0931212 100644 --- a/oss/build.Dockerfile +++ b/oss/Dockerfile @@ -6,13 +6,15 @@ ARG AUDITWHEEL_PLATFORM FROM quay.io/pypa/${AUDITWHEEL_PLATFORM} ARG PYTHON_VERSION -ARG PYTHON_BIN ARG BAZEL_VERSION ENV DEBIAN_FRONTEND=noninteractive RUN ulimit -n 1024 && yum install -y rsync -ENV PATH="${PYTHON_BIN}:${PATH}" +ENV PYTHON_BIN_PATH=/opt/python/cp${PYTHON_VERSION}-cp${PYTHON_VERSION}/bin +ENV PATH="${PYTHON_BIN_PATH}:${PATH}" + +ENV PYTHON_BIN=${PYTHON_BIN_PATH}/python # Download the correct bazel version and make sure it's on path. RUN BAZEL_ARCH_SUFFIX="$(uname -m | sed s/aarch64/arm64/)" \ @@ -21,7 +23,7 @@ RUN BAZEL_ARCH_SUFFIX="$(uname -m | sed s/aarch64/arm64/)" \ # Install dependencies needed for array_record. RUN --mount=type=cache,target=/root/.cache \ - ${PYTHON_BIN}/python -m pip install -U \ + $PYTHON_BIN -m pip install -U \ absl-py \ auditwheel \ etils[epath] \ @@ -30,4 +32,4 @@ RUN --mount=type=cache,target=/root/.cache \ twine \ wheel; -WORKDIR "/tmp/array_record" \ No newline at end of file +WORKDIR "/tmp/array_record" diff --git a/oss/build_whl.sh b/oss/build_whl.sh index 5967208..8c7bccd 100755 --- a/oss/build_whl.sh +++ b/oss/build_whl.sh @@ -1,22 +1,12 @@ #!/bin/bash + # Build wheel for the python version specified by $PYTHON_VERSION. # Optionally, can set the environment variable $PYTHON_BIN to refer to a # specific python interpreter. set -e -x -if [ -z ${PYTHON_BIN} ]; then - if [ -z ${PYTHON_VERSION} ]; then - PYTHON_BIN=$(which python3) - else - PYTHON_BIN=$(which python${PYTHON_VERSION}) - fi -fi - -PYTHON_MAJOR_VERSION=$(${PYTHON_BIN} -c 'import sys; print(sys.version_info.major)') -PYTHON_MINOR_VERSION=$(${PYTHON_BIN} -c 'import sys; print(sys.version_info.minor)') -PYTHON_VERSION="${PYTHON_MAJOR_VERSION}.${PYTHON_MINOR_VERSION}" -export PYTHON_VERSION="${PYTHON_VERSION}" +OUTPUT_DIR="${OUTPUT_DIR:-/tmp/array_record}" function write_to_bazelrc() { echo "$1" >> .bazelrc @@ -31,6 +21,8 @@ function main() { write_to_bazelrc "build --host_cxxopt=-std=c++17" write_to_bazelrc "build --experimental_repo_remote_exec" write_to_bazelrc "build --python_path=\"${PYTHON_BIN}\"" + write_to_bazelrc "test --python_path=\"${PYTHON_BIN}\"" + PLATFORM="$(uname)" if [ -n "${CROSSTOOL_TOP}" ]; then write_to_bazelrc "build --crosstool_top=${CROSSTOOL_TOP}" @@ -39,8 +31,8 @@ function main() { export USE_BAZEL_VERSION="${BAZEL_VERSION}" bazel clean - bazel build ... - bazel test --verbose_failures --test_output=errors ... + bazel build ... --action_env MACOSX_DEPLOYMENT_TARGET='11.0' --action_env PYTHON_BIN_PATH="${PYTHON_BIN}" + bazel test --verbose_failures --test_output=errors ... --action_env PYTHON_BIN_PATH="${PYTHON_BIN}" DEST="/tmp/array_record/all_dist" # Create the directory, then do dirname on a non-existent file inside it to @@ -68,7 +60,11 @@ function main() { pushd ${TMPDIR} echo $(date) : "=== Building wheel" - ${PYTHON_BIN} setup.py bdist_wheel --python-tag py3${PYTHON_MINOR_VERSION} + if [ "$(uname)" = "Darwin" ]; then + "$PYTHON_BIN" setup.py bdist_wheel --python-tag py3"${PYTHON_MINOR_VERSION}" --plat-name macosx_11_0_"$(uname -m)" + else + "$PYTHON_BIN" setup.py bdist_wheel --python-tag py3"${PYTHON_MINOR_VERSION}" + fi if [ -n "${AUDITWHEEL_PLATFORM}" ]; then echo $(date) : "=== Auditing wheel" @@ -81,6 +77,15 @@ function main() { popd echo $(date) : "=== Output wheel file is in: ${DEST}" + + # Install ArrayRecord from the wheel and run smoke tests. + # TF is not available on Python 3.13 and above. + if (( "${PYTHON_MINOR_VERSION}" < 13 )); then + $PYTHON_BIN -m pip install --find-links=/tmp/grain/all_dist --pre array-record + $PYTHON_BIN -m pip install jax tensorflow grain + $PYTHON_BIN oss/test_with_grain.py + $PYTHON_BIN oss/test_with_tf.py + fi } main diff --git a/oss/runner_common.sh b/oss/runner_common.sh index 1ebc289..2dee5e0 100644 --- a/oss/runner_common.sh +++ b/oss/runner_common.sh @@ -1,102 +1,48 @@ #!/bin/bash -# Builds ArrayRecord from source code located in SOURCE_DIR producing wheels -# under $SOURCE_DIR/all_dist. -function build_and_test_array_record_linux() { - SOURCE_DIR=$1 - - # Automatically decide which platform to build for by checking on which - # platform this runs. - AUDITWHEEL_PLATFORM="manylinux2014_$(uname -m)" +set -e -x - # Using a previous version of Blaze to avoid: - # https://github.com/bazelbuild/bazel/issues/8622 - export BAZEL_VERSION="8.0.0" +OUTPUT_DIR="${OUTPUT_DIR:-/tmp/array_record}" - # Build wheels for multiple Python minor versions. - PYTHON_MAJOR_VERSION=3 - for PYTHON_MINOR_VERSION in 10 11 12 - do - PYTHON_VERSION=${PYTHON_MAJOR_VERSION}.${PYTHON_MINOR_VERSION} - PYTHON_BIN=/opt/python/cp${PYTHON_MAJOR_VERSION}${PYTHON_MINOR_VERSION}-cp${PYTHON_MAJOR_VERSION}${PYTHON_MINOR_VERSION}/bin +setup_env_vars_py() { + # This controls the python binary to use. + PYTHON_MAJOR_VERSION="$1" + PYTHON_MINOR_VERSION="$2" + PYENV_PYTHON_VERSION="${PYTHON_MAJOR_VERSION}"'.'"${PYTHON_MINOR_VERSION}" + PYTHON='python'"${PYENV_PYTHON_VERSION}" + export PYTHON + PYTHON_BIN="$(which python)" + export PYTHON_BIN +} - # Cleanup older images. +# Builds ArrayRecord from source code located in SOURCE_DIR producing wheels +# under $SOURCE_DIR/all_dist. +build_and_test_array_record() { + printf 'Creating ArrayRecord wheel for Python Version %s\n' "$PYTHON_VERSION" + if [ "$(uname)" = "Darwin" ]; then + setup_env_vars_py "$PYTHON_MAJOR_VERSION" "$PYTHON_MINOR_VERSION" + "$PYTHON_BIN" -m pip install -U setuptools wheel etils[epath] + sh "${SOURCE_DIR}"'/oss/build_whl.sh' + else + # Automatically decide which platform to build for by checking on which + # platform this runs. + AUDITWHEEL_PLATFORM='manylinux2014_'"$(uname -m)" docker rmi -f array_record:${PYTHON_VERSION} docker rm -f array_record - DOCKER_BUILDKIT=1 docker build --progress=plain --no-cache \ - --build-arg AUDITWHEEL_PLATFORM=${AUDITWHEEL_PLATFORM} \ - --build-arg PYTHON_VERSION=${PYTHON_VERSION} \ - --build-arg PYTHON_BIN=${PYTHON_BIN} \ - --build-arg BAZEL_VERSION=${BAZEL_VERSION} \ - -t array_record:${PYTHON_VERSION} - < ${SOURCE_DIR}/oss/build.Dockerfile + --build-arg AUDITWHEEL_PLATFORM="${AUDITWHEEL_PLATFORM}" \ + --build-arg PYTHON_VERSION="${PYTHON_MAJOR_VERSION}""${PYTHON_MINOR_VERSION}" \ + --build-arg BAZEL_VERSION="${BAZEL_VERSION}" \ + -t array_record:"${PYTHON_VERSION}" "${SOURCE_DIR}"'/oss' docker run --rm -a stdin -a stdout -a stderr \ - --env PYTHON_BIN="${PYTHON_BIN}/python" \ - --env BAZEL_VERSION=${BAZEL_VERSION} \ - --env AUDITWHEEL_PLATFORM=${AUDITWHEEL_PLATFORM} \ - -v $SOURCE_DIR:/tmp/array_record \ - --name array_record array_record:${PYTHON_VERSION} \ - bash oss/build_whl.sh - done - - ls ${SOURCE_DIR}/all_dist/*.whl -} - -function install_and_init_pyenv { - pyenv_root=${1:-$HOME/.pyenv} - export PYENV_ROOT=$pyenv_root - if [[ ! -d $PYENV_ROOT ]]; then - echo "Installing pyenv.." - git clone https://github.com/pyenv/pyenv.git "$PYENV_ROOT" - export PATH="/home/kbuilder/.local/bin:$PYENV_ROOT/bin:$PATH" - eval "$(pyenv init --path)" + --env PYTHON_VERSION="${PYTHON_MAJOR_VERSION}"'.'"${PYTHON_MINOR_VERSION}" \ + --env PYTHON_MAJOR_VERSION="${PYTHON_MAJOR_VERSION}" \ + --env PYTHON_MINOR_VERSION="${PYTHON_MINOR_VERSION}" \ + --env BAZEL_VERSION="${BAZEL_VERSION}" \ + --env AUDITWHEEL_PLATFORM="${AUDITWHEEL_PLATFORM}" \ + -v "${SOURCE_DIR}":"${OUTPUT_DIR}" \ + --name array_record array_record:"${PYTHON_VERSION}" \ + sh oss/build_whl.sh fi - - echo "Python setup..." - pyenv install -s "$PYENV_PYTHON_VERSION" - pyenv global "$PYENV_PYTHON_VERSION" - PYTHON=$(pyenv which python) -} - -function setup_env_vars_py310 { - # This controls the python binary to use. - PYTHON=python3.10 - PYTHON_STR=python3.10 - PYTHON_MAJOR_VERSION=3 - PYTHON_MINOR_VERSION=10 - # This is for pyenv install. - PYENV_PYTHON_VERSION=3.10.13 -} - -function update_bazel_macos { - BAZEL_VERSION=$1 - ARCH="$(uname -m)" - curl -L https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-darwin-${ARCH}.sh -O - ls - chmod +x bazel-*.sh - ./bazel-${BAZEL_VERSION}-installer-darwin-${ARCH}.sh --user - rm -f ./bazel-${BAZEL_VERSION}-installer-darwin-${ARCH}.sh - # Add new bazel installation to path - PATH="/Users/kbuilder/bin:$PATH" -} - -function build_and_test_array_record_macos() { - SOURCE_DIR=$1 - # Set up Bazel. - # Using a previous version of Bazel to avoid: - # https://github.com/bazelbuild/bazel/issues/8622 - export BAZEL_VERSION="8.0.0" - update_bazel_macos ${BAZEL_VERSION} - bazel --version - - # Set up Pyenv. - setup_env_vars_py310 - install_and_init_pyenv - - # Build and test ArrayRecord. - cd ${SOURCE_DIR} - bash ${SOURCE_DIR}/oss/build_whl.sh - - ls ${SOURCE_DIR}/all_dist/*.whl } diff --git a/oss/test_with_grain.py b/oss/test_with_grain.py new file mode 100644 index 0000000..c494bac --- /dev/null +++ b/oss/test_with_grain.py @@ -0,0 +1,5 @@ +import grain.python as grain +print("imported grain") + +import tensorflow as tf +print("imported TF", flush=True) diff --git a/oss/test_with_tf.py b/oss/test_with_tf.py new file mode 100644 index 0000000..54bd590 --- /dev/null +++ b/oss/test_with_tf.py @@ -0,0 +1,6 @@ +import tensorflow as tf +print(f"iindyk: imported TF", flush=True) +from array_record.python import array_record_module + +print("tf", flush=True) +print(f"iindyk: Done {tf.Tensor}", flush=True) diff --git a/python/array_record_data_source_test.py b/python/array_record_data_source_test.py index f0e4a3a..24e2650 100644 --- a/python/array_record_data_source_test.py +++ b/python/array_record_data_source_test.py @@ -247,7 +247,7 @@ def test_repr(self): ]) self.assertRegex(repr(ar), r"ArrayRecordDataSource\(hash_of_paths=[\w]+\)") - @flagsaver.flagsaver(grain_use_fast_array_record_reader=False) + # @flagsaver.flagsaver(grain_use_fast_array_record_reader=False) def test_additional_reader_options(self): indices_to_read = [3, 0, 5, 9, 2, 1, 4, 7, 8, 6] ar = array_record_data_source.ArrayRecordDataSource( diff --git a/setup.py b/setup.py index 2560dc5..05155cd 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ def has_ext_modules(self): setup( name='array_record', - version='0.6.0', + version='0.7.2', description='A file format that achieves a new frontier of IO efficiency', author='ArrayRecord team', author_email='no-reply@google.com', @@ -41,6 +41,7 @@ def has_ext_modules(self): 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', ], zip_safe=False, distclass=BinaryDistribution, From 9b9979a670ac0adb25da727ef56138dd52a9ae3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= Date: Wed, 9 Jul 2025 07:03:17 +0000 Subject: [PATCH 2/7] Fix `--find-links` option --- oss/build_whl.sh | 8 +++++--- python/array_record_data_source.py | 2 +- setup.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/oss/build_whl.sh b/oss/build_whl.sh index 8c7bccd..f015ff6 100755 --- a/oss/build_whl.sh +++ b/oss/build_whl.sh @@ -69,11 +69,13 @@ function main() { if [ -n "${AUDITWHEEL_PLATFORM}" ]; then echo $(date) : "=== Auditing wheel" auditwheel repair --plat ${AUDITWHEEL_PLATFORM} -w dist dist/*.whl + cp dist/*manylinux*.whl "${DEST}" + else + cp dist/*.whl "${DEST}" fi echo $(date) : "=== Listing wheel" - ls -lrt dist/*.whl - cp dist/*.whl "${DEST}" + ls -lrt "${DEST}"/*.whl popd echo $(date) : "=== Output wheel file is in: ${DEST}" @@ -81,7 +83,7 @@ function main() { # Install ArrayRecord from the wheel and run smoke tests. # TF is not available on Python 3.13 and above. if (( "${PYTHON_MINOR_VERSION}" < 13 )); then - $PYTHON_BIN -m pip install --find-links=/tmp/grain/all_dist --pre array-record + $PYTHON_BIN -m pip install --find-links="${DEST}" --pre array-record $PYTHON_BIN -m pip install jax tensorflow grain $PYTHON_BIN oss/test_with_grain.py $PYTHON_BIN oss/test_with_tf.py diff --git a/python/array_record_data_source.py b/python/array_record_data_source.py index f56e998..117d244 100644 --- a/python/array_record_data_source.py +++ b/python/array_record_data_source.py @@ -46,7 +46,7 @@ def __getitem__(self, record_keys: Sequence[int]) -> Sequence[T]: from absl import logging from etils import epath -from python import array_record_module +from . import array_record_module # TODO(jolesiak): Decide what to do with these flags, e.g., remove them (could # be appropriate if we decide to use asyncio) or move them somewhere else and diff --git a/setup.py b/setup.py index 05155cd..b02b9b1 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ def has_ext_modules(self): setup( name='array_record', - version='0.7.2', + version='0.7.3', description='A file format that achieves a new frontier of IO efficiency', author='ArrayRecord team', author_email='no-reply@google.com', From 8bd05e5b9df5fb666a2b8af8a525e3ffe3ac2d33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= Date: Wed, 9 Jul 2025 09:25:33 +0000 Subject: [PATCH 3/7] Python versions support in the Bazel config --- BUILD | 32 ++++++++-- MODULE.bazel | 62 ++++++++++++++----- requirements.in => test_requirements.in | 0 ...ock.txt => test_requirements_lock_3_10.txt | 4 +- test_requirements_lock_3_11.txt | 26 ++++++++ test_requirements_lock_3_12.txt | 26 ++++++++ test_requirements_lock_3_13.txt | 26 ++++++++ 7 files changed, 151 insertions(+), 25 deletions(-) rename requirements.in => test_requirements.in (100%) rename requirements_lock.txt => test_requirements_lock_3_10.txt (93%) create mode 100644 test_requirements_lock_3_11.txt create mode 100644 test_requirements_lock_3_12.txt create mode 100644 test_requirements_lock_3_13.txt diff --git a/BUILD b/BUILD index 20b8c92..676eb33 100644 --- a/BUILD +++ b/BUILD @@ -1,8 +1,10 @@ # ArrayRecord is a new file format for IO intensive applications. # It supports efficient random access and various compression algorithms. -load("@rules_python//python:pip.bzl", "compile_pip_requirements") - +load("@python//3.10:defs.bzl", compile_pip_requirements_3_10 = "compile_pip_requirements") +load("@python//3.11:defs.bzl", compile_pip_requirements_3_11 = "compile_pip_requirements") +load("@python//3.12:defs.bzl", compile_pip_requirements_3_12 = "compile_pip_requirements") +load("@python//3.13:defs.bzl", compile_pip_requirements_3_13 = "compile_pip_requirements") package(default_visibility = ["//visibility:public"]) @@ -15,8 +17,26 @@ py_library( srcs = ["setup.py"], ) -compile_pip_requirements( - name = "requirements", - requirements_in = "requirements.in", - requirements_txt = "requirements_lock.txt", +compile_pip_requirements_3_10( + name = "requirements_3_10", + requirements_in = "test_requirements.in", + requirements_txt = "test_requirements_lock_3_10.txt", +) + +compile_pip_requirements_3_11( + name = "requirements_3_11", + requirements_in = "test_requirements.in", + requirements_txt = "test_requirements_lock_3_11.txt", +) + +compile_pip_requirements_3_12( + name = "requirements_3_12", + requirements_in = "test_requirements.in", + requirements_txt = "test_requirements_lock_3_12.txt", +) + +compile_pip_requirements_3_13( + name = "requirements_3_13", + requirements_in = "test_requirements.in", + requirements_txt = "test_requirements_lock_3_13.txt", ) diff --git a/MODULE.bazel b/MODULE.bazel index 5868f09..ebfbe54 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -12,41 +12,69 @@ # See the License for the specific language governing permissions and # limitations under the License. -# TODO(fchern): automate version string alignment with setup.py -VERSION = "0.6.0" - module( name = "array_record", - version = VERSION, + version = "0.7.3", repo_name = "com_google_array_record", ) bazel_dep(name = "rules_proto", version = "7.0.2") -bazel_dep(name = "rules_python", version = "0.40.0") -bazel_dep(name = "platforms", version = "0.0.10") +bazel_dep(name = "rules_python", version = "0.37.0") +bazel_dep(name = "platforms", version = "0.0.9") bazel_dep(name = "protobuf", version = "24.4") # aligns with pygrain bazel_dep(name = "googletest", version = "1.15.2") bazel_dep(name = "abseil-cpp", version = "20240722.0") bazel_dep(name = "abseil-py", version = "2.1.0") bazel_dep(name = "eigen", version = "3.4.0.bcr.3") bazel_dep(name = "riegeli", version = "0.0.0-20241218-3385e3c") -bazel_dep(name = "pybind11_bazel", version = "2.12.0") +bazel_dep(name = "pybind11_bazel", version = "2.11.1") -PYTHON_VERSION = "3.10" +http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") -python = use_extension("@rules_python//python/extensions:python.bzl", "python") -python.toolchain( - ignore_root_user_error = True, # Required for our containerized CI environments. - python_version = PYTHON_VERSION, +http_archive( + name = "pybind11", + build_file = "@pybind11_bazel//:pybind11.BUILD", + sha256 = "201966a61dc826f1b1879a24a3317a1ec9214a918c8eb035be2f30c3e9cfbdcb", + strip_prefix = "pybind11-2.10.3", + urls = ["https://github.com/pybind/pybind11/archive/refs/tags/v2.10.3.zip"], ) +SUPPORTED_PYTHON_VERSIONS = [ + "3.10", + "3.11", + "3.12", + "3.13", +] + +DEFAULT_PYTHON_VERSION = "3.10" + +python_configure = use_extension("@pybind11_bazel//:python_configure.bzl", "extension") +use_repo(python_configure, "local_config_python") + +python = use_extension("@rules_python//python/extensions:python.bzl", "python") + +[ + python.toolchain( + ignore_root_user_error = True, + is_default = python_version == DEFAULT_PYTHON_VERSION, + python_version = python_version, + ) + for python_version in SUPPORTED_PYTHON_VERSIONS +] + +use_repo(python, python = "python_versions") + pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip") # requirements_lock.txt is generated by # bazel run //:requirements.update -pip.parse( - hub_name = "pypi", - python_version = PYTHON_VERSION, - requirements_lock = "//:requirements_lock.txt", -) +[ + pip.parse( + hub_name = "pypi", + python_version = version, + requirements_lock = "test_requirements_lock_" + version.replace(".", "_") + ".txt", + ) + for version in SUPPORTED_PYTHON_VERSIONS +] + use_repo(pip, "pypi") diff --git a/requirements.in b/test_requirements.in similarity index 100% rename from requirements.in rename to test_requirements.in diff --git a/requirements_lock.txt b/test_requirements_lock_3_10.txt similarity index 93% rename from requirements_lock.txt rename to test_requirements_lock_3_10.txt index c10476c..95552d3 100644 --- a/requirements_lock.txt +++ b/test_requirements_lock_3_10.txt @@ -2,12 +2,12 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# bazel run //:requirements.update +# bazel run //:requirements_3_10.update # etils[epath,epy]==1.11.0 \ --hash=sha256:a394cf3476bcec51c221426a70c39cd1006e889456ba41e4d7f12fd6814be7a5 \ --hash=sha256:aff3278a3be7fddf302dfd80335e9f924244666c71239cd91e836f3d055f1c4a - # via -r requirements.in + # via -r test_requirements.in fsspec==2024.12.0 \ --hash=sha256:670700c977ed2fb51e0d9f9253177ed20cbde4a3e5c0283cc5385b5870c8533f \ --hash=sha256:b520aed47ad9804237ff878b504267a3b0b441e97508bd6d2d8774e3db85cee2 diff --git a/test_requirements_lock_3_11.txt b/test_requirements_lock_3_11.txt new file mode 100644 index 0000000..5c7c224 --- /dev/null +++ b/test_requirements_lock_3_11.txt @@ -0,0 +1,26 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# bazel run //:requirements_3_11.update +# +etils[epath,epy]==1.12.2 \ + --hash=sha256:4600bec9de6cf5cb043a171e1856e38b5f273719cf3ecef90199f7091a6b3912 \ + --hash=sha256:c6b9e1f0ce66d1bbf54f99201b08a60ba396d3446d9eb18d4bc39b26a2e1a5ee + # via -r test_requirements.in +fsspec==2025.5.1 \ + --hash=sha256:24d3a2e663d5fc735ab256263c4075f374a174c3410c0b25e5bd1970bceaa462 \ + --hash=sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475 + # via etils +importlib-resources==6.5.2 \ + --hash=sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c \ + --hash=sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec + # via etils +typing-extensions==4.14.1 \ + --hash=sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36 \ + --hash=sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76 + # via etils +zipp==3.23.0 \ + --hash=sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e \ + --hash=sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166 + # via etils diff --git a/test_requirements_lock_3_12.txt b/test_requirements_lock_3_12.txt new file mode 100644 index 0000000..eb44ec6 --- /dev/null +++ b/test_requirements_lock_3_12.txt @@ -0,0 +1,26 @@ +# +# This file is autogenerated by pip-compile with Python 3.12 +# by the following command: +# +# bazel run //:requirements_3_12.update +# +etils[epath,epy]==1.12.2 \ + --hash=sha256:4600bec9de6cf5cb043a171e1856e38b5f273719cf3ecef90199f7091a6b3912 \ + --hash=sha256:c6b9e1f0ce66d1bbf54f99201b08a60ba396d3446d9eb18d4bc39b26a2e1a5ee + # via -r test_requirements.in +fsspec==2025.5.1 \ + --hash=sha256:24d3a2e663d5fc735ab256263c4075f374a174c3410c0b25e5bd1970bceaa462 \ + --hash=sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475 + # via etils +importlib-resources==6.5.2 \ + --hash=sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c \ + --hash=sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec + # via etils +typing-extensions==4.14.1 \ + --hash=sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36 \ + --hash=sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76 + # via etils +zipp==3.23.0 \ + --hash=sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e \ + --hash=sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166 + # via etils diff --git a/test_requirements_lock_3_13.txt b/test_requirements_lock_3_13.txt new file mode 100644 index 0000000..7a5a7b6 --- /dev/null +++ b/test_requirements_lock_3_13.txt @@ -0,0 +1,26 @@ +# +# This file is autogenerated by pip-compile with Python 3.13 +# by the following command: +# +# bazel run //:requirements_3_13.update +# +etils[epath,epy]==1.12.2 \ + --hash=sha256:4600bec9de6cf5cb043a171e1856e38b5f273719cf3ecef90199f7091a6b3912 \ + --hash=sha256:c6b9e1f0ce66d1bbf54f99201b08a60ba396d3446d9eb18d4bc39b26a2e1a5ee + # via -r test_requirements.in +fsspec==2025.5.1 \ + --hash=sha256:24d3a2e663d5fc735ab256263c4075f374a174c3410c0b25e5bd1970bceaa462 \ + --hash=sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475 + # via etils +importlib-resources==6.5.2 \ + --hash=sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c \ + --hash=sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec + # via etils +typing-extensions==4.14.1 \ + --hash=sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36 \ + --hash=sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76 + # via etils +zipp==3.23.0 \ + --hash=sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e \ + --hash=sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166 + # via etils From fce714d5fae42a69718c63dab3738f4447dd14af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= Date: Wed, 9 Jul 2025 10:50:43 +0000 Subject: [PATCH 4/7] Config update --- MODULE.bazel | 6 +++--- oss/build_whl.sh | 9 +++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/MODULE.bazel b/MODULE.bazel index ebfbe54..282ceea 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -19,15 +19,15 @@ module( ) bazel_dep(name = "rules_proto", version = "7.0.2") -bazel_dep(name = "rules_python", version = "0.37.0") -bazel_dep(name = "platforms", version = "0.0.9") +bazel_dep(name = "rules_python", version = "0.40.0") +bazel_dep(name = "platforms", version = "0.0.10") bazel_dep(name = "protobuf", version = "24.4") # aligns with pygrain bazel_dep(name = "googletest", version = "1.15.2") bazel_dep(name = "abseil-cpp", version = "20240722.0") bazel_dep(name = "abseil-py", version = "2.1.0") bazel_dep(name = "eigen", version = "3.4.0.bcr.3") bazel_dep(name = "riegeli", version = "0.0.0-20241218-3385e3c") -bazel_dep(name = "pybind11_bazel", version = "2.11.1") +bazel_dep(name = "pybind11_bazel", version = "2.12.0") http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") diff --git a/oss/build_whl.sh b/oss/build_whl.sh index f015ff6..c1a56e0 100755 --- a/oss/build_whl.sh +++ b/oss/build_whl.sh @@ -16,12 +16,17 @@ function main() { # Remove .bazelrc if it already exists [ -e .bazelrc ] && rm .bazelrc + write_to_bazelrc "build --incompatible_default_to_explicit_init_py" + write_to_bazelrc "build --enable_platform_specific_config" + write_to_bazelrc "build --@rules_python//python/config_settings:python_version=${PYTHON_VERSION}" + write_to_bazelrc "test --@rules_python//python/config_settings:python_version=${PYTHON_VERSION}" + write_to_bazelrc "test --action_env PYTHON_VERSION=${PYTHON_VERSION}" + write_to_bazelrc "test --test_timeout=300" + write_to_bazelrc "build -c opt" write_to_bazelrc "build --cxxopt=-std=c++17" write_to_bazelrc "build --host_cxxopt=-std=c++17" write_to_bazelrc "build --experimental_repo_remote_exec" - write_to_bazelrc "build --python_path=\"${PYTHON_BIN}\"" - write_to_bazelrc "test --python_path=\"${PYTHON_BIN}\"" PLATFORM="$(uname)" if [ -n "${CROSSTOOL_TOP}" ]; then From 772dd3288a3c8bb8f5fdec746de628f77cf3851d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= Date: Fri, 11 Jul 2025 09:56:14 +0000 Subject: [PATCH 5/7] Enforce direct dependencies --- MODULE.bazel | 14 ++------------ oss/README.md | 33 ++++++++++++++++++++++----------- oss/build_whl.sh | 1 + 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/MODULE.bazel b/MODULE.bazel index 282ceea..308fcc9 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -19,9 +19,9 @@ module( ) bazel_dep(name = "rules_proto", version = "7.0.2") -bazel_dep(name = "rules_python", version = "0.40.0") +bazel_dep(name = "rules_python", version = "0.37.0") bazel_dep(name = "platforms", version = "0.0.10") -bazel_dep(name = "protobuf", version = "24.4") # aligns with pygrain +bazel_dep(name = "protobuf", version = "28.3") bazel_dep(name = "googletest", version = "1.15.2") bazel_dep(name = "abseil-cpp", version = "20240722.0") bazel_dep(name = "abseil-py", version = "2.1.0") @@ -29,16 +29,6 @@ bazel_dep(name = "eigen", version = "3.4.0.bcr.3") bazel_dep(name = "riegeli", version = "0.0.0-20241218-3385e3c") bazel_dep(name = "pybind11_bazel", version = "2.12.0") -http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") - -http_archive( - name = "pybind11", - build_file = "@pybind11_bazel//:pybind11.BUILD", - sha256 = "201966a61dc826f1b1879a24a3317a1ec9214a918c8eb035be2f30c3e9cfbdcb", - strip_prefix = "pybind11-2.10.3", - urls = ["https://github.com/pybind/pybind11/archive/refs/tags/v2.10.3.zip"], -) - SUPPORTED_PYTHON_VERSIONS = [ "3.10", "3.11", diff --git a/oss/README.md b/oss/README.md index e227f33..3e2e2d8 100644 --- a/oss/README.md +++ b/oss/README.md @@ -1,15 +1,26 @@ -# Steps to build a new array_record pip package +# Steps to build and publish a new `array_record` PyPI package -1. Update the version number in setup.py +`array_record` supports automatic publishing to PyPI via GitHub Actions. +Once you're ready to create a new release you need to: -2. In the root folder, run +1. Update the version number in `setup.py`. - ``` - ./oss/build_whl.sh - ``` - to use the current `python3` version. Otherwise, optionally set - ``` - PYTHON_VERSION=3.9 ./oss/build_whl.sh - ``` +2. Go to [GitHub Actions page](https://github.com/google/array_record/actions), + select `Build and Publish Release` workflow, and run it. It will spin up a few + test jobs, and once all of them complete successfully, a `publish-wheel` will start. -3. Wheels are in `all_dist/`. +3. On completion you should notice a new release on https://pypi.org/project/array-record/#history. + +--- + +If you want to build a wheel locally in your development environment in the root folder, run: + +```sh +./oss/build_whl.sh +``` +to use the current `python3` version. Otherwise, optionally set: +```sh +PYTHON_VERSION=3.9 ./oss/build_whl.sh +``` + +Wheels are in `all_dist/`. diff --git a/oss/build_whl.sh b/oss/build_whl.sh index c1a56e0..73e062a 100755 --- a/oss/build_whl.sh +++ b/oss/build_whl.sh @@ -27,6 +27,7 @@ function main() { write_to_bazelrc "build --cxxopt=-std=c++17" write_to_bazelrc "build --host_cxxopt=-std=c++17" write_to_bazelrc "build --experimental_repo_remote_exec" + write_to_bazelrc "common --check_direct_dependencies=error" PLATFORM="$(uname)" if [ -n "${CROSSTOOL_TOP}" ]; then From 49893bc9eb5db9e068ba7d251d070ec960cd5327 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= Date: Fri, 11 Jul 2025 10:22:49 +0000 Subject: [PATCH 6/7] Skip TF import test on Darwin --- oss/build_whl.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oss/build_whl.sh b/oss/build_whl.sh index 73e062a..c05730c 100755 --- a/oss/build_whl.sh +++ b/oss/build_whl.sh @@ -88,7 +88,7 @@ function main() { # Install ArrayRecord from the wheel and run smoke tests. # TF is not available on Python 3.13 and above. - if (( "${PYTHON_MINOR_VERSION}" < 13 )); then + if [ "$(uname)" != "Darwin" ] && (( "${PYTHON_MINOR_VERSION}" < 13 )); then $PYTHON_BIN -m pip install --find-links="${DEST}" --pre array-record $PYTHON_BIN -m pip install jax tensorflow grain $PYTHON_BIN oss/test_with_grain.py From 01e57f73071fab8376acf6c838128e3db72bfa18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= Date: Mon, 14 Jul 2025 17:20:04 +0000 Subject: [PATCH 7/7] Fix `python-tests.yml` workflow --- .../workflows/build_and_publish_template.yml | 5 ++- .github/workflows/publish_release.yml | 2 +- .github/workflows/python-tests.yml | 38 ++----------------- oss/build_whl.sh | 4 +- 4 files changed, 11 insertions(+), 38 deletions(-) diff --git a/.github/workflows/build_and_publish_template.yml b/.github/workflows/build_and_publish_template.yml index d9ebf6c..646a8cb 100644 --- a/.github/workflows/build_and_publish_template.yml +++ b/.github/workflows/build_and_publish_template.yml @@ -6,9 +6,9 @@ on: workflow_call: inputs: pypi_project_url: - required: true + required: false type: string - is_nightly: + upload_wheels: required: true type: boolean @@ -66,6 +66,7 @@ jobs: path: /tmp/array_record/all_dist/*.whl publish-wheel: + if: ${{ inputs.upload_wheels }} runs-on: ubuntu-22.04 needs: build-and-test permissions: diff --git a/.github/workflows/publish_release.yml b/.github/workflows/publish_release.yml index 7981472..851b0e3 100644 --- a/.github/workflows/publish_release.yml +++ b/.github/workflows/publish_release.yml @@ -10,4 +10,4 @@ jobs: id-token: write with: pypi_project_url: https://pypi.org/project/array-record - is_nightly: false + upload_wheels: true diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 88f53a5..1f62150 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -5,37 +5,7 @@ on: branches: [main] jobs: - test: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ['3.9', '3.10', '3.11'] - env: - DOCKER_BUILDKIT: 1 - TMP_FOLDER: /tmp/array_record - steps: - - uses: actions/checkout@v2 - - name: Build Docker image - run: | - docker build --progress=plain --no-cache \ - --build-arg PYTHON_VERSION=${{ matrix.python-version }} \ - -t array_record:latest - < oss/Dockerfile - - name: Build wheels and test - run: | - docker run --rm -a stdin -a stdout -a stderr \ - --env PYTHON_VERSION=${{ matrix.python-version }} \ - --volume ${GITHUB_WORKSPACE}:${TMP_FOLDER} --name array_record array_record:latest \ - bash oss/build_whl.sh - - name: Install in a blank Docker and test the import in Python - run: | - docker run --rm -a stdin -a stdout -a stderr \ - --env PYTHON_VERSION=${{ matrix.python-version }} \ - --volume ${GITHUB_WORKSPACE}:/root \ - python:${{ matrix.python-version }} bash -c " - ARRAY_RECORD_VERSION=\$(python /root/setup.py --version 2>&1 /dev/null) - SHORT_PYTHON_VERSION=\${PYTHON_VERSION//./} - ARRAY_RECORD_WHEEL=\"/root/all_dist/array_record-\${ARRAY_RECORD_VERSION}-py\${SHORT_PYTHON_VERSION}-none-any.whl\" - python -m pip install \${ARRAY_RECORD_WHEEL} && - python -c 'import array_record' && - python -c 'from array_record.python import array_record_data_source' - " + call-workflow: + uses: ./.github/workflows/build_and_publish_template.yml + with: + upload_wheels: false diff --git a/oss/build_whl.sh b/oss/build_whl.sh index c05730c..da06dd0 100755 --- a/oss/build_whl.sh +++ b/oss/build_whl.sh @@ -87,9 +87,11 @@ function main() { echo $(date) : "=== Output wheel file is in: ${DEST}" # Install ArrayRecord from the wheel and run smoke tests. + $PYTHON_BIN -m pip install --find-links="${DEST}" --pre array-record + $PYTHON_BIN -c 'import array_record' + $PYTHON_BIN -c 'from array_record.python import array_record_data_source' # TF is not available on Python 3.13 and above. if [ "$(uname)" != "Darwin" ] && (( "${PYTHON_MINOR_VERSION}" < 13 )); then - $PYTHON_BIN -m pip install --find-links="${DEST}" --pre array-record $PYTHON_BIN -m pip install jax tensorflow grain $PYTHON_BIN oss/test_with_grain.py $PYTHON_BIN oss/test_with_tf.py