From fd597e0acd2d9760b8b9e2cdb87e3d9c582bbd1e Mon Sep 17 00:00:00 2001 From: Mateusz Sokol Date: Thu, 21 Aug 2025 11:43:06 -0700 Subject: [PATCH] Revert the last cpp protobuf update and GCS reader support. The required riegeli version depends on a protobuf version that is incompatible with TF. Ported from https://github.com/google/array_record/pull/175 PiperOrigin-RevId: 797851410 --- MODULE.bazel | 15 +++++++-------- oss/build_whl.sh | 10 ++++------ python/BUILD | 2 -- python/array_record_module.cc | 29 ++++++----------------------- setup.py | 2 +- 5 files changed, 18 insertions(+), 40 deletions(-) diff --git a/MODULE.bazel b/MODULE.bazel index 55ba2b8..b3d252d 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -13,7 +13,7 @@ # limitations under the License. # TODO(fchern): automate version string alignment with setup.py -VERSION = "0.8.0" +VERSION = "0.8.1" module( name = "array_record", @@ -21,17 +21,16 @@ module( repo_name = "com_google_array_record", ) -bazel_dep(name = "rules_proto", version = "7.1.0") -bazel_dep(name = "rules_python", version = "1.4.1") -bazel_dep(name = "platforms", version = "0.0.11") -bazel_dep(name = "protobuf", version = "31.1") +bazel_dep(name = "rules_proto", version = "7.0.2") +bazel_dep(name = "rules_python", version = "0.37.0") +bazel_dep(name = "platforms", version = "0.0.10") +bazel_dep(name = "protobuf", version = "28.3") bazel_dep(name = "googletest", version = "1.15.2") -bazel_dep(name = "abseil-cpp", version = "20250127.1") +bazel_dep(name = "abseil-cpp", version = "20240722.0") bazel_dep(name = "abseil-py", version = "2.1.0") bazel_dep(name = "eigen", version = "3.4.0.bcr.3") -bazel_dep(name = "riegeli", version = "0.0.0-20250717-5b2e77e") +bazel_dep(name = "riegeli", version = "0.0.0-20241218-3385e3c") bazel_dep(name = "pybind11_bazel", version = "2.12.0") -bazel_dep(name = "google_cloud_cpp", version = "3.0.0-rc0") SUPPORTED_PYTHON_VERSIONS = [ "3.10", diff --git a/oss/build_whl.sh b/oss/build_whl.sh index e0dc052..85e1cad 100755 --- 
a/oss/build_whl.sh +++ b/oss/build_whl.sh @@ -90,12 +90,10 @@ function main() { $PYTHON_BIN -m pip install ${OUTPUT_DIR}/all_dist/array_record*.whl $PYTHON_BIN -c 'import array_record' $PYTHON_BIN -c 'from array_record.python import array_record_data_source' - # TF is not available on Python 3.13 and above. - if [ "$(uname)" != "Darwin" ] && (( "${PYTHON_MINOR_VERSION}" < 13 )); then - $PYTHON_BIN -m pip install jax tensorflow>=2.20.0 grain - $PYTHON_BIN oss/test_import_grain.py - $PYTHON_BIN oss/test_import_tensorflow.py - fi + $PYTHON_BIN -m pip install jax tensorflow>=2.20.0 grain + # Re-enable the grain import test once the new version is released. + # $PYTHON_BIN oss/test_import_grain.py + $PYTHON_BIN oss/test_import_tensorflow.py } main diff --git a/python/BUILD b/python/BUILD index d247e61..6e99502 100644 --- a/python/BUILD +++ b/python/BUILD @@ -20,8 +20,6 @@ pybind_extension( "@riegeli//riegeli/base:initializer", "@riegeli//riegeli/bytes:fd_reader", "@riegeli//riegeli/bytes:fd_writer", - "@riegeli//riegeli/gcs:gcs_object", - "@riegeli//riegeli/gcs:gcs_reader", ], ) diff --git a/python/array_record_module.cc b/python/array_record_module.cc index 88b8961..2bff909 100644 --- a/python/array_record_module.cc +++ b/python/array_record_module.cc @@ -22,7 +22,6 @@ limitations under the License. #include #include "absl/status/status.h" -#include "absl/strings/match.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" #include "cpp/array_record_reader.h" @@ -35,8 +34,6 @@ limitations under the License. 
#include "riegeli/base/maker.h" #include "riegeli/bytes/fd_reader.h" #include "riegeli/bytes/fd_writer.h" -#include "riegeli/gcs/gcs_object.h" -#include "riegeli/gcs/gcs_reader.h" namespace py = pybind11; @@ -53,13 +50,10 @@ PYBIND11_MODULE(array_record_module, m) { throw py::value_error( std::string(status_or_option.status().message())); } - riegeli::FdWriterBase::Options file_writer_options; - file_writer_options.set_buffer_size(size_t{16} << 20); // Release the GIL because IO is time consuming. py::gil_scoped_release scoped_release; return new array_record::ArrayRecordWriter( - riegeli::Maker( - path, std::move(file_writer_options)), + riegeli::Maker(path), status_or_option.value()); }), py::arg("path"), py::arg("options") = "") @@ -90,29 +84,18 @@ PYBIND11_MODULE(array_record_module, m) { std::string(status_or_option.status().message())); } riegeli::FdReaderBase::Options file_reader_options; - riegeli::GcsReader::Options gcs_reader_options; if (kwargs.contains("file_reader_buffer_size")) { auto file_reader_buffer_size = kwargs["file_reader_buffer_size"].cast(); file_reader_options.set_buffer_size(file_reader_buffer_size); - gcs_reader_options.set_buffer_size(file_reader_buffer_size); } // Release the GIL because IO is time consuming. 
py::gil_scoped_release scoped_release; - if (absl::StartsWith(path, "gs://")) { - return new array_record::ArrayRecordReader( - riegeli::Maker( - google::cloud::storage::Client(), - riegeli::GcsObject(path), std::move(gcs_reader_options)), - status_or_option.value(), - array_record::ArrayRecordGlobalPool()); - } else { - return new array_record::ArrayRecordReader( - riegeli::Maker( - path, std::move(file_reader_options)), - status_or_option.value(), - array_record::ArrayRecordGlobalPool()); - } + return new array_record::ArrayRecordReader( + riegeli::Maker( + path, std::move(file_reader_options)), + status_or_option.value(), + array_record::ArrayRecordGlobalPool()); }), py::arg("path"), py::arg("options") = "", R"( ArrayRecordReader for fast sequential or random access. diff --git a/setup.py b/setup.py index 475b3c9..542b919 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ def has_ext_modules(self): setup( name='array_record', - version='0.8.0', + version='0.8.1', description='A file format that achieves a new frontier of IO efficiency', author='ArrayRecord team', author_email='no-reply@google.com',