From 2c2777ffd96781c0d0c05ccf985db626b157c505 Mon Sep 17 00:00:00 2001
From: Marvin Ritter
Date: Wed, 14 Dec 2022 00:48:22 -0800
Subject: [PATCH] Set readahead buffer size to 0.

PiperOrigin-RevId: 495238437
---
Reviewer notes (text in this section is not part of the commit message):

* What the hunks below do: ArrayRecordReader's Python constructor gains an
  optional `file_reader_buffer_size` argument (default std::nullopt). When a
  value is given it is forwarded to the file reader options through
  set_buffer_size(); otherwise the options object is left untouched. A Python
  test constructs a reader with file_reader_buffer_size=2**10 and reads one
  record back.
* Fix applied in this revision of the patch: the second hunk used to add
  `file_reader_options` twice, once as riegeli::FdReaderBase::Options before
  the GIL-release scope and once as riegeli::FileReaderBase::Options inside
  it. The inner declaration shadowed the outer one, so the outer object was
  never used and the two declarations disagreed on the type. The inner
  declaration was dropped; the hunk headers and the diffstat were updated
  accordingly. The `index` line still names the blobs of the original patch.
* NOTE(review): the subject mentions setting a readahead buffer size to 0,
  but the visible hunks only add an optional parameter. Confirm the subject
  against the upstream change.
* NOTE(review): template argument lists (after py::class_, std::optional,
  std::unique_ptr, std::make_unique) and the author e-mail appear to have been
  stripped from this copy. Restore them from the upstream commit before
  applying. TODO confirm.
* NOTE(review): leading whitespace was reconstructed from a
  whitespace-collapsed copy. Verify it against the target tree, or apply with
  whitespace-insensitive matching.

 python/array_record_module.cc      | 18 ++++++++++++++----
 python/array_record_module_test.py |  8 ++++++++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/python/array_record_module.cc b/python/array_record_module.cc
index c877860..e354b75 100644
--- a/python/array_record_module.cc
+++ b/python/array_record_module.cc
@@ -80,7 +80,8 @@ PYBIND11_MODULE(array_record_module, m) {
       });
 
   py::class_(m, "ArrayRecordReader")
-      .def(py::init([](const std::string& path, const std::string& options) {
+      .def(py::init([](const std::string& path, const std::string& options,
+                       const std::optional file_reader_buffer_size) {
              auto status_or_option =
                  array_record::ArrayRecordReaderBase::Options::FromString(
                      options);
@@ -89,9 +90,14 @@ PYBIND11_MODULE(array_record_module, m) {
                    std::string(status_or_option.status().message()));
              }
              std::unique_ptr> file_reader;
+             riegeli::FdReaderBase::Options file_reader_options;
              {
                py::gil_scoped_release scoped_release;
-               file_reader = std::make_unique>(path);
+               if (file_reader_buffer_size.has_value()) {
+                 file_reader_options.set_buffer_size(*file_reader_buffer_size);
+               }
+               file_reader = std::make_unique>(path,
+                   file_reader_options);
              }
              if (!file_reader->ok()) {
                throw std::runtime_error(
@@ -102,12 +108,16 @@ PYBIND11_MODULE(array_record_module, m) {
                  status_or_option.value(),
                  array_record::ArrayRecordGlobalPool());
            }),
-           py::arg("path"), py::arg("options") = "", R"(
+           py::arg("path"), py::arg("options") = "",
+           py::arg("file_reader_buffer_size") = std::nullopt, R"(
     ArrayRecordReader for fast sequential or random access.
 
     Args:
         path: File path to the input file.
-        options: String with following syntax.
+        options: String with options for ArrayRecord. See syntax below.
+        file_reader_buffer_size: Optional size of the buffer (in bytes)
+          for the underlying file (Riegeli) reader. The default buffer
+          size is 1 MiB.
 
     options ::= option? ("," option?)*
     option ::=
diff --git a/python/array_record_module_test.py b/python/array_record_module_test.py
index 8ab7d5c..99e4152 100644
--- a/python/array_record_module_test.py
+++ b/python/array_record_module_test.py
@@ -82,6 +82,14 @@ def test_write_read_non_unicode(self):
     reader = ArrayRecordReader(self.test_file)
     self.assertEqual(reader.read(), b)
 
+  def test_write_read_with_file_reader_buffer_size(self):
+    writer = ArrayRecordWriter(self.test_file)
+    b = b"F\xc3\xb8\xc3\xb6\x97\xc3\xa5r"
+    writer.write(b)
+    writer.close()
+    reader = ArrayRecordReader(self.test_file, file_reader_buffer_size=2**10)
+    self.assertEqual(reader.read(), b)
+
   def test_batch_read(self):
     writer = ArrayRecordWriter(self.test_file)
     test_strs = [b"abc", b"def", b"ghi", b"kkk", b"..."]