Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions python/array_record_module.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ PYBIND11_MODULE(array_record_module, m) {
});

py::class_<ArrayRecordReader>(m, "ArrayRecordReader")
.def(py::init([](const std::string& path, const std::string& options) {
.def(py::init([](const std::string& path, const std::string& options,
const std::optional<int64_t> file_reader_buffer_size) {
auto status_or_option =
array_record::ArrayRecordReaderBase::Options::FromString(
options);
Expand All @@ -89,9 +90,15 @@ PYBIND11_MODULE(array_record_module, m) {
std::string(status_or_option.status().message()));
}
std::unique_ptr<riegeli::FdReader<>> file_reader;
riegeli::FdReaderBase::Options file_reader_options;
{
py::gil_scoped_release scoped_release;
file_reader = std::make_unique<riegeli::FdReader<>>(path);
riegeli::FileReaderBase::Options file_reader_options;
if (file_reader_buffer_size.has_value()) {
file_reader_options.set_buffer_size(*file_reader_buffer_size);
}
file_reader = std::make_unique<riegeli::FdReader<>>(path,
file_reader_options);
}
if (!file_reader->ok()) {
throw std::runtime_error(
Expand All @@ -102,12 +109,16 @@ PYBIND11_MODULE(array_record_module, m) {
status_or_option.value(),
array_record::ArrayRecordGlobalPool());
}),
py::arg("path"), py::arg("options") = "", R"(
py::arg("path"), py::arg("options") = "",
py::arg("file_reader_buffer_size") = std::nullopt, R"(
ArrayRecordReader for fast sequential or random access.

Args:
path: File path to the input file.
options: String with following syntax.
options: String with options for ArrayRecord. See syntax below.
file_reader_buffer_size: Optional size of the buffer (in bytes)
for the underlying file (Riegeli) reader. The default buffer
size is 1 MiB.

options ::= option? ("," option?)*
option ::=
Expand Down
8 changes: 8 additions & 0 deletions python/array_record_module_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,14 @@ def test_write_read_non_unicode(self):
reader = ArrayRecordReader(self.test_file)
self.assertEqual(reader.read(), b)

def test_write_read_with_file_reader_buffer_size(self):
  """Round-trips one record through a reader opened with a custom buffer size."""
  # The payload is deliberately not valid UTF-8: \x97 is a lone continuation
  # byte, so the record has to come back as raw bytes.
  payload = b"F\xc3\xb8\xc3\xb6\x97\xc3\xa5r"

  record_writer = ArrayRecordWriter(self.test_file)
  record_writer.write(payload)
  record_writer.close()

  # Pass an explicit 2**10-byte buffer instead of relying on the default.
  record_reader = ArrayRecordReader(
      self.test_file, file_reader_buffer_size=2**10
  )
  self.assertEqual(record_reader.read(), payload)

def test_batch_read(self):
writer = ArrayRecordWriter(self.test_file)
test_strs = [b"abc", b"def", b"ghi", b"kkk", b"..."]
Expand Down