diff --git a/tensorstore/driver/BUILD b/tensorstore/driver/BUILD index 23f3dc809..ec85ff46e 100644 --- a/tensorstore/driver/BUILD +++ b/tensorstore/driver/BUILD @@ -23,6 +23,7 @@ DRIVERS = [ "virtual_chunked", "zarr", "zarr3", + "tiff", ] DOCTEST_SOURCES = glob([ diff --git a/tensorstore/driver/index.rst b/tensorstore/driver/index.rst index d25527174..10e4cc862 100644 --- a/tensorstore/driver/index.rst +++ b/tensorstore/driver/index.rst @@ -37,6 +37,7 @@ Chunked storage drivers zarr3/index n5/index neuroglancer_precomputed/index + tiff/index .. json:schema:: KeyValueStoreBackedChunkDriver diff --git a/tensorstore/driver/tiff/BUILD b/tensorstore/driver/tiff/BUILD new file mode 100644 index 000000000..45036f774 --- /dev/null +++ b/tensorstore/driver/tiff/BUILD @@ -0,0 +1,219 @@ +load("//bazel:tensorstore.bzl", "tensorstore_cc_library", "tensorstore_cc_test") +load("//docs:doctest.bzl", "doctest_test") + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +DOCTEST_SOURCES = glob([ + "**/*.rst", + "**/*.yml", +]) + +doctest_test( + name = "doctest_test", + srcs = DOCTEST_SOURCES, +) + +filegroup( + name = "doc_sources", + srcs = DOCTEST_SOURCES, +) + +tensorstore_cc_library( + name = "tiff", + deps = [ + ":driver", + ":zlib_compressor", + ":zstd_compressor", + ], +) + +tensorstore_cc_library( + name = "driver", + srcs = ["driver.cc"], + deps = [ + ":metadata", + "//tensorstore:array", + "//tensorstore:chunk_layout", + "//tensorstore:index", + "//tensorstore/driver", + "//tensorstore/driver:chunk_cache_driver", + "//tensorstore/driver:kvs_backed_chunk_driver", + "//tensorstore/internal/cache:async_cache", + "//tensorstore/internal/cache:kvs_backed_chunk_cache", + "//tensorstore/internal/json_binding:staleness_bound", + "//tensorstore/kvstore", + "//tensorstore/kvstore:generation", + "//tensorstore/kvstore/tiff:tiff_key_value_store", + "//tensorstore/util:result", + "//tensorstore/util:status", + "//tensorstore/util:str_cat", + "//tensorstore/util/execution", + "//tensorstore/util/execution:any_receiver", + "//tensorstore/util/garbage_collection", + "@com_github_nlohmann_json//:json", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings:cord", + ], + alwayslink = 1, +) + +tensorstore_cc_library( + name = "metadata", + srcs = ["metadata.cc"], + hdrs = ["metadata.h"], + deps = [ + ":compressor", + ":zlib_compressor", + ":zstd_compressor", + "//tensorstore:chunk_layout", + "//tensorstore:codec_spec", + "//tensorstore:data_type", + "//tensorstore:index", + "//tensorstore:schema", + "//tensorstore/index_space:dimension_units", + "//tensorstore/internal/log:verbose_flag", + "//tensorstore/internal/riegeli:array_endian_codec", + "//tensorstore/kvstore/tiff:tiff_details", + "//tensorstore/kvstore/tiff:tiff_dir_cache", + "//tensorstore/util/garbage_collection", + "@com_github_nlohmann_json//:json", + "@com_google_absl//absl/log:absl_log", + "@com_google_riegeli//riegeli/bytes:cord_reader", + ], + alwayslink = 1, +) + +tensorstore_cc_test( + name = "metadata_test", + size = "small", + srcs = ["metadata_test.cc"], + deps = [ + ":compressor", + ":metadata", + "//tensorstore:array", + "//tensorstore:codec_spec", + "//tensorstore:data_type", + "//tensorstore:index", + "//tensorstore/internal:json_gtest", + "//tensorstore/internal/json_binding:gtest", + "//tensorstore/internal/riegeli:array_endian_codec", + "//tensorstore/kvstore/tiff:tiff_details", + "//tensorstore/kvstore/tiff:tiff_dir_cache", + 
"//tensorstore/util:status_testutil", + "@com_github_nlohmann_json//:json", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings:cord", + "@com_google_googletest//:gtest_main", + "@com_google_riegeli//riegeli/bytes:cord_reader", + "@com_google_riegeli//riegeli/bytes:cord_writer", + ], +) + +tensorstore_cc_library( + name = "compressor", + srcs = ["compressor.cc"], + hdrs = [ + "compressor.h", + "compressor_registry.h", + ], + deps = [ + "//tensorstore/internal:json_registry", + "//tensorstore/internal/compression:json_specified_compressor", + "//tensorstore/kvstore/tiff:tiff_details", + "@com_google_absl//absl/container:flat_hash_map", + ], +) + +tensorstore_cc_test( + name = "driver_test", + size = "small", + srcs = ["driver_test.cc"], + deps = [ + ":driver", + ":metadata", + "//tensorstore:array", + "//tensorstore:codec_spec", + "//tensorstore:data_type", + "//tensorstore:index", + "//tensorstore/driver:driver_testutil", + "//tensorstore/internal:global_initializer", + "//tensorstore/internal:json_gtest", + "//tensorstore/internal/json_binding:gtest", + "//tensorstore/internal/riegeli:array_endian_codec", + "//tensorstore/kvstore", + "//tensorstore/kvstore:test_matchers", + "//tensorstore/kvstore:test_util", + "//tensorstore/kvstore/memory", + "//tensorstore/kvstore/tiff:tiff_details", + "//tensorstore/kvstore/tiff:tiff_dir_cache", + "//tensorstore/kvstore/tiff:tiff_test_util", + "//tensorstore/util:status_testutil", + "@com_github_nlohmann_json//:json", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings:cord", + "@com_google_googletest//:gtest_main", + "@com_google_riegeli//riegeli/bytes:cord_reader", + "@com_google_riegeli//riegeli/bytes:cord_writer", + ], +) + +tensorstore_cc_test( + name = "golden_file_test", + size = "small", + srcs = ["golden_file_test.cc"], + args = [ + "--tensorstore_test_data_dir=" + + package_name() + "/testdata", + ], + data = [":testdata"], + deps = [ + ":driver", + "//tensorstore", + "//tensorstore:array", + "//tensorstore:context", + "//tensorstore:index", + "//tensorstore:open", + "//tensorstore:open_mode", + "//tensorstore/internal:path", + "//tensorstore/kvstore/file", + "//tensorstore/util:status_testutil", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/log:absl_log", + "@com_google_googletest//:gtest_main", + ], +) + +filegroup( + name = "testdata", + srcs = glob( + include = [ + "testdata/**", + ], + exclude = ["testdata/*.py"], + ), +) + +tensorstore_cc_library( + name = "zstd_compressor", + srcs = ["zstd_compressor.cc"], + deps = [ + ":compressor", + "//tensorstore/internal/compression:zstd_compressor", + "//tensorstore/internal/json_binding", + ], + alwayslink = 1, +) + +tensorstore_cc_library( + name = "zlib_compressor", + srcs = ["zlib_compressor.cc"], + deps = [ + ":compressor", + "//tensorstore/internal/compression:zlib_compressor", + "//tensorstore/internal/json_binding", + ], + alwayslink = 1, +) diff --git a/tensorstore/driver/tiff/compressor.cc b/tensorstore/driver/tiff/compressor.cc new file mode 100644 index 000000000..2f1f9cda8 --- /dev/null +++ b/tensorstore/driver/tiff/compressor.cc @@ -0,0 +1,72 @@ +// Copyright 2025 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/driver/tiff/compressor.h" // For Compressor alias declaration + +#include +#include +#include + +#include "absl/base/no_destructor.h" +#include "absl/container/flat_hash_map.h" +#include "tensorstore/driver/tiff/compressor_registry.h" +#include "tensorstore/internal/compression/json_specified_compressor.h" +#include "tensorstore/internal/json_binding/bindable.h" +#include "tensorstore/internal/json_binding/enum.h" +#include "tensorstore/internal/json_binding/json_binding.h" +#include "tensorstore/internal/json_registry.h" +#include "tensorstore/kvstore/tiff/tiff_details.h" + +namespace tensorstore { +namespace internal_tiff { + +namespace jb = tensorstore::internal_json_binding; +using ::tensorstore::internal_tiff_kvstore::CompressionType; + +internal::JsonSpecifiedCompressor::Registry& GetTiffCompressorRegistry() { + static absl::NoDestructor + registry; + return *registry; +} + +// Defines the mapping from TIFF numeric tag values to the string IDs used +// for compressor registration and CodecSpec JSON representation. +const static auto* const kCompressionTypeToStringIdMap = + new absl::flat_hash_map{ + {CompressionType::kNone, "raw"}, // No compression + {CompressionType::kZStd, "zstd"}, // Zstandard compression + {CompressionType::kDeflate, "zlib"}, // Deflate/Zlib compression. + // { CompressionType::kPackBits, "packbits" }, + }; + +const absl::flat_hash_map& +GetTiffCompressionMap() { + return *kCompressionTypeToStringIdMap; +} + +TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(Compressor, [](auto is_loading, + const auto& options, + auto* obj, auto* j) { + auto& registry = GetTiffCompressorRegistry(); + return jb::Object( + jb::Member("type", + jb::MapValue( + registry.KeyBinder(), + // Map "raw" to a default-constructed Compressor (nullptr) + std::make_pair(Compressor{}, std::string("raw")))), + registry.RegisteredObjectBinder())(is_loading, options, obj, j); +}) + +} // namespace internal_tiff +} // namespace tensorstore diff --git a/tensorstore/driver/tiff/compressor.h b/tensorstore/driver/tiff/compressor.h new file mode 100644 index 000000000..196ad3cb3 --- /dev/null +++ b/tensorstore/driver/tiff/compressor.h @@ -0,0 +1,34 @@ +// Copyright 2025 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
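For orientation, the table just defined in compressor.cc is what ties the numeric TIFF Compression tag to the codec ids ("raw", "zstd", "zlib") understood by the JSON binder. Below is a minimal sketch of consulting it from calling code, using only the two accessors declared in compressor_registry.h; the helper name TiffCompressionId is invented for the example and is not part of the patch.

// Illustrative sketch only: translate a TIFF Compression tag value into the
// string id used in codec JSON.  Values missing from the map are simply
// unsupported by this driver.
#include <optional>
#include <string>

#include "tensorstore/driver/tiff/compressor_registry.h"
#include "tensorstore/kvstore/tiff/tiff_details.h"

std::optional<std::string> TiffCompressionId(
    tensorstore::internal_tiff_kvstore::CompressionType type) {
  const auto& map = tensorstore::internal_tiff::GetTiffCompressionMap();
  auto it = map.find(type);
  if (it == map.end()) return std::nullopt;
  return std::string(it->second);
}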
+
+#ifndef TENSORSTORE_DRIVER_TIFF_COMPRESSOR_H_
+#define TENSORSTORE_DRIVER_TIFF_COMPRESSOR_H_
+
+// Include the base class required by the JsonSpecifiedCompressor registry
+#include "tensorstore/internal/compression/json_specified_compressor.h"
+#include "tensorstore/internal/json_binding/bindable.h"  // For binder macro
+
+namespace tensorstore {
+namespace internal_tiff {
+
+class Compressor : public internal::JsonSpecifiedCompressor::Ptr {
+ public:
+  TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER(
+      Compressor, internal::JsonSpecifiedCompressor::FromJsonOptions,
+      internal::JsonSpecifiedCompressor::ToJsonOptions)
+};
+}  // namespace internal_tiff
+}  // namespace tensorstore
+
+#endif  // TENSORSTORE_DRIVER_TIFF_COMPRESSOR_H_
diff --git a/tensorstore/driver/tiff/compressor_registry.h b/tensorstore/driver/tiff/compressor_registry.h
new file mode 100644
index 000000000..7cfc9e5ab
--- /dev/null
+++ b/tensorstore/driver/tiff/compressor_registry.h
@@ -0,0 +1,46 @@
+// Copyright 2025 The TensorStore Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TENSORSTORE_DRIVER_TIFF_COMPRESSOR_REGISTRY_H_
+#define TENSORSTORE_DRIVER_TIFF_COMPRESSOR_REGISTRY_H_
+
+#include <string_view>
+
+#include "absl/container/flat_hash_map.h"
+#include "tensorstore/internal/compression/json_specified_compressor.h"
+#include "tensorstore/internal/json_registry.h"
+#include "tensorstore/kvstore/tiff/tiff_details.h"
+
+namespace tensorstore {
+namespace internal_tiff {
+
+// Returns the global registry instance for TIFF compressors.
+// This registry maps string IDs (like "zlib", "zstd") to factories/binders
+// capable of creating JsonSpecifiedCompressor instances.
+internal::JsonSpecifiedCompressor::Registry& GetTiffCompressorRegistry();
+
+// Returns the map from TIFF Compression tag enum to string ID.
+const absl::flat_hash_map&
+GetTiffCompressionMap();
+
+template
+void RegisterCompressor(std::string_view id, Binder binder) {
+  GetTiffCompressorRegistry().Register(id, binder);
+}
+
+}  // namespace internal_tiff
+}  // namespace tensorstore
+
+#endif  // TENSORSTORE_DRIVER_TIFF_COMPRESSOR_REGISTRY_H_
diff --git a/tensorstore/driver/tiff/driver.cc b/tensorstore/driver/tiff/driver.cc
new file mode 100644
index 000000000..d38f9b2c1
--- /dev/null
+++ b/tensorstore/driver/tiff/driver.cc
@@ -0,0 +1,1156 @@
+// Copyright 2025 The TensorStore Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
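For context, the driver registered by this file is opened through the ordinary tensorstore::Open path. The sketch below is illustrative rather than part of the patch: the file path is invented, and the "ifd" member under "tiff" is inferred from the validation messages in the spec binder further down, so the exact option names may differ.

// Illustrative usage sketch (not part of the patch).
#include <nlohmann/json.hpp>

#include "tensorstore/open.h"
#include "tensorstore/open_mode.h"
#include "tensorstore/tensorstore.h"
#include "tensorstore/util/result.h"

tensorstore::Result<tensorstore::TensorStore<>> OpenExampleTiff() {
  return tensorstore::Open(
             ::nlohmann::json{
                 {"driver", "tiff"},
                 {"kvstore", {{"driver", "file"}, {"path", "image.tif"}}},
                 // Select IFD 0 explicitly; "ifd_stacking" could be used
                 // instead to expose a sequence of IFDs as extra dimensions.
                 {"tiff", {{"ifd", 0}}}},
             tensorstore::OpenMode::open, tensorstore::ReadWriteMode::read)
      .result();
}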
+ +#include "tensorstore/driver/driver.h" + +#include +#include +#include +#include +#include + +#include "absl/base/optimization.h" +#include "absl/container/flat_hash_map.h" +#include "absl/log/absl_log.h" +#include "absl/status/status.h" +#include "absl/strings/cord.h" +#include "absl/strings/string_view.h" +#include "tensorstore/array.h" +#include "tensorstore/chunk_layout.h" +#include "tensorstore/driver/chunk_cache_driver.h" +#include "tensorstore/driver/driver_spec.h" +#include "tensorstore/driver/kvs_backed_chunk_driver.h" +#include "tensorstore/driver/registry.h" +#include "tensorstore/driver/tiff/metadata.h" +#include "tensorstore/index_space/index_domain_builder.h" +#include "tensorstore/index_space/internal/propagate_bounds.h" +#include "tensorstore/internal/cache/async_cache.h" +#include "tensorstore/internal/cache/cache.h" +#include "tensorstore/internal/cache/kvs_backed_chunk_cache.h" +#include "tensorstore/internal/json_binding/json_binding.h" +#include "tensorstore/internal/json_binding/staleness_bound.h" // IWYU: pragma keep +#include "tensorstore/kvstore/driver.h" +#include "tensorstore/kvstore/generation.h" +#include "tensorstore/kvstore/kvstore.h" +#include "tensorstore/kvstore/tiff/tiff_key_value_store.h" +#include "tensorstore/util/execution/any_receiver.h" +#include "tensorstore/util/execution/execution.h" +#include "tensorstore/util/garbage_collection/fwd.h" +#include "tensorstore/util/result.h" +#include "tensorstore/util/status.h" +#include "tensorstore/util/str_cat.h" + +namespace tensorstore { +namespace internal_tiff { + +// Avoid anonymous namespace to workaround MSVC bug. +// +// https://developercommunity.visualstudio.com/t/Bug-involving-virtual-functions-templat/10424129 +#ifndef _MSC_VER +namespace { +#endif + +namespace jb = tensorstore::internal_json_binding; + +using ::tensorstore::internal::AsyncCache; +using ::tensorstore::internal::GetOwningCache; +using ::tensorstore::internal_kvs_backed_chunk_driver::KvsDriverSpec; + +// Define the TIFF-specific chunk cache, inheriting from KvsBackedChunkCache. +// This cache handles reading raw tile/strip data from the TiffKeyValueStore +// and decoding it. +class TiffChunkCache : public internal::KvsBackedChunkCache { + // Hot‑path data we compute once and then reuse for every call. + struct FastPath { + DimensionIndex y_grid_dim = -1; + DimensionIndex x_grid_dim = -1; + DimensionIndex sample_grid_dim = -1; + + // Stack label to grid dimension + absl::flat_hash_map stack_to_grid; + + // Stack label to size + absl::flat_hash_map stack_size; + + // Stack label to stride + absl::flat_hash_map stack_stride; + + // Geometry derived from metadata + Index num_cols = 0; // tiles/strips per row + Index num_chunks_per_plane = 0; // planar‑config adjustment + }; + + public: + using Base = internal::KvsBackedChunkCache; + using ReadData = ChunkCache::ReadData; + + explicit TiffChunkCache(kvstore::DriverPtr tiff_kv_store_driver, + std::shared_ptr resolved_metadata, + internal::ChunkGridSpecification grid, + Executor executor) + : Base(std::move(tiff_kv_store_driver)), + resolved_metadata_(std::move(resolved_metadata)), + grid_(std::move(grid)), + executor_(std::move(executor)) { + assert(resolved_metadata_ && "Resolved metadata cannot be null."); + } + + // Returns the grid specification provided during construction. 
+ const internal::ChunkGridSpecification& grid() const override { + return grid_; + } + + const Executor& executor() const override { return executor_; } + + void InitFastPath() { + fast_ = std::make_unique(); + const auto& metadata = *resolved_metadata_; + const auto& grid_spec = this->grid(); + const auto& mapping = metadata.dimension_mapping; + const DimensionIndex grid_rank = grid_spec.grid_rank(); + + const auto& chunked_to_cell = + grid_spec.components[0].chunked_to_cell_dimensions; + + // Helper lambda to find index of a label in a vector + auto find_index = [](const std::vector& vec, + std::string_view label) { + return static_cast(std::find(vec.begin(), vec.end(), label) - + vec.begin()); + }; + + // Classify grid dimensions + for (DimensionIndex g = 0; g < grid_rank; ++g) { + const DimensionIndex ts_dim = chunked_to_cell[g]; + if (mapping.ts_y_dim == ts_dim) { + fast_->y_grid_dim = g; + } else if (mapping.ts_x_dim == ts_dim) { + fast_->x_grid_dim = g; + } else if (mapping.ts_sample_dim == ts_dim) { + fast_->sample_grid_dim = g; + } else { + std::string_view label = mapping.labels_by_ts_dim[ts_dim]; + fast_->stack_to_grid[label] = g; + } + } + + // Pre‑compute strides for stacked dimensions + if (metadata.stacking_info) { + const auto& stacking_info = *metadata.stacking_info; + const auto& sizes = *stacking_info.dimension_sizes; + const auto& order = stacking_info.ifd_sequence_order + ? *stacking_info.ifd_sequence_order + : stacking_info.dimensions; + + uint64_t stride = 1; + for (int i = static_cast(order.size()) - 1; i >= 0; --i) { + std::string_view label = order[i]; + fast_->stack_stride[label] = stride; + size_t idx = find_index(stacking_info.dimensions, label); + fast_->stack_size[label] = sizes[idx]; + stride *= static_cast(sizes[idx]); + } + } + + // Geometry that never changes + const Index chunk_width = metadata.ifd0_chunk_width; + const Index chunk_height = metadata.ifd0_chunk_height; + const Index image_width = metadata.shape[*mapping.ts_x_dim]; + const Index image_height = metadata.shape[*mapping.ts_y_dim]; + + fast_->num_cols = (image_width + chunk_width - 1) / chunk_width; + if (metadata.is_tiled) { + const Index num_rows = (image_height + chunk_height - 1) / chunk_height; + fast_->num_chunks_per_plane = num_rows * fast_->num_cols; + } else { + fast_->num_chunks_per_plane = + (image_height + chunk_height - 1) / chunk_height; + } + } + + std::string GetChunkStorageKey(span cell_indices) override { + using internal_tiff_kvstore::PlanarConfigType; + if (!fast_) { + InitFastPath(); + } + + const FastPath& fast = *fast_; + const auto& metadata = *resolved_metadata_; + + // Determine the target IFD index. + uint32_t target_ifd_index = metadata.base_ifd_index; + + if (metadata.stacking_info) { + const auto& stacking_info = *metadata.stacking_info; + const auto& ifd_iteration_order = + stacking_info.ifd_sequence_order.value_or(stacking_info.dimensions); + + for (std::string_view stack_label : ifd_iteration_order) { + auto grid_dim_it = fast.stack_to_grid.find(stack_label); + if (ABSL_PREDICT_FALSE(grid_dim_it == fast.stack_to_grid.end())) { + ABSL_LOG(FATAL) << "Stacking dimension label '" << stack_label + << "' not found in grid specification."; + } + + DimensionIndex grid_dimension_index = grid_dim_it->second; + uint64_t dimension_stride = fast.stack_stride.find(stack_label)->second; + + target_ifd_index += static_cast( + cell_indices[grid_dimension_index] * dimension_stride); + } + } + + // Compute the linear chunk index within the chosen IFD. 
+ Index y_chunk_index = + (fast.y_grid_dim >= 0) ? cell_indices[fast.y_grid_dim] : 0; + Index x_chunk_index = + (fast.x_grid_dim >= 0) ? cell_indices[fast.x_grid_dim] : 0; + + uint64_t linear_chunk_index = + metadata.is_tiled + ? static_cast(y_chunk_index) * fast.num_cols + + x_chunk_index + : static_cast(y_chunk_index); + + // Planar‑configuration adjustment: add an offset for the sample plane. + if (metadata.planar_config == PlanarConfigType::kPlanar && + metadata.samples_per_pixel > 1) { + Index sample_plane_index = cell_indices[fast.sample_grid_dim]; + linear_chunk_index += + static_cast(sample_plane_index) * fast.num_chunks_per_plane; + } + + // Assemble the final storage‑key string. + auto storage_key = tensorstore::StrCat("chunk/", target_ifd_index, "/", + linear_chunk_index); + return storage_key; + } + + // Decodes chunk data (called by Entry::DoDecode indirectly). + Result, 1>> DecodeChunk( + span chunk_indices, absl::Cord data) override { + TENSORSTORE_ASSIGN_OR_RETURN( + auto decoded_chunk, + internal_tiff::DecodeChunk(*resolved_metadata_, std::move(data))); + absl::InlinedVector, 1> components; + components.emplace_back(std::move(decoded_chunk)); + return components; + } + + // Encodes chunk data (called by Entry::DoEncode indirectly). Read-only. + Result EncodeChunk( + span chunk_indices, + span> component_arrays) override { + return absl::UnimplementedError("TIFF driver is read-only"); + } + + // Defines cache entry behavior, specifically decoding. + class Entry : public Base::Entry { + public: + using OwningCache = TiffChunkCache; + using KvsEntryBase = OwningCache::Base::Entry; + using DecodeReceiver = typename Base::Entry::DecodeReceiver; + using EncodeReceiver = typename Base::Entry::EncodeReceiver; + + // Encodes data for writing back to KvStore. Not supported for read-only. + void DoEncode(std::shared_ptr read_data, + EncodeReceiver receiver) override { + execution::set_error( + receiver, absl::UnimplementedError("TIFF driver is read-only")); + } + + // Override description for debugging/logging. + std::string DescribeChunk() override { + auto& cache = GetOwningCache(*this); + auto cell_indices = this->cell_indices(); + return tensorstore::StrCat("TIFF chunk ", cell_indices, " (key=", + cache.GetChunkStorageKey(cell_indices), ")"); + } + + absl::Status AnnotateError(const absl::Status& error, bool reading) { + return GetOwningCache(*this).kvstore_driver_->AnnotateError( + this->GetKeyValueStoreKey(), reading ? "reading" : "writing", error); + } + }; + + // --- Required Allocation Methods --- + Entry* DoAllocateEntry() final { return new Entry; } + size_t DoGetSizeofEntry() final { return sizeof(Entry); } + + // Allocate the base transaction node type from KvsBackedChunkCache. + TransactionNode* DoAllocateTransactionNode(AsyncCache::Entry& entry) final { + return new Base::TransactionNode(static_cast(entry)); + } + + private: + std::shared_ptr resolved_metadata_; + internal::ChunkGridSpecification grid_; + Executor executor_; + std::unique_ptr fast_; +}; + +// Validator function for positive integers +template +absl::Status ValidatePositive(const T& value) { + if (value <= 0) { + return absl::InvalidArgumentError("Value must be positive"); + } + return absl::OkStatus(); +} + +// TiffDriverSpec: Defines the specification for opening a TIFF TensorStore. 
+class TiffDriverSpec + : public internal::RegisteredDriverSpec { + public: + constexpr static char id[] = "tiff"; + using Base = internal::RegisteredDriverSpec; + + TiffSpecOptions tiff_options; + TiffMetadataConstraints metadata_constraints; + + constexpr static auto ApplyMembers = [](auto& x, auto f) { + return f(internal::BaseCast(x), x.tiff_options, + x.metadata_constraints); + }; + + static inline const auto default_json_binder = jb::Validate( + [](const auto& options, auto* obj) -> absl::Status { + // Enforce mutual exclusion: if ifd_stacking is present, + // ifd_index must be 0. + if (obj->tiff_options.ifd_stacking && + obj->tiff_options.ifd_index != 0) { + return absl::InvalidArgumentError( + "Cannot specify both \"ifd\" (non-zero) and \"ifd_stacking\" in " + "\"tiff\" options"); + } + // Validate sample_dimension_label against stacking dimensions + if (obj->tiff_options.ifd_stacking && + obj->tiff_options.sample_dimension_label) { + const auto& stack_dims = obj->tiff_options.ifd_stacking->dimensions; + if (std::find(stack_dims.begin(), stack_dims.end(), + *obj->tiff_options.sample_dimension_label) != + stack_dims.end()) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "\"sample_dimension_label\" (\"", + *obj->tiff_options.sample_dimension_label, + "\") conflicts with a label in \"ifd_stacking.dimensions\"")); + } + } + // Validate schema dtype if specified + if (obj->schema.dtype().valid()) { + TENSORSTORE_RETURN_IF_ERROR(ValidateDataType(obj->schema.dtype())); + } + return absl::OkStatus(); + }, + jb::Sequence( + // Copied from kvs_backed_chunk_driver::KvsDriverSpec because + // KvsDriverSpec::store initializer was enforcing directory path. + jb::Member(internal::DataCopyConcurrencyResource::id, + jb::Projection<&KvsDriverSpec::data_copy_concurrency>()), + jb::Member(internal::CachePoolResource::id, + jb::Projection<&KvsDriverSpec::cache_pool>()), + jb::Member("metadata_cache_pool", + jb::Projection<&KvsDriverSpec::metadata_cache_pool>()), + jb::Projection<&KvsDriverSpec::store>( + jb::KvStoreSpecAndPathJsonBinder), + jb::Initialize([](auto* obj) { return absl::OkStatus(); }), + jb::Projection<&KvsDriverSpec::staleness>(jb::Sequence( + jb::Member("recheck_cached_metadata", + jb::Projection(&StalenessBounds::metadata, + jb::DefaultValue([](auto* obj) { + obj->bounded_by_open_time = true; + }))), + jb::Member("recheck_cached_data", + jb::Projection(&StalenessBounds::data, + jb::DefaultInitializedValue())))), + jb::Projection<&KvsDriverSpec::fill_value_mode>(jb::Sequence( + jb::Member( + "fill_missing_data_reads", + jb::Projection<&internal_kvs_backed_chunk_driver:: + FillValueMode::fill_missing_data_reads>( + jb::DefaultValue([](auto* obj) { *obj = true; }))), + jb::Member("store_data_equal_to_fill_value", + jb::Projection< + &internal_kvs_backed_chunk_driver::FillValueMode:: + store_data_equal_to_fill_value>( + jb::DefaultInitializedValue())))), + internal::OpenModeSpecJsonBinder, + jb::Member( + "metadata", + jb::Validate( + [](const auto& options, auto* obj) { + TENSORSTORE_RETURN_IF_ERROR(obj->schema.Set( + obj->metadata_constraints.dtype.value_or(DataType()))); + TENSORSTORE_RETURN_IF_ERROR(obj->schema.Set( + RankConstraint{obj->metadata_constraints.rank})); + return absl::OkStatus(); + }, + jb::Projection<&TiffDriverSpec::metadata_constraints>( + jb::DefaultInitializedValue()))), + jb::Member("tiff", jb::Projection<&TiffDriverSpec::tiff_options>( + jb::DefaultValue([](auto* v) { *v = {}; }))))); + + Result> GetDomain() const override { + return 
internal_tiff::GetEffectiveDomain(metadata_constraints, schema); + } + + Result GetCodec() const override { + CodecSpec codec_constraint = schema.codec(); + auto tiff_codec = internal::CodecDriverSpec::Make(); + + if (codec_constraint.valid()) { + TENSORSTORE_RETURN_IF_ERROR( + tiff_codec->MergeFrom(codec_constraint), + MaybeAnnotateStatus( + _, "Cannot merge schema codec constraints with tiff driver")); + } + return CodecSpec(std::move(tiff_codec)); + } + + Result GetChunkLayout() const override { + return schema.chunk_layout(); + } + + Result> GetFillValue( + IndexTransformView<> transform) const override { + return schema.fill_value().valid() + ? tensorstore::Result>( + schema.fill_value()) + : tensorstore::Result>{std::in_place}; + } + + Result GetDimensionUnits() const override { + DimensionIndex rank = schema.rank().rank; + if (metadata_constraints.rank != dynamic_rank) { + if (rank != dynamic_rank && rank != metadata_constraints.rank) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Rank specified in schema (", rank, + ") conflicts with rank specified in metadata constraints (", + metadata_constraints.rank, ")")); + } + rank = metadata_constraints.rank; + } + if (rank == dynamic_rank && metadata_constraints.shape.has_value()) { + rank = metadata_constraints.shape->size(); + } + if (rank == dynamic_rank && schema.domain().valid()) { + rank = schema.domain().rank(); + } + return internal_tiff::GetEffectiveDimensionUnits(rank, schema); + } + + absl::Status ApplyOptions(SpecOptions&& options) override { + if (options.minimal_spec) { + metadata_constraints = {}; + tiff_options = {}; + } + TENSORSTORE_RETURN_IF_ERROR(Base::ApplyOptions(std::move(options))); + return absl::OkStatus(); + } + + Future Open( + internal::DriverOpenRequest request) const override; +}; + +// Initializer structure for TiffDriver +struct TiffDriverInitializer { + internal::CachePtr cache; + size_t component_index; // Always 0 for TIFF. 
+ StalenessBound data_staleness_bound; + StalenessBound metadata_staleness_bound; + internal::PinnedCacheEntry + metadata_cache_entry; + internal_kvs_backed_chunk_driver::FillValueMode fill_value_mode; + std::shared_ptr initial_metadata; + TiffSpecOptions tiff_options; + Schema schema; + Context::Resource + data_copy_concurrency; + Context::Resource cache_pool; + std::optional> + metadata_cache_pool; +}; + +class TiffDriver; + +using TiffDriverBase = internal::RegisteredDriver< + TiffDriver, + internal::ChunkGridSpecificationDriver>; + +class TiffDriver final : public TiffDriverBase { + public: + using Base = TiffDriverBase; + + explicit TiffDriver(TiffDriverInitializer&& initializer) + : Base({std::move(initializer.cache), + initializer.component_index, // Should be 0 + initializer.data_staleness_bound}), + metadata_staleness_bound_(initializer.metadata_staleness_bound), + metadata_cache_entry_(std::move(initializer.metadata_cache_entry)), + fill_value_mode_(initializer.fill_value_mode), + initial_metadata_(std::move(initializer.initial_metadata)), + tiff_options_(std::move(initializer.tiff_options)), + schema_(std::move(initializer.schema)), + data_copy_concurrency_(std::move(initializer.data_copy_concurrency)), + cache_pool_(std::move(initializer.cache_pool)), + metadata_cache_pool_(std::move(initializer.metadata_cache_pool)) { + ABSL_CHECK(component_index() == 0); + ABSL_CHECK(metadata_cache_entry_); + } + + Result> GetMetadata() const { + return initial_metadata_; + } + + Result GetBoundSpec( + internal::OpenTransactionPtr transaction, + IndexTransformView<> transform) override { + auto spec = internal::DriverSpec::Make(); + + TENSORSTORE_ASSIGN_OR_RETURN( + auto final_transform, + GetBoundSpecData(std::move(transaction), *spec, transform)); + + return internal::TransformedDriverSpec{std::move(spec), + std::move(final_transform)}; + } + + void GarbageCollectionVisit( + garbage_collection::GarbageCollectionVisitor& visitor) const override { + Base::GarbageCollectionVisit(visitor); + garbage_collection::GarbageCollectionVisit(visitor, metadata_cache_entry_); + } + + Result GetChunkLayout(IndexTransformView<> transform) override { + const auto& metadata = *initial_metadata_; + + TENSORSTORE_ASSIGN_OR_RETURN( + auto layout, + ApplyInverseIndexTransform(transform, metadata.chunk_layout)); + + TENSORSTORE_RETURN_IF_ERROR(layout.Finalize()); + return layout; + } + + Result GetCodec() override { + const auto& metadata = *initial_metadata_; + auto codec_spec = internal::CodecDriverSpec::Make(); + codec_spec->compressor = metadata.compressor; + return CodecSpec(std::move(codec_spec)); + } + + Result> GetFillValue( + IndexTransformView<> transform) override { + return {std::in_place}; + } + + Result GetDimensionUnits() override { + TENSORSTORE_ASSIGN_OR_RETURN(auto metadata, GetMetadata()); + if (metadata->dimension_units.size() != rank()) { + return absl::InternalError("Metadata dimension_units rank mismatch"); + } + return metadata->dimension_units; + } + + KvStore GetKvstore(const Transaction& transaction) override { + auto& dir_cache = internal::GetOwningCache(*metadata_cache_entry_); + std::string path(metadata_cache_entry_->key()); + return KvStore(kvstore::DriverPtr(dir_cache.kvstore_driver_), + std::move(path), transaction); + } + + Result GetBase( + ReadWriteMode read_write_mode, IndexTransformView<> transform, + const Transaction& transaction) override { + // TIFF driver is not an adapter. 
+ return internal::DriverHandle(); + } + + void Read(ReadRequest request, ReadChunkReceiver receiver) override { + // Replicate logic from ChunkCacheReadWriteDriverMixin + cache()->Read({std::move(request), component_index(), + this->data_staleness_bound().time, + fill_value_mode_.fill_missing_data_reads}, + std::move(receiver)); + } + + void Write(WriteRequest request, WriteChunkReceiver receiver) override { + execution::set_error(receiver, + absl::UnimplementedError("TIFF driver is read-only")); + } + + Future> ResolveBounds( + ResolveBoundsRequest request) override { + // Asynchronously resolve the metadata first. + return MapFuture( + this->data_copy_executor(), + [transform = std::move(request.transform), + options = std::move(request.options)]( + const Result>& + metadata_result) mutable -> Result> { + TENSORSTORE_RETURN_IF_ERROR(metadata_result); + const auto& metadata = *metadata_result.value(); + + // The authoritative domain is defined by the metadata's shape. + // TIFF files inherently have a zero origin. + BoxView<> domain(metadata.shape); + + DimensionSet implicit_lower_bounds( + false); // Always explicit 0 lower bound + DimensionSet implicit_upper_bounds( + false); // Assume fixed upper bounds initially + + if (!(options.mode & fix_resizable_bounds)) { + // If fix_resizable_bounds is *not* set, treat upper bounds + // as implicit. Questionable for TIFF... + implicit_upper_bounds = true; + } + + return PropagateBoundsToTransform(domain, implicit_lower_bounds, + implicit_upper_bounds, + std::move(transform)); + }, + ResolveMetadata(std::move(request.transaction))); + } + + Future> Resize(ResizeRequest request) override { + return absl::UnimplementedError("Resize is not supported by TIFF driver"); + } + + Future GetStorageStatistics( + GetStorageStatisticsRequest request) override { + // TODO(hsidky): Implement GetStorageStatistics. + // Might involve iterating keys in TiffKvStore? Complex. + return absl::UnimplementedError("GetStorageStatistics not implemented"); + } + + // --- Helper for potentially stale metadata access --- + Future> ResolveMetadata( + internal::OpenTransactionPtr transaction) { + // TODO: Transactions are not currently applied to metadata cache + auto read_future = + metadata_cache_entry_->Read({metadata_staleness_bound_.time}); + + // Chain the metadata resolution logic onto the future. + return MapFuture( + this->data_copy_executor(), + [this, tiff_options = this->tiff_options_, + schema = this->schema_](const Result& read_result) + -> Result> { + TENSORSTORE_RETURN_IF_ERROR(read_result); + + // Lock the directory cache entry to get the TiffParseResult. + auto lock = AsyncCache::ReadLock< + const internal_tiff_kvstore::TiffParseResult>( + *this->metadata_cache_entry_); + auto parse_result_ptr = lock.shared_data(); + + if (!parse_result_ptr) { + return absl::NotFoundError( + "TIFF parse result not found or failed to load."); + } + + TENSORSTORE_ASSIGN_OR_RETURN( + auto resolved_metadata, + internal_tiff::ResolveMetadata(*parse_result_ptr, tiff_options, + schema)); + + // TODO: Optionally compare resolved_metadata with initial_metadata_ + // and return an error if incompatible changes occurred? + // For read-only, this might not be strictly necessary unless + // bounds changed in an unsupported way. + + return resolved_metadata; + }, + std::move(read_future)); + } + + // Returns the transform from the external user view to the internal driver + // view. For the base TIFF driver, this is typically identity. 
+ Result> GetExternalToInternalTransform( + const TiffMetadata& metadata, size_t component_index) const { + ABSL_CHECK(component_index == 0); + // Assumes zero origin, adjust if needed for OME-TIFF etc. later. + TENSORSTORE_ASSIGN_OR_RETURN(auto domain, + IndexDomainBuilder(metadata.rank) + .shape(metadata.shape) + .labels(metadata.dimension_labels) + .Finalize()); + return IdentityTransform(domain); + } + + private: + friend class TiffDriverSpec; + + Result> GetBoundSpecData( + internal::OpenTransactionPtr transaction, TiffDriverSpec& spec, + IndexTransformView<> transform) { + TENSORSTORE_ASSIGN_OR_RETURN(auto metadata, GetMetadata()); + + spec.context_binding_state_ = ContextBindingState::bound; + auto& dir_cache = internal::GetOwningCache(*metadata_cache_entry_); + TENSORSTORE_ASSIGN_OR_RETURN(spec.store.driver, + dir_cache.kvstore_driver_->GetBoundSpec()); + // Use the directory cache entry's key as the base path for the spec. + // This assumes the key represents the logical path to the TIFF data. + spec.store.path = metadata_cache_entry_->key(); + + spec.data_copy_concurrency = this->data_copy_concurrency_; + spec.cache_pool = this->cache_pool_; + spec.metadata_cache_pool = this->metadata_cache_pool_; + spec.staleness.data = this->data_staleness_bound(); + spec.staleness.metadata = this->metadata_staleness_bound_; + spec.fill_value_mode = this->fill_value_mode_; + + TENSORSTORE_RETURN_IF_ERROR( + spec.schema.Set(RankConstraint{metadata->rank})); + TENSORSTORE_RETURN_IF_ERROR(spec.schema.Set(metadata->dtype)); + if (this->schema_.fill_value().valid()) { + TENSORSTORE_RETURN_IF_ERROR( + spec.schema.Set(Schema::FillValue(this->schema_.fill_value()))); + } + + // Copy original TIFF-specific options + spec.tiff_options = this->tiff_options_; + spec.metadata_constraints.rank = metadata->rank; + spec.metadata_constraints.shape = metadata->shape; + spec.metadata_constraints.dtype = metadata->dtype; + + TENSORSTORE_ASSIGN_OR_RETURN( + auto external_to_internal, + GetExternalToInternalTransform(*metadata, component_index())); + + IndexTransform<> final_transform = transform; + + // If the driver uses an internal transform compose the inverse of that + // transform with the input transform. + if (external_to_internal.valid()) { + TENSORSTORE_ASSIGN_OR_RETURN(auto internal_to_external, + InverseTransform(external_to_internal)); + TENSORSTORE_ASSIGN_OR_RETURN( + final_transform, + ComposeTransforms(internal_to_external, std::move(final_transform))); + } + + return final_transform; + } + + StalenessBound metadata_staleness_bound_; + internal::PinnedCacheEntry + metadata_cache_entry_; + internal_kvs_backed_chunk_driver::FillValueMode fill_value_mode_; + std::shared_ptr initial_metadata_; + TiffSpecOptions tiff_options_; + Schema schema_; + Context::Resource + data_copy_concurrency_; + Context::Resource cache_pool_; + std::optional> + metadata_cache_pool_; +}; + +/// Creates the ChunkGridSpecification based on the resolved TIFF metadata. +Result GetGridSpec( + const TiffMetadata& metadata) { + using internal::AsyncWriteArray; + using internal::ChunkGridSpecification; + using internal_tiff_kvstore::PlanarConfigType; + + const DimensionIndex rank = metadata.rank; + if (rank == dynamic_rank) { + return absl::InvalidArgumentError( + "Cannot determine grid with unknown rank"); + } + + ChunkGridSpecification::ComponentList components; + std::vector chunked_to_cell_dimensions; + + // Determine which final dimensions correspond to the grid axes. 
+ // Order: Stacked dims, Y, X, Sample (if planar) + if (metadata.stacking_info) { + // Use the sequence order if specified, otherwise use dimension order + const auto& stack_dims_in_final_order = metadata.stacking_info->dimensions; + const auto& sequence = metadata.stacking_info->ifd_sequence_order.value_or( + stack_dims_in_final_order); + for (const auto& label : sequence) { + auto it = metadata.dimension_mapping.ts_stacked_dims.find(label); + if (it != metadata.dimension_mapping.ts_stacked_dims.end()) { + chunked_to_cell_dimensions.push_back(it->second); + } else { + return absl::InternalError(tensorstore::StrCat( + "Stacking dimension '", label, + "' specified in sequence_order/dimensions not found in " + "final mapping")); + } + } + } + if (metadata.dimension_mapping.ts_y_dim.has_value()) { + chunked_to_cell_dimensions.push_back(*metadata.dimension_mapping.ts_y_dim); + } + if (metadata.dimension_mapping.ts_x_dim.has_value()) { + chunked_to_cell_dimensions.push_back(*metadata.dimension_mapping.ts_x_dim); + } + // Add Sample dimension to the grid ONLY if Planar + if (metadata.planar_config == PlanarConfigType::kPlanar && + metadata.dimension_mapping.ts_sample_dim.has_value()) { + chunked_to_cell_dimensions.push_back( + *metadata.dimension_mapping.ts_sample_dim); + } + + const DimensionIndex grid_rank = chunked_to_cell_dimensions.size(); + if (grid_rank == 0 && rank > 0) { + // Check if the only dimension is a non-grid Sample dimension (chunky, spp > + // 1, rank 1) + if (rank == 1 && metadata.dimension_mapping.ts_sample_dim.has_value() && + metadata.planar_config == PlanarConfigType::kChunky) { + // This is valid (e.g., just a list of RGB values), grid rank is 0 + } else { + return absl::InternalError( + "Calculated grid rank is 0 but overall rank > 0 and not solely a " + "sample dimension"); + } + } + if (grid_rank > rank) { + // Sanity check + return absl::InternalError("Calculated grid rank exceeds overall rank"); + } + + SharedArray fill_value; + if (metadata.fill_value.valid()) { + fill_value = metadata.fill_value; + } else { + fill_value = AllocateArray(/*shape=*/span{}, c_order, + value_init, metadata.dtype); + } + TENSORSTORE_ASSIGN_OR_RETURN( + auto fill_value_array, + BroadcastArray(std::move(fill_value), BoxView<>(metadata.shape))); + SharedOffsetArray offset_fill_value(std::move(fill_value_array)); + + Box<> component_bounds(rank); + + ContiguousLayoutOrder component_layout_order = metadata.layout_order; + + AsyncWriteArray::Spec array_spec{std::move(offset_fill_value), + std::move(component_bounds), + component_layout_order}; + + std::vector component_chunk_shape_vec( + metadata.chunk_layout.read_chunk_shape().begin(), + metadata.chunk_layout.read_chunk_shape().end()); + + components.emplace_back(std::move(array_spec), + std::move(component_chunk_shape_vec), + std::move(chunked_to_cell_dimensions)); + + return ChunkGridSpecification(std::move(components)); +} + +struct TiffOpenState : public internal::AtomicReferenceCount { + internal::DriverOpenRequest request_; + kvstore::Spec store_; + Context::Resource + data_copy_concurrency_; + Context::Resource cache_pool_; + std::optional> + metadata_cache_pool_; + StalenessBounds staleness_; + internal_kvs_backed_chunk_driver::FillValueMode fill_value_mode_; + TiffSpecOptions tiff_options_; + TiffMetadataConstraints metadata_constraints_; + Schema schema_; + absl::Time open_time_; + Promise promise_; + + TiffOpenState(const TiffDriverSpec* spec, internal::DriverOpenRequest request) + : request_(std::move(request)), + 
store_(spec->store), + data_copy_concurrency_(spec->data_copy_concurrency), + cache_pool_(spec->cache_pool), + metadata_cache_pool_(spec->metadata_cache_pool), + staleness_(spec->staleness), + fill_value_mode_(spec->fill_value_mode), + tiff_options_(spec->tiff_options), + metadata_constraints_(spec->metadata_constraints), + schema_(spec->schema), + open_time_(absl::Now()) {} + + // Initiates the open process + void Start(Promise promise); + + // Callback when base KvStore is ready + void OnKvStoreOpen(ReadyFuture future); + + // Callback when TiffDirectoryCache entry read is complete + void OnDirCacheRead( + KvStore base_kvstore, + internal::PinnedCacheEntry + metadata_cache_entry, + ReadyFuture future); +}; + +void TiffOpenState::Start(Promise promise) { + promise_ = std::move(promise); + + LinkValue(WithExecutor(data_copy_concurrency_->executor, + [self = internal::IntrusivePtr(this)]( + Promise promise, + ReadyFuture future) { + self->OnKvStoreOpen(std::move(future)); + }), + promise_, kvstore::Open(store_)); +} + +void TiffOpenState::OnKvStoreOpen(ReadyFuture future) { + Result base_kvstore_result = future.result(); + if (!base_kvstore_result.ok()) { + promise_.SetResult(std::move(base_kvstore_result).status()); + return; + } + KvStore base_kvstore = *std::move(base_kvstore_result); + + const auto& metadata_pool_res = + metadata_cache_pool_.has_value() ? *metadata_cache_pool_ : cache_pool_; + + auto* pool_ptr = metadata_pool_res->get(); + if (!pool_ptr) { + promise_.SetResult( + absl::InvalidArgumentError("Cache pool resource is null or invalid")); + return; + } + + std::string directory_cache_key; + internal::EncodeCacheKey(&directory_cache_key, base_kvstore.driver, + data_copy_concurrency_); + + auto directory_cache = + internal::GetCache( + pool_ptr, directory_cache_key, [&] { + return std::make_unique( + base_kvstore.driver, data_copy_concurrency_->executor); + }); + + auto metadata_cache_entry = + internal::GetCacheEntry(directory_cache, base_kvstore.path); + + StalenessBound metadata_staleness_bound = + staleness_.metadata.BoundAtOpen(open_time_); + auto read_future = + metadata_cache_entry->Read({metadata_staleness_bound.time}); + + LinkValue( + WithExecutor(data_copy_concurrency_->executor, + [self = internal::IntrusivePtr(this), + base_kvstore = std::move(base_kvstore), + metadata_cache_entry = std::move(metadata_cache_entry)]( + Promise promise, + ReadyFuture future) mutable { + self->OnDirCacheRead(std::move(base_kvstore), + std::move(metadata_cache_entry), + std::move(future)); + }), + promise_, std::move(read_future)); +} + +void TiffOpenState::OnDirCacheRead( + KvStore base_kvstore, + internal::PinnedCacheEntry + metadata_cache_entry, + ReadyFuture future) { + // 1. Check if reading the directory cache failed. + if (!future.result().ok()) { + if (promise_.result_needed()) { + promise_.SetResult(metadata_cache_entry->AnnotateError( + future.result().status(), /*reading=*/true)); + } + return; + } + + // 2. Lock the cache entry to access the parsed TiffParseResult. + internal::AsyncCache::ReadLock + lock(*metadata_cache_entry); + auto parse_result = lock.shared_data(); + + if (!parse_result) { + promise_.SetResult(absl::DataLossError( + "TIFF directory cache entry data is null after successful read")); + return; + } + + // 3. 
Resolve the final TiffMetadata + Result> metadata_result = + internal_tiff::ResolveMetadata(*parse_result, tiff_options_, schema_); + if (!metadata_result.ok()) { + promise_.SetResult(std::move(metadata_result).status()); + return; + } + std::shared_ptr metadata = *std::move(metadata_result); + + // 4. Validate the resolved metadata against user-provided constraints. + absl::Status validate_status = + internal_tiff::ValidateResolvedMetadata(*metadata, metadata_constraints_); + if (!validate_status.ok()) { + promise_.SetResult(internal::ConvertInvalidArgumentToFailedPrecondition( + std::move(validate_status))); + return; + } + + // 5. Validate against read/write mode (TIFF is read-only) + if (request_.read_write_mode != ReadWriteMode::read && + request_.read_write_mode != ReadWriteMode::dynamic) { + promise_.SetResult( + absl::InvalidArgumentError("TIFF driver only supports read mode")); + return; + } + ReadWriteMode driver_read_write_mode = ReadWriteMode::read; // Hardcoded + + // 6. Create TiffChunkCache + Result tiff_kvstore_driver_result = + kvstore::tiff_kvstore::GetTiffKeyValueStoreDriver( + base_kvstore.driver, base_kvstore.path, cache_pool_, + data_copy_concurrency_, metadata_cache_entry); + + if (!tiff_kvstore_driver_result.ok()) { + promise_.SetResult(std::move(tiff_kvstore_driver_result).status()); + return; + } + kvstore::DriverPtr tiff_kvstore_driver = + *std::move(tiff_kvstore_driver_result); + + auto grid_spec_result = GetGridSpec(*metadata); + + if (!grid_spec_result.ok()) { + promise_.SetResult(std::move(grid_spec_result).status()); + return; + } + internal::ChunkGridSpecification grid_spec = *std::move(grid_spec_result); + + std::string chunk_cache_key; + std::string metadata_compat_part; + std::string read_shape_str = tensorstore::StrCat( + tensorstore::span(metadata->chunk_layout.read_chunk_shape())); + + if (metadata->stacking_info) { + auto json_result = jb::ToJson(*metadata->stacking_info); + if (!json_result.ok()) { + promise_.SetResult(std::move(json_result).status()); + return; + } + auto stacking_json = *std::move(json_result); + + metadata_compat_part = absl::StrCat( + "stack", + stacking_json.dump(-1, ' ', false, + nlohmann::json::error_handler_t::replace), + "_dtype", metadata->dtype.name(), "_comp", + static_cast(metadata->compression_type), "_planar", + static_cast(metadata->planar_config), "_spp", + metadata->samples_per_pixel, "_endian", + static_cast(metadata->endian), "_readshape", read_shape_str); + } else { + metadata_compat_part = absl::StrFormat( + "ifd%d_dtype%s_comp%d_planar%d_spp%d_endian%d_readshape%s", + metadata->base_ifd_index, metadata->dtype.name(), + static_cast(metadata->compression_type), + static_cast(metadata->planar_config), metadata->samples_per_pixel, + static_cast(metadata->endian), read_shape_str); + } + + internal::EncodeCacheKey(&chunk_cache_key, metadata_cache_entry->key(), + metadata_compat_part, cache_pool_->get()); + + // 6d. Get or create the TiffChunkCache. + auto chunk_cache = internal::GetCache( + cache_pool_->get(), chunk_cache_key, [&] { + return std::make_unique( + tiff_kvstore_driver, metadata, grid_spec, + data_copy_concurrency_->executor); + }); + if (!chunk_cache) { + promise_.SetResult( + absl::InternalError("Failed to get or create TiffChunkCache")); + return; + } + + // 7. 
Create TiffDriver + TiffDriverInitializer driver_initializer{ + /*.cache=*/std::move(chunk_cache), + /*.component_index=*/0, // Always 0 for TIFF + /*.data_staleness_bound=*/staleness_.data.BoundAtOpen(open_time_), + /*.metadata_staleness_bound=*/staleness_.metadata.BoundAtOpen(open_time_), + /*.metadata_cache_entry=*/std::move(metadata_cache_entry), + /*.fill_value_mode=*/fill_value_mode_, + /*.initial_metadata=*/metadata, // resolved metadata + /*.tiff_options=*/tiff_options_, + /*.schema=*/schema_, // original schema constraints + /*.data_copy_concurrency=*/data_copy_concurrency_, + /*.cache_pool=*/cache_pool_, + /*.metadata_cache_pool=*/metadata_cache_pool_}; + + auto driver = + internal::MakeIntrusivePtr(std::move(driver_initializer)); + + // 8. Finalize: Get Transform and Set Promise + Result> transform_result = + driver->GetExternalToInternalTransform(*metadata, 0); + if (!transform_result.ok()) { + promise_.SetResult(std::move(transform_result).status()); + return; + } + + internal::Driver::Handle handle{internal::ReadWritePtr( + driver.get(), driver_read_write_mode), + std::move(*transform_result), + internal::TransactionState::ToTransaction( + std::move(request_.transaction))}; + + promise_.SetResult(std::move(handle)); +} + +Future TiffDriverSpec::Open( + internal::DriverOpenRequest request) const { + if (!store.valid()) { + return absl::InvalidArgumentError("\"kvstore\" must be specified"); + } + TENSORSTORE_RETURN_IF_ERROR( + this->OpenModeSpec::Validate(request.read_write_mode)); + + auto state = + internal::MakeIntrusivePtr(this, std::move(request)); + auto [promise, future] = PromiseFuturePair::Make(); + state->Start(std::move(promise)); + + return std::move(future); +} + +#ifndef _MSC_VER +} // namespace +#endif + +} // namespace internal_tiff +} // namespace tensorstore + +TENSORSTORE_DECLARE_GARBAGE_COLLECTION_SPECIALIZATION( + tensorstore::internal_tiff::TiffDriver) + +TENSORSTORE_DEFINE_GARBAGE_COLLECTION_SPECIALIZATION( + tensorstore::internal_tiff::TiffDriver, + tensorstore::garbage_collection::PolymorphicGarbageCollection< + tensorstore::internal_tiff::TiffDriver>) + +namespace { +const tensorstore::internal::DriverRegistration< + tensorstore::internal_tiff::TiffDriverSpec> + registration; +} // namespace \ No newline at end of file diff --git a/tensorstore/driver/tiff/driver_test.cc b/tensorstore/driver/tiff/driver_test.cc new file mode 100644 index 000000000..1c1b39f16 --- /dev/null +++ b/tensorstore/driver/tiff/driver_test.cc @@ -0,0 +1,961 @@ +// Copyright 2025 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
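The tests below hand-assemble little-endian TIFF streams with TiffBuilder, stage them in the in-memory kvstore via WriteTiffData, and open them through the driver. A sketch of that open pattern, with an invented key name ("img.tif") and assuming the includes of this file:

// Illustrative sketch of the open pattern used throughout these tests; a
// fixture would first call WriteTiffData("img.tif", MakeMinimalTiff()).
tensorstore::Result<tensorstore::TensorStore<>> OpenTestTiff(
    tensorstore::Context context) {
  return tensorstore::Open(
             {{"driver", "tiff"},
              {"kvstore", {{"driver", "memory"}, {"path", "img.tif"}}}},
             context, tensorstore::ReadWriteMode::read)
      .result();
}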
+ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "absl/status/status.h" +#include "absl/strings/cord.h" +#include "tensorstore/array.h" +#include "tensorstore/box.h" +#include "tensorstore/chunk_layout.h" +#include "tensorstore/codec_spec.h" +#include "tensorstore/context.h" +#include "tensorstore/contiguous_layout.h" +#include "tensorstore/data_type.h" +#include "tensorstore/driver/driver_testutil.h" +#include "tensorstore/index.h" +#include "tensorstore/index_space/dim_expression.h" +#include "tensorstore/index_space/index_domain_builder.h" +#include "tensorstore/internal/global_initializer.h" +#include "tensorstore/internal/json_gtest.h" +#include "tensorstore/kvstore/kvstore.h" +#include "tensorstore/kvstore/memory/memory_key_value_store.h" +#include "tensorstore/kvstore/test_matchers.h" +#include "tensorstore/kvstore/tiff/tiff_test_util.h" +#include "tensorstore/open.h" +#include "tensorstore/open_mode.h" +#include "tensorstore/schema.h" +#include "tensorstore/spec.h" +#include "tensorstore/tensorstore.h" +#include "tensorstore/util/result.h" +#include "tensorstore/util/status_testutil.h" + +namespace { +namespace kvstore = tensorstore::kvstore; + +using ::tensorstore::CodecSpec; +using ::tensorstore::Context; +using ::tensorstore::DimensionIndex; +using ::tensorstore::dtype_v; +using ::tensorstore::GetMemoryKeyValueStore; +using ::tensorstore::Index; +using ::tensorstore::kImplicit; +using ::tensorstore::MatchesJson; +using ::tensorstore::MatchesStatus; +using ::tensorstore::Schema; +using ::tensorstore::Spec; +using ::tensorstore::internal::TestSpecSchema; +using ::tensorstore::internal_tiff_kvstore::testing::PutLE16; +using ::tensorstore::internal_tiff_kvstore::testing::PutLE32; +using ::tensorstore::internal_tiff_kvstore::testing::TiffBuilder; +using ::testing::Contains; +using ::testing::HasSubstr; +using ::testing::Optional; + +class TiffDriverTest : public ::testing::Test { + protected: + Context context_ = Context::Default(); + + // Helper to write float bytes in Little Endian + static void PutLEFloat32(std::string& dst, float f) { + static_assert(sizeof(float) == sizeof(uint32_t)); + uint32_t bits; + // issues + std::memcpy(&bits, &f, sizeof(float)); + PutLE32(dst, bits); + } + + // Helper to write TIFF data to memory kvstore + void WriteTiffData(std::string_view key, const std::string& tiff_data) { + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + tensorstore::KvStore store, + kvstore::Open({{"driver", "memory"}}, context_).result()); + TENSORSTORE_ASSERT_OK(kvstore::Write(store, key, absl::Cord(tiff_data))); + } + + std::string MakeMinimalTiff() { + // 10x20 uint8, 1 channel, chunky, 10x10 tiles + TiffBuilder builder; + builder.StartIfd(10) + .AddEntry(256, 3, 1, 10) // ImageWidth = 10 + .AddEntry(257, 3, 1, 20) // ImageLength = 20 + .AddEntry(277, 3, 1, 1) // SamplesPerPixel = 1 + .AddEntry(258, 3, 1, 8) // BitsPerSample = 8 + .AddEntry(259, 3, 1, 1) // Compression = None + .AddEntry(262, 3, 1, 1) // PhotometricInterpretation = MinIsBlack + .AddEntry(322, 3, 1, 10) // TileWidth = 10 + .AddEntry(323, 3, 1, 10); // TileLength = 10 + // Fake tile data offsets/counts + size_t data_start = builder.CurrentOffset() + 12 * 9 + 4 + 4 * 4; + builder.AddEntry(324, 4, 2, builder.CurrentOffset() + 12 * 9 + 4); + builder.AddEntry(325, 4, 2, builder.CurrentOffset() + 12 * 9 + 4 + 4 * 2); + builder.EndIfd(0); + builder.AddUint32Array( + {(uint32_t)data_start, (uint32_t)(data_start + 100)}); + builder.AddUint32Array({100, 100}); + 
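+    // The two fake 100-byte payloads appended below stand in for this 10x20
+    // image's two 10x10 tiles; TiffChunkCache::GetChunkStorageKey addresses
+    // them as "chunk/0/0" and "chunk/0/1".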
builder.data_.append(100, '\1'); + builder.data_.append(100, '\2'); + return builder.Build(); + } + + std::string MakeReadTestTiff() { + // 4x6 uint16, 1 channel, chunky, 2x3 tiles + std::vector tile0_data = {1, 2, 3, 7, 8, 9}; + std::vector tile1_data = {4, 5, 6, 10, 11, 12}; + std::vector tile2_data = {13, 14, 15, 19, 20, 21}; + std::vector tile3_data = {16, 17, 18, 22, 23, 24}; + size_t tile_size_bytes = 6 * sizeof(uint16_t); + + TiffBuilder builder; + builder.StartIfd(10) + .AddEntry(256, 3, 1, 6) // Width = 6 + .AddEntry(257, 3, 1, 4) // Height = 4 + .AddEntry(277, 3, 1, 1) // SamplesPerPixel = 1 + .AddEntry(258, 3, 1, 16) // BitsPerSample = 16 + .AddEntry(259, 3, 1, 1) // Compression = None + .AddEntry(262, 3, 1, 1) // Photometric = MinIsBlack + .AddEntry(322, 3, 1, 3) // TileWidth = 3 + .AddEntry(323, 3, 1, 2); // TileLength = 2 + + size_t header_size = 8; + size_t ifd_block_size = 2 + (10 * 12) + 4; + size_t end_of_ifd_offset = header_size + ifd_block_size; + + size_t tile_offsets_array_start_offset = end_of_ifd_offset; + size_t tile_offsets_array_size = 4 * sizeof(uint32_t); + size_t tile_bytecounts_array_start_offset = + tile_offsets_array_start_offset + tile_offsets_array_size; + size_t tile_bytecounts_array_size = 4 * sizeof(uint32_t); + size_t tile_data_start_offset = + tile_bytecounts_array_start_offset + tile_bytecounts_array_size; + + std::vector tile_offsets = { + (uint32_t)(tile_data_start_offset + 0 * tile_size_bytes), + (uint32_t)(tile_data_start_offset + 1 * tile_size_bytes), + (uint32_t)(tile_data_start_offset + 2 * tile_size_bytes), + (uint32_t)(tile_data_start_offset + 3 * tile_size_bytes)}; + std::vector tile_bytecounts(4, tile_size_bytes); + + builder.AddEntry(324, 4, tile_offsets.size(), + tile_offsets_array_start_offset); + builder.AddEntry(325, 4, tile_bytecounts.size(), + tile_bytecounts_array_start_offset); + + builder.EndIfd(0) + .AddUint32Array(tile_offsets) + .AddUint32Array(tile_bytecounts); + + auto append_tile = [&](const std::vector& data) { + for (uint16_t val : data) { + PutLE16(builder.data_, val); + } + }; + append_tile(tile0_data); + append_tile(tile1_data); + append_tile(tile2_data); + append_tile(tile3_data); + + return builder.Build(); + } + + // Generates a 6x8 uint8 image with 3 strips (RowsPerStrip = 2) + std::string MakeStrippedTiff() { + const uint32_t image_width = 8; + const uint32_t image_height = 6; + const uint32_t rows_per_strip = 2; + const uint32_t num_strips = + (image_height + rows_per_strip - 1) / rows_per_strip; + const uint32_t bytes_per_strip = + rows_per_strip * image_width * sizeof(uint8_t); + + const uint16_t num_ifd_entries = 10; + + TiffBuilder builder; + builder.StartIfd(num_ifd_entries) + .AddEntry(256, 3, 1, image_width) // ImageWidth + .AddEntry(257, 3, 1, image_height) // ImageLength + .AddEntry(277, 3, 1, 1) // SamplesPerPixel = 1 + .AddEntry(258, 3, 1, 8) // BitsPerSample = 8 + .AddEntry(339, 3, 1, 1) // SampleFormat = uint + .AddEntry(259, 3, 1, 1) // Compression = None + .AddEntry(262, 3, 1, 1) // PhotometricInterpretation = MinIsBlack + .AddEntry(278, 3, 1, rows_per_strip); // RowsPerStrip + + size_t header_size = 8; + size_t ifd_block_size = 2 + (num_ifd_entries * 12) + 4; + size_t end_of_ifd_offset = header_size + ifd_block_size; + + size_t strip_offsets_array_start_offset = end_of_ifd_offset; + size_t strip_offsets_array_size = num_strips * sizeof(uint32_t); + size_t strip_bytecounts_array_start_offset = + strip_offsets_array_start_offset + strip_offsets_array_size; + size_t strip_bytecounts_array_size = 
num_strips * sizeof(uint32_t); + size_t strip_data_start_offset = + strip_bytecounts_array_start_offset + strip_bytecounts_array_size; + + std::vector strip_offsets; + std::vector strip_bytecounts; + for (uint32_t i = 0; i < num_strips; ++i) { + strip_offsets.push_back(strip_data_start_offset + i * bytes_per_strip); + strip_bytecounts.push_back(bytes_per_strip); + } + + builder.AddEntry(273, 4, strip_offsets.size(), + strip_offsets_array_start_offset); + builder.AddEntry(279, 4, strip_bytecounts.size(), + strip_bytecounts_array_start_offset); + + builder.EndIfd(0) + .AddUint32Array(strip_offsets) + .AddUint32Array(strip_bytecounts); + + for (uint32_t s = 0; s < num_strips; ++s) { + for (uint32_t i = 0; i < bytes_per_strip; ++i) { + builder.data_.push_back(static_cast(s * 10 + i)); + } + } + + return builder.Build(); + } + + // Generates a 2x3 float32 image with 1x1 tiles + std::string MakeFloatTiff() { + const uint32_t image_width = 3; + const uint32_t image_height = 2; + const uint32_t tile_width = 1; + const uint32_t tile_height = 1; + const uint32_t num_tiles = + (image_height / tile_height) * (image_width / tile_width); + const uint32_t bytes_per_tile = tile_height * tile_width * sizeof(float); + + const uint16_t num_ifd_entries = 11; + + TiffBuilder builder; + builder.StartIfd(num_ifd_entries) + .AddEntry(256, 3, 1, image_width) // ImageWidth + .AddEntry(257, 3, 1, image_height) // ImageLength + .AddEntry(277, 3, 1, 1) // SamplesPerPixel = 1 + .AddEntry(258, 3, 1, 32) // BitsPerSample = 32 + .AddEntry(339, 3, 1, 3) // SampleFormat = IEEEFloat (3) + .AddEntry(259, 3, 1, 1) // Compression = None + .AddEntry(262, 3, 1, 1) // PhotometricInterpretation = MinIsBlack + .AddEntry(322, 3, 1, tile_width) // TileWidth + .AddEntry(323, 3, 1, tile_height); // TileLength + + size_t header_size = 8; + size_t ifd_block_size = 2 + (num_ifd_entries * 12) + 4; + size_t end_of_ifd_offset = header_size + ifd_block_size; + + size_t tile_offsets_array_start_offset = end_of_ifd_offset; + size_t tile_offsets_array_size = num_tiles * sizeof(uint32_t); + size_t tile_bytecounts_array_start_offset = + tile_offsets_array_start_offset + tile_offsets_array_size; + size_t tile_bytecounts_array_size = num_tiles * sizeof(uint32_t); + size_t tile_data_start_offset = + tile_bytecounts_array_start_offset + tile_bytecounts_array_size; + + std::vector tile_offsets; + std::vector tile_bytecounts; + for (uint32_t i = 0; i < num_tiles; ++i) { + tile_offsets.push_back(tile_data_start_offset + i * bytes_per_tile); + tile_bytecounts.push_back(bytes_per_tile); + } + + builder.AddEntry(324, 4, tile_offsets.size(), + tile_offsets_array_start_offset); + builder.AddEntry(325, 4, tile_bytecounts.size(), + tile_bytecounts_array_start_offset); + + builder.EndIfd(0) + .AddUint32Array(tile_offsets) + .AddUint32Array(tile_bytecounts); + + const std::vector values = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f}; + for (float val : values) { + PutLEFloat32(builder.data_, val); + } + + return builder.Build(); + } + + // Generates a 2x3 uint8 RGB image with 1x1 tiles (Chunky config) + std::string MakeMultiChannelTiff() { + const uint32_t image_width = 3; + const uint32_t image_height = 2; + const uint32_t samples_per_pixel = 3; // RGB + const uint32_t tile_width = 1; + const uint32_t tile_height = 1; + const uint32_t num_tiles = + (image_height / tile_height) * (image_width / tile_width); + const uint32_t bytes_per_tile = + tile_height * tile_width * samples_per_pixel * sizeof(uint8_t); + + const uint16_t num_ifd_entries = 12; + + std::vector 
bits_per_sample_data = {8, 8, 8}; + std::vector sample_format_data = {1, 1, 1}; + + TiffBuilder builder; + builder.StartIfd(num_ifd_entries) + .AddEntry(256, 3, 1, image_width) // ImageWidth + .AddEntry(257, 3, 1, image_height) // ImageLength + .AddEntry(277, 3, 1, samples_per_pixel) // SamplesPerPixel + .AddEntry(284, 3, 1, 1) // PlanarConfiguration = Chunky (1) + .AddEntry(259, 3, 1, 1) // Compression = None + .AddEntry(262, 3, 1, 2) // PhotometricInterpretation = RGB (2) + .AddEntry(322, 3, 1, tile_width) // TileWidth + .AddEntry(323, 3, 1, tile_height); // TileLength + + size_t header_size = 8; + size_t ifd_block_size = 2 + (num_ifd_entries * 12) + 4; + size_t current_offset = header_size + ifd_block_size; + size_t bps_array_offset = current_offset; + size_t bps_array_size = bits_per_sample_data.size() * sizeof(uint16_t); + current_offset += bps_array_size; + + size_t sf_array_offset = current_offset; + size_t sf_array_size = sample_format_data.size() * sizeof(uint16_t); + current_offset += sf_array_size; + + size_t tile_offsets_array_offset = current_offset; + size_t tile_offsets_array_size = num_tiles * sizeof(uint32_t); + current_offset += tile_offsets_array_size; + + size_t tile_bytecounts_array_offset = current_offset; + size_t tile_bytecounts_array_size = num_tiles * sizeof(uint32_t); + current_offset += tile_bytecounts_array_size; + + size_t tile_data_start_offset = current_offset; + + std::vector tile_offsets; + std::vector tile_bytecounts; + for (uint32_t i = 0; i < num_tiles; ++i) { + tile_offsets.push_back(tile_data_start_offset + i * bytes_per_tile); + tile_bytecounts.push_back(bytes_per_tile); + } + + builder.AddEntry(258, 3, samples_per_pixel, bps_array_offset); + builder.AddEntry(339, 3, samples_per_pixel, sf_array_offset); + builder.AddEntry(324, 4, tile_offsets.size(), tile_offsets_array_offset); + builder.AddEntry(325, 4, tile_bytecounts.size(), + tile_bytecounts_array_offset); + + builder.EndIfd(0); + + builder.AddUint16Array(bits_per_sample_data); + builder.AddUint16Array(sample_format_data); + builder.AddUint32Array(tile_offsets); + builder.AddUint32Array(tile_bytecounts); + + const std::vector tile_values = { + 1, 2, 3, 2, 3, 4, 3, 4, 5, 11, 12, 13, 12, 13, 14, 13, 14, 15, + }; + for (uint8_t val : tile_values) { + builder.data_.push_back(static_cast(val)); + } + + return builder.Build(); + } + + // Generates a TIFF with two IFDs: + // IFD 0: 2x2 uint8 image, filled with 5 + // IFD 1: 3x3 uint16 image, filled with 99 + std::string MakeMultiIFDTiff() { + TiffBuilder builder; + + const uint32_t ifd0_width = 2; + const uint32_t ifd0_height = 2; + const uint32_t ifd0_num_tiles = 4; + const uint32_t ifd0_bytes_per_tile = 1 * 1 * 1 * sizeof(uint8_t); + const uint16_t ifd0_num_entries = 11; + std::vector ifd0_pixel_data(ifd0_num_tiles * ifd0_bytes_per_tile, + 5); + + const uint32_t ifd1_width = 3; + const uint32_t ifd1_height = 3; + const uint32_t ifd1_num_tiles = 9; + const uint32_t ifd1_bytes_per_tile = 1 * 1 * 1 * sizeof(uint16_t); // 2 + const uint16_t ifd1_num_entries = 11; + std::vector ifd1_pixel_data( + ifd1_num_tiles * (ifd1_bytes_per_tile / sizeof(uint16_t)), 99); + + size_t header_size = 8; + size_t ifd0_block_size = 2 + ifd0_num_entries * 12 + 4; + size_t ifd1_block_size = 2 + ifd1_num_entries * 12 + 4; + + size_t ifd0_start_offset = header_size; + size_t ifd1_start_offset = ifd0_start_offset + ifd0_block_size; + size_t end_of_ifds_offset = ifd1_start_offset + ifd1_block_size; + + size_t ifd0_offsets_loc = end_of_ifds_offset; + size_t ifd0_offsets_size = 
ifd0_num_tiles * sizeof(uint32_t); + size_t ifd0_counts_loc = ifd0_offsets_loc + ifd0_offsets_size; + size_t ifd0_counts_size = ifd0_num_tiles * sizeof(uint32_t); + size_t ifd0_data_loc = ifd0_counts_loc + ifd0_counts_size; + size_t ifd0_data_size = ifd0_pixel_data.size(); + size_t ifd1_offsets_loc = ifd0_data_loc + ifd0_data_size; + size_t ifd1_offsets_size = ifd1_num_tiles * sizeof(uint32_t); + size_t ifd1_counts_loc = ifd1_offsets_loc + ifd1_offsets_size; + size_t ifd1_counts_size = ifd1_num_tiles * sizeof(uint32_t); + size_t ifd1_data_loc = ifd1_counts_loc + ifd1_counts_size; + + std::vector ifd0_tile_offsets; + std::vector ifd0_tile_counts; + for (uint32_t i = 0; i < ifd0_num_tiles; ++i) { + ifd0_tile_offsets.push_back(ifd0_data_loc + i * ifd0_bytes_per_tile); + ifd0_tile_counts.push_back(ifd0_bytes_per_tile); + } + + std::vector ifd1_tile_offsets; + std::vector ifd1_tile_counts; + for (uint32_t i = 0; i < ifd1_num_tiles; ++i) { + ifd1_tile_offsets.push_back(ifd1_data_loc + i * ifd1_bytes_per_tile); + ifd1_tile_counts.push_back(ifd1_bytes_per_tile); + } + + builder.StartIfd(ifd0_num_entries) + .AddEntry(256, 3, 1, ifd0_width) + .AddEntry(257, 3, 1, ifd0_height) + .AddEntry(277, 3, 1, 1) + .AddEntry(258, 3, 1, 8) + .AddEntry(339, 3, 1, 1) + .AddEntry(259, 3, 1, 1) + .AddEntry(262, 3, 1, 1) + .AddEntry(322, 3, 1, 1) + .AddEntry(323, 3, 1, 1) + .AddEntry(324, 4, ifd0_num_tiles, ifd0_offsets_loc) + .AddEntry(325, 4, ifd0_num_tiles, ifd0_counts_loc); + builder.EndIfd(ifd1_start_offset); + + builder.PadTo(ifd1_start_offset); + builder.StartIfd(ifd1_num_entries) + .AddEntry(256, 3, 1, ifd1_width) + .AddEntry(257, 3, 1, ifd1_height) + .AddEntry(277, 3, 1, 1) + .AddEntry(258, 3, 1, 16) + .AddEntry(339, 3, 1, 1) + .AddEntry(259, 3, 1, 1) + .AddEntry(262, 3, 1, 1) + .AddEntry(322, 3, 1, 1) + .AddEntry(323, 3, 1, 1) + .AddEntry(324, 4, ifd1_num_tiles, ifd1_offsets_loc) + .AddEntry(325, 4, ifd1_num_tiles, ifd1_counts_loc); + builder.EndIfd(0); + + builder.PadTo(end_of_ifds_offset); + builder.AddUint32Array(ifd0_tile_offsets); + builder.AddUint32Array(ifd0_tile_counts); + + for (uint8_t val : ifd0_pixel_data) { + builder.data_.push_back(static_cast(val)); + } + + builder.AddUint32Array(ifd1_tile_offsets); + builder.AddUint32Array(ifd1_tile_counts); + + for (uint16_t val : ifd1_pixel_data) { + PutLE16(builder.data_, val); + } + + return builder.Build(); + } +}; + +// --- Spec Tests --- +TEST_F(TiffDriverTest, SpecFromJsonMinimal) { + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto spec, + Spec::FromJson({{"driver", "tiff"}, {"kvstore", "memory://test/"}})); + EXPECT_TRUE(spec.valid()); +} + +TEST_F(TiffDriverTest, SpecToJsonMinimal) { + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto spec, + Spec::FromJson({{"driver", "tiff"}, {"kvstore", "memory://test/"}})); + // Convert back to JSON using default options (excludes defaults) + EXPECT_THAT(spec.ToJson(), + Optional(MatchesJson( + {{"driver", "tiff"}, + {"kvstore", {{"driver", "memory"}, {"path", "test/"}}}}))); +} + +TEST_F(TiffDriverTest, SpecFromJsonWithOptions) { + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto spec, + Spec::FromJson( + {{"driver", "tiff"}, + {"kvstore", "memory://test/"}, + {"tiff", {{"ifd", 5}}}, + {"metadata", {{"dtype", "uint16"}, {"shape", {30, 40}}}}})); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto schema, spec.schema()); + EXPECT_EQ(dtype_v, schema.dtype()); + EXPECT_EQ(2, schema.rank()); +} + +TEST_F(TiffDriverTest, SpecToJsonWithOptions) { + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto spec, + Spec::FromJson( + {{"driver", "tiff"}, + {"kvstore", 
"memory://test/"}, + {"tiff", {{"ifd", 5}}}, + {"metadata", {{"dtype", "uint16"}, {"shape", {30, 40}}}}})); + + ::nlohmann::json expected_json = { + {"driver", "tiff"}, + {"kvstore", + {{"driver", "memory"}, + {"path", "test/"}, + {"atomic", true}, + {"memory_key_value_store", "memory_key_value_store"}, + {"context", ::nlohmann::json({})}}}, + {"dtype", "uint16"}, + {"schema", {{"dtype", "uint16"}, {"rank", 2}}}, + {"transform", + {{"input_inclusive_min", {0, 0}}, {"input_exclusive_max", {30, 40}}}}, + {"context", ::nlohmann::json({})}, + {"cache_pool", "cache_pool"}, + {"data_copy_concurrency", "data_copy_concurrency"}, + {"recheck_cached_data", true}, + {"recheck_cached_metadata", "open"}, + {"delete_existing", false}, + {"assume_metadata", false}, + {"assume_cached_metadata", false}, + {"fill_missing_data_reads", true}, + {"store_data_equal_to_fill_value", false}, + {"tiff", {{"ifd", 5}}}, + {"metadata", {{"dtype", "uint16"}, {"shape", {30, 40}}}}}; + + EXPECT_THAT(spec.ToJson(tensorstore::IncludeDefaults{true}), + Optional(MatchesJson(expected_json))); +} + +TEST_F(TiffDriverTest, InvalidSpecExtraMember) { + EXPECT_THAT( + Spec::FromJson( + {{"driver", "tiff"}, {"kvstore", "memory://"}, {"extra", "member"}}), + MatchesStatus(absl::StatusCode::kInvalidArgument, + "Object includes extra members: \"extra\"")); +} + +TEST_F(TiffDriverTest, TestSpecSchemaDtype) { + TestSpecSchema( + {{"driver", "tiff"}, + {"kvstore", "memory://"}, + {"metadata", {{"dtype", "uint16"}}}}, + // Expected schema now includes the default codec: + {{"dtype", "uint16"}, + {"codec", {{"driver", "tiff"}, {"compression", {{"type", "raw"}}}}}}); +} + +TEST_F(TiffDriverTest, TestSpecSchemaRank) { + // Test that specifying shape infers rank, domain, and default layout/codec + TestSpecSchema( + {{"driver", "tiff"}, + {"kvstore", "memory://"}, + {"metadata", {{"shape", {10, 20, 30}}}}}, + {{"rank", 3}, + {"domain", + {{"inclusive_min", {0, 0, 0}}, {"exclusive_max", {10, 20, 30}}}}, + {"codec", {{"driver", "tiff"}}}}); +} + +// --- Open Tests --- +TEST_F(TiffDriverTest, InvalidOpenMissingKvstore) { + // FromJson should succeed structurally, even if kvstore is missing. + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto spec, + Spec::FromJson({{"driver", "tiff"}})); + + // The Open operation should fail because kvstore is missing/invalid. 
+ EXPECT_THAT(tensorstore::Open(spec, context_).result(), + MatchesStatus(absl::StatusCode::kInvalidArgument, + ".*\"kvstore\" must be specified.*")); +} + +TEST_F(TiffDriverTest, OpenNonExisting) { + EXPECT_THAT(tensorstore::Open( + {{"driver", "tiff"}, {"kvstore", "memory://nonexistent.tif"}}, + context_) + .result(), + MatchesStatus(absl::StatusCode::kNotFound, ".*File not found.*")); +} + +TEST_F(TiffDriverTest, OpenMinimalTiff) { + WriteTiffData("minimal.tif", MakeMinimalTiff()); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open( + { + {"driver", "tiff"}, + {"kvstore", {{"driver", "memory"}, {"path", "minimal.tif"}}}, + }, + context_) + .result()); + + EXPECT_EQ(dtype_v, store.dtype()); + EXPECT_EQ(2, store.rank()); + EXPECT_THAT(store.domain().shape(), ::testing::ElementsAre(20, 10)); + EXPECT_THAT(store.domain().origin(), ::testing::ElementsAre(0, 0)); + + // Check chunk layout derived from TIFF tags + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto layout, store.chunk_layout()); + EXPECT_THAT(layout.read_chunk_shape(), ::testing::ElementsAre(10, 10)); +} + +TEST_F(TiffDriverTest, OpenWithMatchingMetadataConstraint) { + WriteTiffData("minimal.tif", MakeMinimalTiff()); + TENSORSTORE_EXPECT_OK( + tensorstore::Open( + {{"driver", "tiff"}, + {"kvstore", "memory://minimal.tif"}, + // Check that constraints match what's in the file + {"metadata", {{"dtype", "uint8"}, {"shape", {20, 10}}}}}, + context_) + .result()); +} + +TEST_F(TiffDriverTest, OpenWithMismatchedDtypeConstraint) { + WriteTiffData("minimal.tif", MakeMinimalTiff()); + EXPECT_THAT(tensorstore::Open({{"driver", "tiff"}, + {"kvstore", "memory://minimal.tif"}, + {"metadata", {{"dtype", "uint16"}}}}, + context_) + .result(), + MatchesStatus(absl::StatusCode::kInvalidArgument, + ".*dtype.*uint16.* conflicts.*uint8.*")); +} + +TEST_F(TiffDriverTest, OpenWithMismatchedShapeConstraint) { + WriteTiffData("minimal.tif", MakeMinimalTiff()); + EXPECT_THAT(tensorstore::Open({{"driver", "tiff"}, + {"kvstore", "memory://minimal.tif"}, + {"metadata", {{"shape", {20, 11}}}}}, + context_) + .result(), + MatchesStatus(absl::StatusCode::kFailedPrecondition, + ".*Resolved TIFF shape .*20, 10.* does not match " + "user constraint shape .*20, 11.*")); +} + +TEST_F(TiffDriverTest, OpenWithSchemaDtypeMismatch) { + WriteTiffData("minimal.tif", MakeMinimalTiff()); + EXPECT_THAT( + tensorstore::Open({{"driver", "tiff"}, + {"kvstore", "memory://minimal.tif"}, + {"schema", {{"dtype", "int16"}}}}, + context_) + .result(), + MatchesStatus( + absl::StatusCode::kInvalidArgument, + ".*dtype specified in schema.*int16.* conflicts .* dtype .*uint8.*")); +} + +TEST_F(TiffDriverTest, OpenInvalidTiffHeader) { + WriteTiffData("invalid_header.tif", "Not a valid TIFF file"); + EXPECT_THAT(tensorstore::Open({{"driver", "tiff"}, + {"kvstore", "memory://invalid_header.tif"}}, + context_) + .result(), + MatchesStatus(absl::StatusCode::kInvalidArgument, + ".*Invalid TIFF byte order mark.*")); +} + +TEST_F(TiffDriverTest, OpenInvalidIfdIndex) { + WriteTiffData("minimal.tif", MakeMinimalTiff()); + EXPECT_THAT(tensorstore::Open({{"driver", "tiff"}, + {"kvstore", "memory://minimal.tif"}, + {"tiff", {{"ifd", 1}}}}, + context_) + .result(), + MatchesStatus(absl::StatusCode::kNotFound, + ".*Requested IFD index 1 not found.*")); +} + +// --- Read Tests --- +TEST_F(TiffDriverTest, ReadFull) { + WriteTiffData("read_test.tif", MakeReadTestTiff()); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open( + {{"driver", "tiff"}, {"kvstore", 
"memory://read_test.tif"}}, context_) + .result()); + + EXPECT_THAT( + tensorstore::Read(store).result(), + Optional(tensorstore::MakeArray({{1, 2, 3, 4, 5, 6}, + {7, 8, 9, 10, 11, 12}, + {13, 14, 15, 16, 17, 18}, + {19, 20, 21, 22, 23, 24}}))); +} + +TEST_F(TiffDriverTest, ReadSlice) { + WriteTiffData("read_test.tif", MakeReadTestTiff()); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open( + {{"driver", "tiff"}, {"kvstore", "memory://read_test.tif"}}, context_) + .result()); + + // Read a slice covering parts of tiles 0 and 1 + EXPECT_THAT( + tensorstore::Read(store | tensorstore::Dims(0, 1).IndexSlice({1, 2})) + .result(), + Optional(tensorstore::MakeScalarArray(9))); + + // Read a slice within a single tile (tile 2) + EXPECT_THAT( + tensorstore::Read(store | + tensorstore::Dims(0, 1).SizedInterval({2, 1}, {1, 2})) + .result(), + Optional(tensorstore::MakeOffsetArray({2, 1}, {{14, 15}}))); +} + +// --- Metadata / Property Tests --- +TEST_F(TiffDriverTest, Properties) { + WriteTiffData("read_test.tif", MakeReadTestTiff()); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open( + {{"driver", "tiff"}, {"kvstore", "memory://read_test.tif"}}, context_) + .result()); + + EXPECT_EQ(dtype_v, store.dtype()); + EXPECT_EQ(2, store.rank()); + EXPECT_THAT(store.domain().origin(), ::testing::ElementsAre(0, 0)); + EXPECT_THAT(store.domain().shape(), ::testing::ElementsAre(4, 6)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto layout, store.chunk_layout()); + EXPECT_THAT(layout.read_chunk_shape(), ::testing::ElementsAre(2, 3)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto codec, store.codec()); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto expected_codec, + CodecSpec::FromJson( + {{"driver", "tiff"}, {"compression", {{"type", "raw"}}}})); + EXPECT_EQ(expected_codec, codec); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto units, store.dimension_units()); + EXPECT_THAT(units, ::testing::ElementsAre(std::nullopt, std::nullopt)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto fill_value, store.fill_value()); + EXPECT_FALSE(fill_value.valid()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto resolved_store, + ResolveBounds(store).result()); + EXPECT_EQ(store.domain(), resolved_store.domain()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto bound_spec, store.spec()); + ASSERT_TRUE(bound_spec.valid()); + + // Check the minimal JSON representation (IncludeDefaults=false) + ::nlohmann::json expected_minimal_json = { + {"driver", "tiff"}, + {"kvstore", {{"driver", "memory"}, {"path", "read_test.tif"}}}, + {"dtype", "uint16"}, + {"transform", + {// Includes the resolved domain + {"input_labels", {"y", "x"}}, + {"input_inclusive_min", {0, 0}}, + {"input_exclusive_max", {4, 6}}}}, + {"metadata", {{"dtype", "uint16"}, {"shape", {4, 6}}}}}; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto minimal_json, bound_spec.ToJson()); + EXPECT_THAT(minimal_json, MatchesJson(expected_minimal_json)); + + // Check the full JSON representation (IncludeDefaults=true) + ::nlohmann::json expected_full_json = { + {"driver", "tiff"}, + {"kvstore", + {{"driver", "memory"}, + {"path", "read_test.tif"}, + {"atomic", true}, + {"context", ::nlohmann::json({})}, + {"memory_key_value_store", "memory_key_value_store"}}}, + {"dtype", "uint16"}, + {"transform", + {{"input_inclusive_min", {0, 0}}, + {"input_exclusive_max", {4, 6}}, + {"input_labels", {"y", "x"}}}}, + {"metadata", {{"dtype", "uint16"}, {"shape", {4, 6}}}}, + {"tiff", {{"ifd", 0}}}, // Default ifd included + {"schema", {{"rank", 2}, {"dtype", "uint16"}}}, + {"recheck_cached_data", true}, 
+ {"recheck_cached_metadata", "open"}, + {"delete_existing", false}, + {"assume_metadata", false}, + {"assume_cached_metadata", false}, + {"fill_missing_data_reads", true}, + {"store_data_equal_to_fill_value", false}, + {"cache_pool", "cache_pool"}, + {"context", ::nlohmann::json({})}, + {"data_copy_concurrency", "data_copy_concurrency"}}; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto full_json, bound_spec.ToJson(tensorstore::IncludeDefaults{true})); + EXPECT_THAT(full_json, MatchesJson(expected_full_json)); + + // Test re-opening from the minimal spec + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store2, tensorstore::Open(bound_spec, context_).result()); + EXPECT_EQ(store.dtype(), store2.dtype()); + EXPECT_EQ(store.domain(), store2.domain()); + EXPECT_EQ(store.rank(), store2.rank()); + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto layout2, store2.chunk_layout()); + EXPECT_EQ(layout, layout2); +} + +TEST_F(TiffDriverTest, ReadStrippedTiff) { + WriteTiffData("stripped.tif", MakeStrippedTiff()); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open( + {{"driver", "tiff"}, {"kvstore", "memory://stripped.tif"}}, context_) + .result()); + + EXPECT_EQ(dtype_v, store.dtype()); + EXPECT_EQ(2, store.rank()); + EXPECT_THAT(store.domain().origin(), ::testing::ElementsAre(0, 0)); + EXPECT_THAT(store.domain().shape(), ::testing::ElementsAre(6, 8)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto layout, store.chunk_layout()); + EXPECT_THAT(layout.read_chunk_shape(), ::testing::ElementsAre(2, 8)); + EXPECT_THAT(layout.write_chunk_shape(), ::testing::ElementsAre(2, 8)); + EXPECT_THAT(layout.inner_order(), ::testing::ElementsAre(0, 1)); + + auto expected_array = + tensorstore::MakeArray({{0, 1, 2, 3, 4, 5, 6, 7}, + {8, 9, 10, 11, 12, 13, 14, 15}, + {10, 11, 12, 13, 14, 15, 16, 17}, + {18, 19, 20, 21, 22, 23, 24, 25}, + {20, 21, 22, 23, 24, 25, 26, 27}, + {28, 29, 30, 31, 32, 33, 34, 35}}); + + EXPECT_THAT(tensorstore::Read(store).result(), Optional(expected_array)); + + // Slice spanning multiple strips. 
+ TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto slice_view, + store | tensorstore::Dims(0, 1).SizedInterval({1, 2}, {3, 4})); + + auto expected_slice_array = tensorstore::MakeOffsetArray( + {1, 2}, {{10, 11, 12, 13}, {12, 13, 14, 15}, {20, 21, 22, 23}}); + + EXPECT_THAT(tensorstore::Read(slice_view).result(), + Optional(expected_slice_array)); +} + +TEST_F(TiffDriverTest, ReadFloatTiff) { + WriteTiffData("float_test.tif", MakeFloatTiff()); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, tensorstore::Open({{"driver", "tiff"}, + {"kvstore", "memory://float_test.tif"}}, + context_) + .result()); + + EXPECT_EQ(dtype_v, store.dtype()); + EXPECT_EQ(2, store.rank()); + EXPECT_THAT(store.domain().origin(), ::testing::ElementsAre(0, 0)); + EXPECT_THAT(store.domain().shape(), ::testing::ElementsAre(2, 3)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto layout, store.chunk_layout()); + EXPECT_THAT(layout.read_chunk_shape(), ::testing::ElementsAre(1, 1)); + EXPECT_THAT(layout.write_chunk_shape(), ::testing::ElementsAre(1, 1)); + EXPECT_THAT(layout.inner_order(), ::testing::ElementsAre(0, 1)); + auto expected_array = + tensorstore::MakeArray({{1.1f, 2.2f, 3.3f}, {4.4f, 5.5f, 6.6f}}); + + EXPECT_THAT(tensorstore::Read(store).result(), Optional(expected_array)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto slice_view, + store | tensorstore::Dims(0, 1).SizedInterval({1, 1}, {1, 2})); + + auto expected_slice_array = + tensorstore::MakeOffsetArray({1, 1}, {{5.5f, 6.6f}}); + EXPECT_THAT(tensorstore::Read(slice_view).result(), expected_slice_array); +} + +TEST_F(TiffDriverTest, ReadMultiChannelTiff) { + WriteTiffData("multi_channel.tif", MakeMultiChannelTiff()); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, tensorstore::Open({{"driver", "tiff"}, + {"kvstore", "memory://multi_channel.tif"}}, + context_) + .result()); + + EXPECT_EQ(dtype_v, store.dtype()); + EXPECT_EQ(3, store.rank()); + EXPECT_THAT(store.domain().origin(), ::testing::ElementsAre(0, 0, 0)); + EXPECT_THAT(store.domain().shape(), ::testing::ElementsAre(2, 3, 3)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto layout, store.chunk_layout()); + EXPECT_THAT(layout.read_chunk_shape(), ::testing::ElementsAre(1, 1, 3)); + EXPECT_THAT(layout.write_chunk_shape(), ::testing::ElementsAre(1, 1, 3)); + EXPECT_THAT(layout.inner_order(), ::testing::ElementsAre(0, 1, 2)); + + auto expected_array = tensorstore::MakeArray( + {{{1, 2, 3}, {2, 3, 4}, {3, 4, 5}}, + {{11, 12, 13}, {12, 13, 14}, {13, 14, 15}}}); + + EXPECT_THAT(tensorstore::Read(store).result(), Optional(expected_array)); + + // Read single pixel. 
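+  // Slicing y=1, x=2 leaves only the sample (channel) dimension; the expected
+  // values are the RGB triple of the last 1x1 tile written by
+  // MakeMultiChannelTiff().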
+ TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto pixel_view, store | tensorstore::Dims(0, 1).IndexSlice({1, 2})); + auto expected_pixel_array = tensorstore::MakeArray({13, 14, 15}); + + EXPECT_THAT(tensorstore::Read(pixel_view).result(), + Optional(expected_pixel_array)); +} + +TEST_F(TiffDriverTest, ReadNonZeroIFD) { + WriteTiffData("multi_ifd.tif", MakeMultiIFDTiff()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, tensorstore::Open({{"driver", "tiff"}, + {"kvstore", "memory://multi_ifd.tif"}, + {"tiff", {{"ifd", 1}}}}, + context_) + .result()); + + EXPECT_EQ(dtype_v, store.dtype()); + EXPECT_EQ(2, store.rank()); + EXPECT_THAT(store.domain().origin(), ::testing::ElementsAre(0, 0)); + EXPECT_THAT(store.domain().shape(), ::testing::ElementsAre(3, 3)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto layout, store.chunk_layout()); + EXPECT_THAT(layout.read_chunk_shape(), ::testing::ElementsAre(1, 1)); + EXPECT_THAT(layout.inner_order(), ::testing::ElementsAre(0, 1)); + + auto expected_array = tensorstore::AllocateArray( + {3, 3}, tensorstore::ContiguousLayoutOrder::c, tensorstore::value_init); + for (Index i = 0; i < 3; ++i) + for (Index j = 0; j < 3; ++j) expected_array(i, j) = 99; + + EXPECT_THAT(tensorstore::Read(store).result(), Optional(expected_array)); +} + +} // namespace \ No newline at end of file diff --git a/tensorstore/driver/tiff/golden_file_test.cc b/tensorstore/driver/tiff/golden_file_test.cc new file mode 100644 index 000000000..acd941b29 --- /dev/null +++ b/tensorstore/driver/tiff/golden_file_test.cc @@ -0,0 +1,274 @@ +// Copyright 2025 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// Golden file tests of the TIFF driver. +/// Verifies reading of TIFF files generated by the python script. + +#include +#include +#include + +#include +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/log/absl_log.h" +#include "tensorstore/array.h" +#include "tensorstore/context.h" +#include "tensorstore/data_type.h" +#include "tensorstore/index.h" +#include "tensorstore/internal/path.h" +#include "tensorstore/open.h" +#include "tensorstore/open_mode.h" +#include "tensorstore/schema.h" +#include "tensorstore/spec.h" +#include "tensorstore/tensorstore.h" +#include "tensorstore/util/status.h" +#include "tensorstore/util/status_testutil.h" +#include "tensorstore/util/str_cat.h" + +// Define the flag to locate the test data directory +ABSL_FLAG(std::string, tensorstore_test_data_dir, ".", + "Path to directory containing TIFF test data."); + +namespace { + +using ::tensorstore::Context; +using ::tensorstore::dtype_v; +using ::tensorstore::Index; +using ::tensorstore::MatchesStatus; +using ::tensorstore::internal::IterationBufferPointer; +using ::testing::ElementsAreArray; +using ::testing::Optional; + +// Helper function to generate expected data. 
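+// Pixels are filled with 1, 2, 3, ... in row-major order; integer types wrap
+// modulo (max value + 1), mirroring the pattern produced by the Python
+// generator script for the golden files.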
+template +tensorstore::SharedArray MakeExpectedArray( + tensorstore::span shape) { + auto array = tensorstore::AllocateArray(shape); + size_t count = 1; // Start counting from 1 + + constexpr bool use_modulo = std::is_integral_v; + T max_val = std::numeric_limits::max(); + size_t modulo_divisor = 0; + if (use_modulo) { + uint64_t divisor_calc = static_cast(max_val) + 1; + if (divisor_calc > 0) { + modulo_divisor = static_cast(divisor_calc); + } + } + + tensorstore::IterateOverArrays( + [&](tensorstore::ElementPointer element_ptr) { + T current_val; + if (use_modulo && modulo_divisor != 0) { + // Python: 1 -> 1, 255 -> 255, 256 -> 0, 257 -> 1 + // C++: 1%256=1, 255%256=255, 256%256=0, 257%256=1 + current_val = static_cast(count % modulo_divisor); + } else { + current_val = static_cast(count); + } + *(element_ptr.data()) = current_val; + count++; + }, + /*constraints=*/{}, array); + return array; +} + +// Structure to hold information for each test case +struct TestCaseInfo { + std::string filename; + ::nlohmann::json spec_override_json; + tensorstore::DataType expected_dtype; + std::vector expected_shape; + std::vector expected_labels; + std::vector expected_chunk_shape; +}; + +class TiffGoldenFileTest : public ::testing::TestWithParam { + public: + std::string GetFullPath(const std::string& filename) { + return tensorstore::internal::JoinPath( + absl::GetFlag(FLAGS_tensorstore_test_data_dir), filename); + } +}; + +TEST_P(TiffGoldenFileTest, ReadAndVerify) { + const auto& test_info = GetParam(); + std::string full_path = GetFullPath(test_info.filename); + ABSL_LOG(INFO) << "Testing TIFF file: " << full_path; + ABSL_LOG(INFO) << "Spec overrides: " << test_info.spec_override_json.dump(); + + ::nlohmann::json spec_json = { + {"driver", "tiff"}, + {"kvstore", {{"driver", "file"}, {"path", full_path}}}}; + spec_json.update(test_info.spec_override_json); + + auto context = Context::Default(); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open(spec_json, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result()); + + ASSERT_EQ(test_info.expected_dtype, store.dtype()); + ASSERT_EQ(test_info.expected_shape.size(), store.rank()); + EXPECT_THAT(store.domain().origin(), ::testing::Each(0)); + EXPECT_THAT(store.domain().shape(), + ElementsAreArray(test_info.expected_shape)); + if (!test_info.expected_labels.empty()) { + EXPECT_THAT(store.domain().labels(), + ElementsAreArray(test_info.expected_labels)); + } + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto layout, store.chunk_layout()); + EXPECT_THAT(layout.read_chunk_shape(), + ElementsAreArray(test_info.expected_chunk_shape)); + + tensorstore::SharedArray expected_data; + if (test_info.expected_dtype == dtype_v) { + expected_data = MakeExpectedArray(test_info.expected_shape); + } else if (test_info.expected_dtype == dtype_v) { + expected_data = MakeExpectedArray(test_info.expected_shape); + } else if (test_info.expected_dtype == dtype_v) { + expected_data = MakeExpectedArray(test_info.expected_shape); + } else if (test_info.expected_dtype == dtype_v) { + expected_data = MakeExpectedArray(test_info.expected_shape); + } else { + FAIL() << "Unsupported dtype in test setup: " << test_info.expected_dtype; + } + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto read_data, + tensorstore::Read(store).result()); + + EXPECT_EQ(expected_data, read_data); +} + +// Base dimensions H=32, W=48, Tile=16x16 +const Index H = 32; +const Index W = 48; +const Index TH = 16; +const Index TW = 16; + +const ::nlohmann::json kDefaultTiffSpec = 
{ + {"tiff", ::nlohmann::json::object()}}; + +INSTANTIATE_TEST_SUITE_P( + GoldenFiles, TiffGoldenFileTest, + ::testing::Values( + // Case 1: Z=5, SPP=1, uint8 -> Rank 3 (Z, Y, X) + TestCaseInfo{ + "single/stack_z5_spp1_uint8.tif", + {{"tiff", + {{"ifd_stacking", {{"dimensions", {"z"}}, {"ifd_count", 5}}}}}}, + dtype_v, + {5, H, W}, // Expected Shape (Z, Y, X) + {"z", "y", "x"}, // Expected Labels (default order) + {1, TH, TW} // Expected Chunk Shape (Z, TileH, TileW) + }, + // Case 2: Z=4, SPP=3 (RGB), uint16 -> Rank 4 (Z, Y, X, C) + TestCaseInfo{ + "single/stack_z4_spp3_rgb_uint16.tif", + {{ + "tiff", + {{"ifd_stacking", {{"dimensions", {"z"}}, {"ifd_count", 4}}}, + {"sample_dimension_label", + "c"}} // Match default label assumption + }}, + dtype_v, + {4, H, W, 3}, // Expected Shape (Z, Y, X, C) + {"z", "y", "x", "c"}, // Expected Labels + {1, TH, TW, 3} // Expected Chunk Shape (Z, TileH, TileW, SPP) + }, + // Case 3: T=2, C=3, SPP=1, float32 -> Rank 4 (T, C, Y, X) - Assuming + // default label order t,c + TestCaseInfo{ + "single/stack_t2_c3_spp1_float32.tif", + {{"tiff", + {{"ifd_stacking", + {{"dimensions", {"t", "c"}}, {"dimension_sizes", {2, 3}}}}}}}, + dtype_v, + {2, 3, H, W}, // Expected Shape (T, C, Y, X) + {"t", "c", "y", "x"}, // Expected Labels + {1, 1, TH, TW} // Expected Chunk Shape (T, C, TileH, TileW) + }, + // Case 4: C=3, T=2, SPP=1, uint8, T fastest -> Rank 4 (C, T, Y, X) + TestCaseInfo{ + "single/stack_c3_t2_spp1_t_fastest.tif", + {{"tiff", + {{"ifd_stacking", + {{"dimensions", {"c", "t"}}, + {"dimension_sizes", {3, 2}}, + {"ifd_sequence_order", {"c", "t"}}}}}}}, + dtype_v, + {3, 2, H, W}, // Expected Shape (C, T, Y, X) + {"c", "t", "y", "x"}, // Expected Labels + {1, 1, TH, TW} // Expected Chunk Shape (C, T, TileH, TileW) + }, + TestCaseInfo{ + "single/stack_z3_spp1_uint8_stripped.tif", + {{"tiff", + {{"ifd_stacking", {{"dimensions", {"z"}}, {"ifd_count", 3}}}}}}, + dtype_v, + {3, H, W}, // Expected Shape (Z, Y, X) + {"z", "y", "x"}, // Expected Labels + {1, 32, W}}, + // Case 6: Single IFD, SPP=4 (RGBA), uint8 -> Rank 3 (Y, X, C) + TestCaseInfo{ + "single/single_spp4_rgba_uint8.tif", + { + {"tiff", + {{"sample_dimension_label", + "rgba"}}}, // Specify label used by driver + {"schema", + {{"domain", + {{"labels", {"y", "x", "rgba"}}}}}} // Match driver default + // order Y,X,Sample + }, + dtype_v, + {H, W, 4}, // Expected Shape (Y, X, RGBA) + {"y", "x", "rgba"}, // Expected Labels + {TH, TW, 4} // Expected Chunk Shape (TileH, TileW, SPP) + }, + // Case 8: Z=2, T=3, SPP=1, int16, T fastest -> Rank 4 (Z, T, Y, X) + TestCaseInfo{ + "single/stack_z2_t3_spp1_int16.tif", + {{"tiff", + {{"ifd_stacking", + {{"dimensions", {"z", "t"}}, + {"dimension_sizes", {2, 3}}, + {"ifd_sequence_order", {"z", "t"}}}}}}}, + dtype_v, + {2, 3, H, W}, // Expected Shape (Z, T, Y, X) + {"z", "t", "y", "x"}, // Expected Labels + {1, 1, TH, TW} // Expected Chunk Shape (Z, T, TileH, TileW) + }, + // Case 9: Single IFD, uint8 -> Rank 2 (Y, X), ZStd compressed. + TestCaseInfo{"single/single_zstd_uint8.tif", + nlohmann::json{{"tiff", nlohmann::json::object()}}, + dtype_v, + {H, W}, + {"y", "x"}, + {TH, TW}}, + // Case 10: Single IFD, uint8 -> Rank 2 (Y, X), Zlib compressed. 
+ TestCaseInfo{"single/single_zlib_uint8.tif", + nlohmann::json{{"tiff", nlohmann::json::object()}}, + dtype_v, + {H, W}, + {"y", "x"}, + {TH, TW}})); +} // namespace diff --git a/tensorstore/driver/tiff/index.rst b/tensorstore/driver/tiff/index.rst new file mode 100644 index 000000000..25006dcc3 --- /dev/null +++ b/tensorstore/driver/tiff/index.rst @@ -0,0 +1,262 @@ +.. _tiff-driver: + +``tiff`` Driver +============= + +The ``tiff`` driver provides **read-only** access to `TIFF (Tagged Image File Format) `_ +files backed by any supported :ref:`key_value_store`. It supports reading single Image File Directories (IFDs) or interpreting sequences of IFDs as +additional dimensions (e.g., Z-stacks, time series). + +.. important:: + This driver is **read-only**. It cannot be used to create new TIFF files or modify existing ones. Operations like writing or resizing will fail. + +.. json:schema:: driver/tiff + +TIFF Interpretation Options (`tiff` Object) +------------------------------------------- + +The optional ``tiff`` object in the TensorStore specification controls how the TIFF file is interpreted. You can select one of two modes: + +1. **Single IFD Mode (Default):** Reads data from a single Image File Directory. + * Use the :json:schema:`~driver/tiff.tiff.ifd` property to specify the 0-based index of the directory to read. If omitted, it defaults to `0`. + +2. **Multi-IFD Stacking Mode:** Interprets a sequence of IFDs as additional dimensions. + * Use the :json:schema:`~driver/tiff.tiff.ifd_stacking` object to configure this mode. This is mutually exclusive with specifying a non-zero `ifd`. + + * :json:schema:`~driver/tiff.tiff.ifd_stacking.dimensions` (Required): An array of unique string labels for the dimensions represented by the IFD sequence (e.g., `["z"]`, `["time", "channel"]`). The order typically defines the default placement before the implicit 'y' and 'x' dimensions. + * **Stacked Shape Definition** (One of the following is required): + * :json:schema:`~driver/tiff.tiff.ifd_stacking.ifd_count`: (Positive integer) Required if `dimensions` has only one entry *and* `dimension_sizes` is absent. Defines the size of that single dimension. + * :json:schema:`~driver/tiff.tiff.ifd_stacking.dimension_sizes`: (Array of positive integers) Required if `dimensions` has more than one entry. Must have the same length as `dimensions`. Defines the size of each corresponding stacked dimension. + * :json:schema:`~driver/tiff.tiff.ifd_stacking.ifd_count` (Optional Verification): If specified alongside `dimension_sizes`, the product of `dimension_sizes` *must* equal `ifd_count`. + * :json:schema:`~driver/tiff.tiff.ifd_sequence_order` (Optional): An array of string labels (must be a permutation of `dimensions`) specifying the iteration order of the stacked dimensions within the flat IFD sequence in the file. If omitted, the order defaults to the order in `dimensions`, with the *last* dimension varying fastest. For example, for `dimensions: ["t", "c"]`, the default sequence is `t0c0, t0c1, t0c2, ..., t1c0, t1c1, ...`. + +* :json:schema:`~driver/tiff.tiff.sample_dimension_label` (Optional): A string label for the dimension derived from the `SamplesPerPixel` tag (if > 1). Defaults internally to `c`. This label must not conflict with labels in `ifd_stacking.dimensions`. + +**Rules:** + +* All IFDs involved in stacking must have uniform properties (Width, Height, DataType, Compression, etc.). 
+* The driver determines the final TensorStore dimension order based on the conceptual structure (Stacked dims..., Y, X, Sample dim) unless overridden by `schema.domain.labels`. + +Compression +----------- + +.. json:schema:: driver/tiff/Compression + +The driver automatically detects and decodes the compression method specified in the TIFF file's `Compression` tag. The following compression types are currently supported for decoding: + +.. json:schema:: driver/tiff/Compression/raw +.. json:schema:: driver/tiff/Compression/zlib +.. json:schema:: driver/tiff/Compression/zstd + +*(Support for other compression types like LZW or PackBits may be added in the future).* + +Mapping to TensorStore Schema +----------------------------- + +The TensorStore schema is derived from the TIFF tags and the interpretation options specified. + +Data Type +~~~~~~~~~ + +TensorStore infers the :json:schema:`~Schema.dtype` from the TIFF `BitsPerSample` and `SampleFormat` tags. Supported mappings include: + +* Unsigned Integers (`SampleFormat=1`): `uint8`, `uint16`, `uint32`, `uint64` +* Signed Integers (`SampleFormat=2`): `int8`, `int16`, `int32`, `int64` +* Floating Point (`SampleFormat=3`): `float32`, `float64` + +Mixed data types across samples (channels) are generally not supported. The driver handles byte order conversion (TIFF Little Endian or Big Endian) automatically based on the file header. + +Domain +~~~~~~ + +The :json:schema:`~Schema.domain` is determined as follows: + +* **Shape:** + * The base shape comes from `ImageLength` (Y) and `ImageWidth` (X). + * An additional dimension is added if `SamplesPerPixel` > 1. + * Dimensions from `ifd_stacking.dimensions` are added. + * The **default dimension order** is conceptually: `(stacked_dims..., 'y', 'x', sample_dim)`. For example, `("z", "y", "x")` or `("t", "c", "y", "x", "rgb")`. The actual final order can be permuted using `schema.domain.labels`. +* **Labels:** + * Default conceptual labels are `y`, `x`, the labels from `ifd_stacking.dimensions`, and the `sample_dimension_label` (or default `c`) if SamplesPerPixel > 1. + * These defaults become the final labels unless overridden by `schema.domain.labels`. +* **Origin:** The domain origin (:json:schema:`~IndexDomain.inclusive_min`) is always zero for all dimensions. +* **Resizing:** The domain is **fixed** and cannot be resized. + +Chunk Layout +~~~~~~~~~~~~ + +The :json:schema:`~Schema.chunk_layout` is derived from the TIFF tiling or strip information: + +* **Grid Shape:** Determined by `TileWidth`/`TileLength` or `ImageWidth`/`RowsPerStrip`, potentially including a size of 1 for stacked dimensions or the sample dimension (if planar). +* **Grid Origin:** Always zero for all dimensions. +* **Inner Order:** Defaults to C order relative to the final TensorStore dimension order (soft constraint). Can be overridden by `schema.chunk_layout.inner_order`. + +Codec +~~~~~ + +The :json:schema:`~Schema.codec` indicates the use of the TIFF driver and specifies the detected :json:schema:`~driver/tiff.codec.compression`. + +.. json:schema:: driver/tiff/Codec + +Fill Value +~~~~~~~~~~ + +TIFF files do not store a fill value. Reads of missing chunks (unlikely in valid TIFFs) will be filled with zero if :json:schema:`~DriverSpec.fill_missing_data_reads` is true (default). + +Dimension Units +~~~~~~~~~~~~~~~ + +Standard TIFF does not reliably store dimension units. Specify units using :json:schema:`Schema.dimension_units` during open. + +Examples +-------- + +.. 
admonition:: Example: Opening the first IFD (Default) + :class: example + + Opens the first image directory (IFD 0) in the specified TIFF file. + + .. code-block:: json + + { + "driver": "tiff", + "kvstore": {"driver": "file", "path": "/path/to/image.tif"} + } + +.. admonition:: Example: Opening a specific IFD + :class: example + + Opens the 6th image directory (index 5) in the file. + + .. code-block:: json + + { + "driver": "tiff", + "kvstore": {"driver": "file", "path": "/path/to/image.tif"}, + "tiff": { + "ifd": 5 + } + } + +.. admonition:: Example: Simple Z-Stack (50 planes) + :class: example + + Interprets the first 50 IFDs as a Z-stack. Assumes `SamplesPerPixel=1`. + + .. code-block:: json + + { + "driver": "tiff", + "kvstore": {"driver": "file", "path": "/path/to/z_stack.tif"}, + "tiff": { + "ifd_stacking": { + "dimensions": ["z"], + "ifd_count": 50 + } + } + } + // Default TensorStore Dimensions: (z, y, x) + // Resulting Shape (example): [50, height, width] + +.. admonition:: Example: Z-Stack (50 planes) with RGB channels + :class: example + + Interprets 50 IFDs as a Z-stack, where each IFD has `SamplesPerPixel=3`. + Labels the sample dimension 'channel'. + + .. code-block:: json + + { + "driver": "tiff", + "kvstore": {"driver": "file", "path": "/path/to/z_stack_rgb.tif"}, + "tiff": { + "ifd_stacking": { + "dimensions": ["z"], + "ifd_count": 50 + }, + "sample_dimension_label": "channel" + } + } + // Default TensorStore Dimensions: (z, y, x, channel) + // Resulting Shape (example): [50, height, width, 3] + +.. admonition:: Example: Time (10) x Channel (3) Stack, SPP=1 + :class: example + + Interprets 30 IFDs (10 time points * 3 channels) as a T/C stack. + Assumes default IFD sequence order (channel varies fastest: t0c0, t0c1, t0c2, t1c0, ...). + + .. code-block:: json + + { + "driver": "tiff", + "kvstore": {"driver": "file", "path": "/path/to/tc_stack.tif"}, + "tiff": { + "ifd_stacking": { + "dimensions": ["time", "channel"], + "dimension_sizes": [10, 3] + } + } + } + // Default TensorStore Dimensions: (time, channel, y, x) + // Resulting Shape (example): [10, 3, height, width] + +.. admonition:: Example: Time (10) x Channel (3) Stack, SPP=1, Time Fastest in File + :class: example + + Same data shape as above, but specifies that the IFDs in the file are ordered with time varying fastest (c0t0, c0t1, ..., c1t0, c1t1, ...). + + .. code-block:: json + + { + "driver": "tiff", + "kvstore": {"driver": "file", "path": "/path/to/tc_stack_t_fast.tif"}, + "tiff": { + "ifd_stacking": { + "dimensions": ["time", "channel"], + "dimension_sizes": [10, 3], + "ifd_sequence_order": ["channel", "time"] // channel slowest, time fastest + } + } + } + // Default TensorStore Dimensions: (time, channel, y, x) - Order is unaffected by ifd_sequence_order + // Resulting Shape (example): [10, 3, height, width] + +.. admonition:: Example: Overriding Dimension Order with Schema + :class: example + + Opens the T/C stack from the previous example, but forces the final TensorStore dimension order to be Channel, Time, Y, X. + + .. 
code-block:: json + + { + "driver": "tiff", + "kvstore": {"driver": "file", "path": "/path/to/tc_stack_t_fast.tif"}, + "tiff": { + "ifd_stacking": { + "dimensions": ["time", "channel"], // Conceptual dimensions + "dimension_sizes": [10, 3], + "ifd_sequence_order": ["channel", "time"] + } + }, + "schema": { + "domain": { + "labels": ["channel", "time", "y", "x"] // Desired final order + } + } + } + // Final TensorStore Dimensions: (channel, time, y, x) + // Resulting Shape (example): [3, 10, height, width] + +Limitations +----------- + +* **Read-Only:** The driver cannot create, write to, or resize TIFF files. +* **IFD Uniformity:** When using `ifd_stacking`, all involved IFDs must have consistent Width, Height, SamplesPerPixel, BitsPerSample, SampleFormat, PlanarConfiguration, Compression, and tiling/stripping configuration. +* **Unsupported TIFF Features:** Some TIFF features may not be supported, including: + * Certain compression types (e.g., JPEG, LZW, PackBits - check supported list). + * Planar configuration (`PlanarConfiguration=2`) combined with `ifd_stacking`. + * Mixed data types or bits-per-sample across channels (samples). + * Sub-byte data types (e.g., 1-bit, 4-bit). + * Non-standard SampleFormat values. +* **Metadata Parsing:** Does not currently parse extensive metadata from ImageDescription or OME-XML (though basic properties are read from standard tags). \ No newline at end of file diff --git a/tensorstore/driver/tiff/metadata.cc b/tensorstore/driver/tiff/metadata.cc new file mode 100644 index 000000000..ad12b8a0e --- /dev/null +++ b/tensorstore/driver/tiff/metadata.cc @@ -0,0 +1,1252 @@ +// Copyright 2025 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
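+
+/// Metadata resolution for the TIFF driver: codec/compression spec, mapping
+/// of TIFF tags (BitsPerSample, SampleFormat, tile/strip geometry) and the
+/// spec-level `tiff` options onto a TensorStore dtype, domain, chunk layout
+/// and dimension labels.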
+ +#include "tensorstore/driver/tiff/metadata.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "absl/container/flat_hash_map.h" +#include "absl/log/absl_log.h" +#include "absl/status/status.h" +#include "absl/strings/str_cat.h" +#include "riegeli/bytes/cord_reader.h" +#include "tensorstore/chunk_layout.h" +#include "tensorstore/codec_spec.h" +#include "tensorstore/codec_spec_registry.h" +#include "tensorstore/data_type.h" +#include "tensorstore/driver/tiff/compressor.h" +#include "tensorstore/driver/tiff/compressor_registry.h" +#include "tensorstore/index.h" +#include "tensorstore/index_interval.h" +#include "tensorstore/index_space/dimension_units.h" +#include "tensorstore/index_space/index_domain.h" +#include "tensorstore/index_space/index_domain_builder.h" +#include "tensorstore/internal/json_binding/bindable.h" +#include "tensorstore/internal/json_binding/data_type.h" +#include "tensorstore/internal/json_binding/dimension_indexed.h" +#include "tensorstore/internal/json_binding/enum.h" +#include "tensorstore/internal/json_binding/json_binding.h" +#include "tensorstore/internal/json_binding/std_optional.h" +#include "tensorstore/internal/log/verbose_flag.h" +#include "tensorstore/internal/riegeli/array_endian_codec.h" +#include "tensorstore/kvstore/tiff/tiff_details.h" +#include "tensorstore/rank.h" +#include "tensorstore/schema.h" +#include "tensorstore/serialization/json_bindable.h" +#include "tensorstore/util/constant_vector.h" +#include "tensorstore/util/endian.h" +#include "tensorstore/util/result.h" +#include "tensorstore/util/status.h" +#include "tensorstore/util/str_cat.h" + +namespace tensorstore { +namespace internal_tiff { + +namespace jb = tensorstore::internal_json_binding; +using ::tensorstore::GetConstantVector; +using ::tensorstore::internal_tiff_kvstore::CompressionType; +using ::tensorstore::internal_tiff_kvstore::ImageDirectory; +using ::tensorstore::internal_tiff_kvstore::PlanarConfigType; +using ::tensorstore::internal_tiff_kvstore::SampleFormatType; +using ::tensorstore::internal_tiff_kvstore::TiffParseResult; + +ABSL_CONST_INIT internal_log::VerboseFlag tiff_metadata_logging( + "tiff_metadata"); + +CodecSpec TiffCodecSpec::Clone() const { + return internal::CodecDriverSpec::Make(*this); +} + +absl::Status TiffCodecSpec::DoMergeFrom( + const internal::CodecDriverSpec& other_base) { + if (typeid(other_base) != typeid(TiffCodecSpec)) { + return absl::InvalidArgumentError("Cannot merge non-TIFF codec spec"); + } + const auto& other = static_cast(other_base); + if (other.compressor) { + if (!this->compressor) { + this->compressor = other.compressor; + } else { + TENSORSTORE_ASSIGN_OR_RETURN(auto this_json, + jb::ToJson(this->compressor)); + TENSORSTORE_ASSIGN_OR_RETURN(auto other_json, + jb::ToJson(other.compressor)); + if (!internal_json::JsonSame(this_json, other_json)) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "TIFF compression type mismatch: existing=", this_json.dump(), + ", new=", other_json.dump())); + } + } + } + return absl::OkStatus(); +} + +TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER( + TiffCodecSpec, + jb::Member("compression", jb::Projection<&TiffCodecSpec::compressor>( + jb::DefaultValue([](auto* v) {})))) + +bool operator==(const TiffCodecSpec& a, const TiffCodecSpec& b) { + auto a_json = jb::ToJson(a.compressor); + auto b_json = jb::ToJson(b.compressor); + return (a_json.ok() == b_json.ok()) && + (!a_json.ok() || internal_json::JsonSame(*a_json, *b_json)); +} + +namespace { +const 
internal::CodecSpecRegistration registration; + +constexpr std::array kSupportedDataTypes{ + DataTypeId::uint8_t, DataTypeId::uint16_t, DataTypeId::uint32_t, + DataTypeId::uint64_t, DataTypeId::int8_t, DataTypeId::int16_t, + DataTypeId::int32_t, DataTypeId::int64_t, DataTypeId::float32_t, + DataTypeId::float64_t, +}; + +std::string GetSupportedDataTypes() { + return absl::StrJoin( + kSupportedDataTypes, ", ", [](std::string* out, DataTypeId id) { + absl::StrAppend(out, kDataTypes[static_cast(id)].name()); + }); +} + +// Maps TIFF SampleFormat and BitsPerSample to TensorStore DataType. +Result GetDataTypeFromTiff(const ImageDirectory& dir) { + if (dir.samples_per_pixel == 0 || dir.bits_per_sample.empty() || + dir.sample_format.empty()) { + return absl::FailedPreconditionError( + "Incomplete TIFF metadata for data type"); + } + // Accept either identical (most files) or uniformly 8‑bit unsigned channels + auto uniform_bits = dir.bits_per_sample[0]; + auto uniform_format = dir.sample_format[0]; + for (size_t i = 1; i < dir.samples_per_pixel; ++i) { + if (dir.bits_per_sample[i] != uniform_bits || + dir.sample_format[i] != uniform_format) { + // allow common RGB 8‑bit + Alpha 8‑bit case + if (uniform_bits == 8 && dir.bits_per_sample[i] == 8 && + uniform_format == + static_cast(SampleFormatType::kUnsignedInteger) && + dir.sample_format[i] == uniform_format) { + continue; + } + return absl::UnimplementedError( + "Mixed bits/sample or sample_format is not supported yet"); + } + } + + switch (uniform_format) { + case static_cast(SampleFormatType::kUnsignedInteger): + if (uniform_bits == 8) return dtype_v; + if (uniform_bits == 16) return dtype_v; + if (uniform_bits == 32) return dtype_v; + if (uniform_bits == 64) return dtype_v; + break; + case static_cast(SampleFormatType::kSignedInteger): + if (uniform_bits == 8) return dtype_v; + if (uniform_bits == 16) return dtype_v; + if (uniform_bits == 32) return dtype_v; + if (uniform_bits == 64) return dtype_v; + break; + case static_cast(SampleFormatType::kIEEEFloat): + if (uniform_bits == 32) return dtype_v; + if (uniform_bits == 64) return dtype_v; + break; + case static_cast(SampleFormatType::kUndefined): + break; + default: + break; + } + return absl::InvalidArgumentError( + StrCat("Unsupported TIFF data type: bits=", uniform_bits, + ", format=", uniform_format)); +} + +// Returns ContiguousLayoutOrder::c or ContiguousLayoutOrder::fortran +// for a given permutation. Any mixed/blocked order is rejected. 
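+// For example, an inner_order of {0, 1, 2} matches C order and {2, 1, 0}
+// matches Fortran order.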
+Result GetLayoutOrderFromInnerOrder( + span inner_order) { + if (PermutationMatchesOrder(inner_order, ContiguousLayoutOrder::c)) { + return ContiguousLayoutOrder::c; + } + if (PermutationMatchesOrder(inner_order, ContiguousLayoutOrder::fortran)) { + return ContiguousLayoutOrder::fortran; + } + return absl::UnimplementedError( + StrCat("Inner order ", inner_order, + " is not a pure C or Fortran permutation; " + "mixed-strides currently unimplemented")); +} + +// Helper to check IFD uniformity for multi-IFD stacking +absl::Status CheckIfdUniformity(const ImageDirectory& base_ifd, + const ImageDirectory& other_ifd, + size_t ifd_index) { + // Compare essential properties needed for consistent stacking + if (other_ifd.width != base_ifd.width || + other_ifd.height != base_ifd.height) { + return absl::InvalidArgumentError(absl::StrFormat( + "IFD %d dimensions (%d x %d) do not match IFD 0 dimensions (%d x %d)", + ifd_index, other_ifd.width, other_ifd.height, base_ifd.width, + base_ifd.height)); + } + if (other_ifd.chunk_width != base_ifd.chunk_width || + other_ifd.chunk_height != base_ifd.chunk_height) { + return absl::InvalidArgumentError(absl::StrFormat( + "IFD %d chunk dimensions (%d x %d) do not match IFD 0 chunk dimensions " + "(%d x %d)", + ifd_index, other_ifd.chunk_width, other_ifd.chunk_height, + base_ifd.chunk_width, base_ifd.chunk_height)); + } + if (other_ifd.samples_per_pixel != base_ifd.samples_per_pixel) { + return absl::InvalidArgumentError(absl::StrFormat( + "IFD %d SamplesPerPixel (%d) does not match IFD 0 (%d)", ifd_index, + other_ifd.samples_per_pixel, base_ifd.samples_per_pixel)); + } + if (other_ifd.bits_per_sample != base_ifd.bits_per_sample) { + return absl::InvalidArgumentError( + absl::StrFormat("IFD %d BitsPerSample does not match IFD 0")); + } + if (other_ifd.sample_format != base_ifd.sample_format) { + return absl::InvalidArgumentError( + absl::StrFormat("IFD %d SampleFormat does not match IFD 0")); + } + if (other_ifd.compression != base_ifd.compression) { + return absl::InvalidArgumentError(absl::StrFormat( + "IFD %d Compression (%d) does not match IFD 0 (%d)", ifd_index, + other_ifd.compression, base_ifd.compression)); + } + if (other_ifd.planar_config != base_ifd.planar_config) { + return absl::InvalidArgumentError(absl::StrFormat( + "IFD %d PlanarConfiguration (%d) does not match IFD 0 (%d)", ifd_index, + other_ifd.planar_config, base_ifd.planar_config)); + } + return absl::OkStatus(); +} + +// Helper to build the dimension mapping struct +TiffDimensionMapping BuildDimensionMapping( + tensorstore::span final_labels, + const std::optional& stacking_info, + const std::optional& options_sample_label, + tensorstore::span initial_conceptual_labels, + uint16_t samples_per_pixel) { + TiffDimensionMapping mapping; + const DimensionIndex final_rank = final_labels.size(); + if (final_rank == 0) return mapping; + + mapping.labels_by_ts_dim.resize(final_rank); + absl::flat_hash_map final_label_to_index; + for (DimensionIndex i = 0; i < final_rank; ++i) { + final_label_to_index[final_labels[i]] = i; + } + + const std::string default_sample_label = "c"; + const std::string& conceptual_sample_label = + options_sample_label.value_or(default_sample_label); + + std::set conceptual_stack_labels; + if (stacking_info) { + for (const auto& label : stacking_info->dimensions) { + conceptual_stack_labels.insert(label); + } + } + + const std::string conceptual_y_label = "y"; + const std::string conceptual_x_label = "x"; + + // Assume initial_conceptual_labels rank == final_rank after 
merge + assert(initial_conceptual_labels.size() == final_rank); + + // Map FINAL indices back to INITIAL conceptual labels and identify roles. + for (DimensionIndex final_idx = 0; final_idx < final_rank; ++final_idx) { + DimensionIndex initial_idx = final_idx; + + if (initial_idx >= 0 && initial_idx < initial_conceptual_labels.size()) { + const std::string& conceptual_label = + initial_conceptual_labels[initial_idx]; + mapping.labels_by_ts_dim[final_idx] = conceptual_label; + + if (conceptual_label == conceptual_y_label) { + mapping.ts_y_dim = final_idx; + } else if (conceptual_label == conceptual_x_label) { + mapping.ts_x_dim = final_idx; + } else if (samples_per_pixel > 1 && + conceptual_label == conceptual_sample_label) { + mapping.ts_sample_dim = final_idx; + } else if (conceptual_stack_labels.count(conceptual_label)) { + mapping.ts_stacked_dims[conceptual_label] = final_idx; + } + } else { + // Should not happen if rank remains consistent + mapping.labels_by_ts_dim[final_idx] = ""; + } + } + return mapping; +} + +// Helper to apply TIFF-derived layout constraints onto an existing layout +// object. +absl::Status SetChunkLayoutFromTiffMetadata(DimensionIndex rank, + ChunkLayout initial_layout, + ChunkLayout& merged_layout) { + TENSORSTORE_RETURN_IF_ERROR(merged_layout.Set(RankConstraint{rank})); + if (merged_layout.rank() == dynamic_rank) { + return absl::OkStatus(); + } + assert(merged_layout.rank() == rank); + + // - Chunk Shape (TIFF tile/strip size is a hard constraint) + TENSORSTORE_RETURN_IF_ERROR(merged_layout.Set(ChunkLayout::ChunkShape( + initial_layout.read_chunk_shape(), /*hard_constraint=*/true))); + + // - Grid Origin (TIFF grid origin is implicitly 0, a hard constraint) + TENSORSTORE_RETURN_IF_ERROR(merged_layout.Set(ChunkLayout::GridOrigin( + initial_layout.grid_origin(), /*hard_constraint=*/true))); + + // - Inner Order (TIFF doesn't mandate an order, use C as soft default) + TENSORSTORE_RETURN_IF_ERROR(merged_layout.Set(ChunkLayout::InnerOrder( + initial_layout.inner_order(), /*hard_constraint=*/false))); + + // Apply other constraints from initial_layout as. soft constraints. 
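+  // (element counts and aspect ratios); the hard constraints above already
+  // pin the chunk shape and grid origin to the TIFF tile/strip grid.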
+ TENSORSTORE_RETURN_IF_ERROR(merged_layout.Set(ChunkLayout::WriteChunkElements( + initial_layout.write_chunk_elements().value, /*hard=*/false))); + TENSORSTORE_RETURN_IF_ERROR(merged_layout.Set(ChunkLayout::ReadChunkElements( + initial_layout.read_chunk_elements().value, /*hard=*/false))); + TENSORSTORE_RETURN_IF_ERROR(merged_layout.Set(ChunkLayout::CodecChunkElements( + initial_layout.codec_chunk_elements().value, /*hard=*/false))); + + // Aspect ratios are typically preferences, apply as soft constraints + TENSORSTORE_RETURN_IF_ERROR( + merged_layout.Set(ChunkLayout::WriteChunkAspectRatio( + initial_layout.write_chunk_aspect_ratio(), /*hard=*/false))); + TENSORSTORE_RETURN_IF_ERROR( + merged_layout.Set(ChunkLayout::ReadChunkAspectRatio( + initial_layout.read_chunk_aspect_ratio(), /*hard=*/false))); + TENSORSTORE_RETURN_IF_ERROR( + merged_layout.Set(ChunkLayout::CodecChunkAspectRatio( + initial_layout.codec_chunk_aspect_ratio(), /*hard=*/false))); + + return absl::OkStatus(); +} + +auto ifd_stacking_options_binder = jb::Validate( + [](const auto& options, auto* obj) -> absl::Status { + if (obj->dimensions.empty()) { + return absl::InvalidArgumentError( + "\"dimensions\" must not be empty in \"ifd_stacking\""); + } + + std::set dim_set; + for (const auto& dim : obj->dimensions) { + if (!dim_set.insert(dim).second) { + return absl::InvalidArgumentError( + tensorstore::StrCat("Duplicate dimension label \"", dim, + "\" in \"ifd_stacking.dimensions\"")); + } + } + + if (obj->dimension_sizes) { + if (obj->dimension_sizes->size() != obj->dimensions.size()) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "\"dimension_sizes\" length (", obj->dimension_sizes->size(), + ") must match \"dimensions\" length (", obj->dimensions.size(), + ")")); + } + } + + if (obj->dimensions.size() == 1) { + if (!obj->dimension_sizes && !obj->ifd_count) { + return absl::InvalidArgumentError( + "Either \"dimension_sizes\" or \"ifd_count\" must be specified " + "when \"ifd_stacking.dimensions\" has length 1"); + } + if (obj->dimension_sizes && obj->ifd_count && + static_cast((*obj->dimension_sizes)[0]) != + *obj->ifd_count) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "\"dimension_sizes\" ([", (*obj->dimension_sizes)[0], + "]) conflicts with \"ifd_count\" (", *obj->ifd_count, ")")); + } + } else { // dimensions.size() > 1 + if (!obj->dimension_sizes) { + return absl::InvalidArgumentError( + "\"dimension_sizes\" must be specified when " + "\"ifd_stacking.dimensions\" has length > 1"); + } + if (obj->ifd_count) { + uint64_t product = 1; + uint64_t max_val = std::numeric_limits::max(); + for (Index size : *obj->dimension_sizes) { + uint64_t u_size = static_cast(size); + if (size <= 0) { + return absl::InvalidArgumentError( + "\"dimension_sizes\" must be positive"); + } + if (product > max_val / u_size) { + return absl::InvalidArgumentError( + "Product of \"dimension_sizes\" overflows uint64_t"); + } + product *= u_size; + } + if (product != *obj->ifd_count) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Product of \"dimension_sizes\" (", product, + ") does not match specified \"ifd_count\" (", *obj->ifd_count, + ")")); + } + } + } + + if (obj->ifd_sequence_order) { + if (obj->ifd_sequence_order->size() != obj->dimensions.size()) { + return absl::InvalidArgumentError( + tensorstore::StrCat("\"ifd_sequence_order\" length (", + obj->ifd_sequence_order->size(), + ") must match \"dimensions\" length (", + obj->dimensions.size(), ")")); + } + std::set 
order_set(obj->ifd_sequence_order->begin(), + obj->ifd_sequence_order->end()); + if (order_set != dim_set) { + return absl::InvalidArgumentError( + "\"ifd_sequence_order\" must be a permutation of \"dimensions\""); + } + } + return absl::OkStatus(); + }, + jb::Object( + jb::Member( + "dimensions", + jb::Projection<&TiffSpecOptions::IfdStackingOptions::dimensions>( + jb::DefaultBinder<>)), + jb::Member("dimension_sizes", + jb::Projection< + &TiffSpecOptions::IfdStackingOptions::dimension_sizes>( + jb::Optional(jb::DefaultBinder<>))), + jb::Member( + "ifd_count", + jb::Projection<&TiffSpecOptions::IfdStackingOptions::ifd_count>( + jb::Optional(jb::Integer(1)))), + jb::Member( + "ifd_sequence_order", + jb::Projection< + &TiffSpecOptions::IfdStackingOptions::ifd_sequence_order>( + jb::Optional(jb::DefaultBinder<>))))); +} // namespace + +TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER( + TiffMetadataConstraints, + [](auto is_loading, const auto& options, auto* obj, auto* j) { + using T = absl::remove_cvref_t; + DimensionIndex* rank = nullptr; + if constexpr (is_loading.value) { + rank = &obj->rank; + } + return jb::Object( + jb::Member("dtype", jb::Projection<&T::dtype>( + jb::Optional(jb::DataTypeJsonBinder))), + jb::Member("shape", jb::Projection<&T::shape>( + jb::Optional(jb::ShapeVector(rank)))))( + is_loading, options, obj, j); + }) + +TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(TiffSpecOptions::IfdStackingOptions, + ifd_stacking_options_binder); + +TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER( + TiffSpecOptions, + jb::Object( + jb::Member("ifd", + jb::Projection<&TiffSpecOptions::ifd_index>(jb::DefaultValue( + [](auto* v) { *v = 0; }, jb::Integer(0)))), + jb::Member( + "ifd_stacking", + jb::Projection<&TiffSpecOptions::ifd_stacking>(jb::Optional( + jb::DefaultBinder))), + jb::Member("sample_dimension_label", + jb::Projection<&TiffSpecOptions::sample_dimension_label>( + jb::Optional(jb::NonEmptyStringBinder))))) + +Result> ResolveMetadata( + const internal_tiff_kvstore::TiffParseResult& source, + const TiffSpecOptions& options, const Schema& schema) { + ABSL_LOG_IF(INFO, tiff_metadata_logging) + << "Resolving TIFF metadata. Options: " + << jb::ToJson(options).value_or(::nlohmann::json::object()); + + // 1. 
Initial Setup & IFD Selection/Validation + const ImageDirectory* base_ifd_ptr = nullptr; + uint32_t base_ifd_index = 0; + uint32_t num_ifds_read = 0; + std::optional validated_stacking_info; + std::vector stack_sizes_vec; + + if (options.ifd_stacking) { + validated_stacking_info = *options.ifd_stacking; + const auto& stacking = *validated_stacking_info; + size_t num_stack_dims = stacking.dimensions.size(); + if (num_stack_dims == 0) + return absl::InvalidArgumentError( + "ifd_stacking.dimensions cannot be empty"); + + uint64_t total_ifds_needed = 0; + if (stacking.dimension_sizes) { + if (stacking.dimension_sizes->size() != num_stack_dims) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "\"dimension_sizes\" length (", stacking.dimension_sizes->size(), + ") must match \"dimensions\" length (", num_stack_dims, ")")); + } + stack_sizes_vec = *stacking.dimension_sizes; + total_ifds_needed = 1; + uint64_t max_val = std::numeric_limits::max(); + for (Index size : stack_sizes_vec) { + if (size <= 0) + return absl::InvalidArgumentError( + "\"dimension_sizes\" must be positive"); + uint64_t u_size = static_cast(size); + if (total_ifds_needed > max_val / u_size) { + return absl::InvalidArgumentError( + "Product of dimension_sizes overflows uint64_t"); + } + total_ifds_needed *= u_size; + } + if (stacking.ifd_count && total_ifds_needed != *stacking.ifd_count) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Product of \"dimension_sizes\" (", total_ifds_needed, + ") does not match specified \"ifd_count\" (", *stacking.ifd_count, + ")")); + } + } else { + if (num_stack_dims > 1) { + return absl::InvalidArgumentError( + "\"dimension_sizes\" is required when more than one stacking " + "dimension is specified"); + } + if (!stacking.ifd_count) { + return absl::InvalidArgumentError( + "Either \"dimension_sizes\" or \"ifd_count\" must be specified for " + "stacking"); + } + if (*stacking.ifd_count <= 0) { + return absl::InvalidArgumentError("\"ifd_count\" must be positive"); + } + total_ifds_needed = *stacking.ifd_count; + stack_sizes_vec.push_back(static_cast(total_ifds_needed)); + validated_stacking_info->dimension_sizes = stack_sizes_vec; + } + + num_ifds_read = total_ifds_needed; + base_ifd_index = 0; + + if (num_ifds_read == 0 || num_ifds_read > source.image_directories.size()) { + return absl::InvalidArgumentError(absl::StrFormat( + "Required %d IFDs for stacking, but only %d available/parsed", + num_ifds_read, source.image_directories.size())); + } + base_ifd_ptr = &source.image_directories[0]; + + for (size_t i = 1; i < num_ifds_read; ++i) { + TENSORSTORE_RETURN_IF_ERROR( + CheckIfdUniformity(*base_ifd_ptr, source.image_directories[i], i)); + } + + } else { + // Single IFD Mode Logic + base_ifd_index = options.ifd_index; + num_ifds_read = 1; + validated_stacking_info = std::nullopt; + + if (base_ifd_index >= source.image_directories.size()) { + return absl::NotFoundError( + absl::StrFormat("Requested IFD index %d not found (found %d IFDs)", + base_ifd_index, source.image_directories.size())); + } + base_ifd_ptr = &source.image_directories[base_ifd_index]; + } + const ImageDirectory& base_ifd = *base_ifd_ptr; + + // 2. 
Determine Initial Structure + DimensionIndex initial_rank = dynamic_rank; + std::vector initial_shape; + std::vector initial_labels; + PlanarConfigType initial_planar_config = + static_cast(base_ifd.planar_config); + uint16_t initial_samples_per_pixel = base_ifd.samples_per_pixel; + + const std::string implicit_y_label = "y"; + const std::string implicit_x_label = "x"; + const std::string default_sample_label = "c"; + const std::string& sample_label = + options.sample_dimension_label.value_or(default_sample_label); + + initial_shape.clear(); + initial_labels.clear(); + + if (initial_planar_config != PlanarConfigType::kChunky) { + if (initial_samples_per_pixel <= 1) { + // Treat Planar with SPP=1 as Chunky for layout purposes. + ABSL_LOG_IF(WARNING, tiff_metadata_logging) + << "PlanarConfiguration=2 with SamplesPerPixel<=1; treating as " + "Chunky."; + initial_planar_config = PlanarConfigType::kChunky; + } else if (validated_stacking_info) { + // Stacking + Planar is not supported (yet). + return absl::UnimplementedError( + "PlanarConfiguration=2 is not supported with ifd_stacking."); + } else { + // Single IFD Planar: Use {Sample, Y, X} initial order + initial_shape.push_back(static_cast(initial_samples_per_pixel)); + initial_labels.push_back(sample_label); + initial_shape.push_back(static_cast(base_ifd.height)); + initial_labels.push_back(implicit_y_label); + initial_shape.push_back(static_cast(base_ifd.width)); + initial_labels.push_back(implicit_x_label); + initial_rank = 3; + } + } + + if (initial_planar_config == PlanarConfigType::kChunky) { + // Add stacked dimensions first + if (validated_stacking_info) { + initial_shape.insert(initial_shape.end(), stack_sizes_vec.begin(), + stack_sizes_vec.end()); + initial_labels.insert(initial_labels.end(), + validated_stacking_info->dimensions.begin(), + validated_stacking_info->dimensions.end()); + } + initial_shape.push_back(static_cast(base_ifd.height)); + initial_labels.push_back(implicit_y_label); + initial_shape.push_back(static_cast(base_ifd.width)); + initial_labels.push_back(implicit_x_label); + // Add Sample dimension last if Chunky and spp > 1 + if (initial_samples_per_pixel > 1) { + initial_shape.push_back(static_cast(initial_samples_per_pixel)); + initial_labels.push_back(sample_label); + } + initial_rank = initial_shape.size(); + } + + std::set label_set; + for (const auto& label : initial_labels) { + if (!label_set.insert(label).second) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Duplicate dimension label detected in initial structure: \"", label, + "\"")); + } + } + + // 3. 
Determine Initial Properties + TENSORSTORE_ASSIGN_OR_RETURN(DataType initial_dtype, + GetDataTypeFromTiff(base_ifd)); + TENSORSTORE_RETURN_IF_ERROR(ValidateDataType(initial_dtype)); + CompressionType initial_compression_type = + static_cast(base_ifd.compression); + PlanarConfigType ifd_planar_config = + static_cast(base_ifd.planar_config); + TENSORSTORE_ASSIGN_OR_RETURN( + ChunkLayout initial_layout, + GetInitialChunkLayout(base_ifd, initial_rank, initial_labels, + ifd_planar_config, initial_samples_per_pixel, + sample_label)); + + // 3.5 Determine Compressor from TIFF tag using the reverse map and registry + Compressor resolved_compressor; + auto const& compression_map = GetTiffCompressionMap(); + auto it = compression_map.find(initial_compression_type); + if (it == compression_map.end()) { + // If the tag value isn't in our map, it's unsupported (or kNone/raw) + if (initial_compression_type != CompressionType::kNone) { + return absl::UnimplementedError( + StrCat("Unsupported TIFF compression type tag: ", + static_cast(initial_compression_type))); + } + } else { + // Found in map, get string ID and create Compressor via registry + std::string_view type_id = it->second; + TENSORSTORE_ASSIGN_OR_RETURN( + resolved_compressor, Compressor::FromJson({{"type", type_id}}), + MaybeAnnotateStatus( + _, "Failed to create compressor instance from TIFF tag")); + if (!resolved_compressor && type_id != "raw") { + return absl::InternalError(StrCat("Compressor type '", type_id, + "' resolved to null unexpectedly")); + } + } + + // 4. Merge with Schema + Schema merged_schema = schema; + + TENSORSTORE_ASSIGN_OR_RETURN( + DataType effective_dtype, + GetEffectiveDataType(TiffMetadataConstraints{/*.dtype=*/initial_dtype}, + merged_schema)); + TENSORSTORE_RETURN_IF_ERROR(ValidateDataType(effective_dtype)); + + TENSORSTORE_ASSIGN_OR_RETURN( + auto final_domain_pair, + GetEffectiveDomain(initial_rank, initial_shape, initial_labels, + merged_schema)); + const IndexDomain<>& final_domain = final_domain_pair.first; + const std::vector& final_labels = final_domain_pair.second; + const DimensionIndex final_rank = final_domain.rank(); + + TENSORSTORE_ASSIGN_OR_RETURN( + ChunkLayout final_layout, + GetEffectiveChunkLayout(initial_layout, merged_schema)); + TENSORSTORE_RETURN_IF_ERROR(final_layout.Finalize()); + + TENSORSTORE_ASSIGN_OR_RETURN( + DimensionUnitsVector final_units, + GetEffectiveDimensionUnits(final_rank, merged_schema)); + + if (merged_schema.fill_value().valid()) { + return absl::InvalidArgumentError( + "fill_value not supported by TIFF format"); + } + + // 4.5 Merge with Schema Codec constraints. 
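+  // For example, a schema codec from another driver (e.g. "zarr") is
+  // rejected outright below, while a "tiff" codec whose compressor conflicts
+  // with the compressor resolved from the file's Compression tag fails in
+  // MergeFrom(); an unset or matching schema codec leaves
+  // `resolved_compressor` unchanged.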
+ CodecSpec schema_codec = merged_schema.codec(); + if (schema_codec.valid()) { + const internal::CodecDriverSpec* schema_driver_spec_ptr = + schema_codec.get(); + + if (schema_driver_spec_ptr == nullptr || + dynamic_cast(schema_driver_spec_ptr) != nullptr) { + auto temp_codec_spec = internal::CodecDriverSpec::Make(); + temp_codec_spec->compressor = resolved_compressor; + TENSORSTORE_RETURN_IF_ERROR( + temp_codec_spec->MergeFrom(schema_codec), + MaybeAnnotateStatus( + _, + "Schema codec constraints conflict with TIFF file compression")); + resolved_compressor = temp_codec_spec->compressor; + } else { + std::string schema_driver_id = ""; + if (auto j_result = schema_codec.ToJson(); j_result.ok() && + j_result->is_object() && + j_result->contains("driver")) { + schema_driver_id = j_result->value("driver", ""); + } + return absl::InvalidArgumentError( + StrCat("Schema codec driver \"", schema_driver_id, + "\" is incompatible with tiff driver")); + } + } + Compressor final_compressor = std::move(resolved_compressor); + + // 5. Build Final TiffMetadata + auto metadata = std::make_shared(); + metadata->base_ifd_index = base_ifd_index; + metadata->num_ifds_read = num_ifds_read; + metadata->stacking_info = validated_stacking_info; + metadata->endian = source.endian; + metadata->is_tiled = base_ifd.is_tiled; + metadata->planar_config = + static_cast(base_ifd.planar_config); + metadata->samples_per_pixel = initial_samples_per_pixel; + metadata->ifd0_chunk_width = base_ifd.chunk_width; + metadata->ifd0_chunk_height = base_ifd.chunk_height; + metadata->compressor = std::move(final_compressor); + metadata->compression_type = initial_compression_type; + metadata->rank = final_rank; + metadata->shape.assign(final_domain.shape().begin(), + final_domain.shape().end()); + metadata->dtype = effective_dtype; + metadata->chunk_layout = std::move(final_layout); + metadata->fill_value = SharedArray(); + metadata->dimension_units = std::move(final_units); + metadata->dimension_labels = final_labels; + + TENSORSTORE_ASSIGN_OR_RETURN( + metadata->layout_order, + GetLayoutOrderFromInnerOrder(metadata->chunk_layout.inner_order())); + + metadata->dimension_mapping = + BuildDimensionMapping(metadata->dimension_labels, metadata->stacking_info, + options.sample_dimension_label, initial_labels, + metadata->samples_per_pixel); + + ABSL_LOG_IF(INFO, tiff_metadata_logging) + << "Resolved TiffMetadata: rank=" << metadata->rank + << ", shape=" << tensorstore::span(metadata->shape) + << ", labels=" << tensorstore::span(metadata->dimension_labels) + << ", dtype=" << metadata->dtype + << ", chunk_layout=" << metadata->chunk_layout + << ", compression=" << static_cast(metadata->compression_type) + << ", planar_config=" << static_cast(metadata->planar_config); + + return metadata; +} + +absl::Status ValidateResolvedMetadata( + const TiffMetadata& resolved_metadata, + const TiffMetadataConstraints& user_constraints) { + // Validate Rank + if (!RankConstraint::EqualOrUnspecified(resolved_metadata.rank, + user_constraints.rank)) { + return absl::FailedPreconditionError(StrCat( + "Resolved TIFF rank (", resolved_metadata.rank, + ") does not match user constraint rank (", user_constraints.rank, ")")); + } + + if (user_constraints.dtype.has_value() && + resolved_metadata.dtype != *user_constraints.dtype) { + return absl::FailedPreconditionError( + StrCat("Resolved TIFF dtype (", resolved_metadata.dtype, + ") does not match user constraint dtype (", + *user_constraints.dtype, ")")); + } + + if (user_constraints.shape.has_value()) { + if 
(resolved_metadata.rank != user_constraints.shape->size()) { + return absl::FailedPreconditionError( + StrCat("Rank of resolved TIFF shape (", resolved_metadata.rank, + ") does not match rank of user constraint shape (", + user_constraints.shape->size(), ")")); + } + if (!std::equal(resolved_metadata.shape.begin(), + resolved_metadata.shape.end(), + user_constraints.shape->begin())) { + return absl::FailedPreconditionError(StrCat( + "Resolved TIFF shape ", tensorstore::span(resolved_metadata.shape), + " does not match user constraint shape ", + tensorstore::span(*user_constraints.shape))); + } + } + + // Validate Axes (if added to constraints) + // TODO: Implement axis validation + + // Validate Chunk Shape (if added to constraints) + // TODO: Implement chunk shape validation + + return absl::OkStatus(); +} + +Result GetEffectiveDataType( + const TiffMetadataConstraints& constraints, const Schema& schema) { + DataType dtype = schema.dtype(); + if (constraints.dtype.has_value()) { + if (dtype.valid() && dtype != *constraints.dtype) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "dtype specified in schema (", dtype, + ") conflicts with dtype specified in metadata constraints (", + *constraints.dtype, ")")); + } + dtype = *constraints.dtype; + } + if (dtype.valid()) TENSORSTORE_RETURN_IF_ERROR(ValidateDataType(dtype)); + return dtype; +} + +Result, std::vector>> GetEffectiveDomain( + DimensionIndex initial_rank, span initial_shape, + span initial_labels, const Schema& schema) { + // 1. Validate Rank Compatibility & Determine Final Rank + if (!RankConstraint::EqualOrUnspecified(initial_rank, schema.rank())) { + return absl::FailedPreconditionError( + tensorstore::StrCat("Schema rank constraint ", schema.rank(), + " is incompatible with TIFF rank ", initial_rank)); + } + const DimensionIndex rank = + schema.rank().rank == dynamic_rank ? initial_rank : schema.rank().rank; + if (rank == dynamic_rank) { + return std::make_pair(IndexDomain<>(dynamic_rank), + std::vector{}); + } + if (initial_rank != dynamic_rank && initial_rank != rank) { + return absl::InternalError( + "Rank mismatch after effective rank determination"); + } + + // 2. Determine Final Labels + std::vector final_labels; + bool schema_has_labels = + schema.domain().valid() && !schema.domain().labels().empty(); + if (schema_has_labels) { + if (static_cast(schema.domain().labels().size()) != rank) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Schema domain labels rank (", schema.domain().labels().size(), + ") does not match effective rank (", rank, ")")); + } + final_labels.assign(schema.domain().labels().begin(), + schema.domain().labels().end()); + } else { + if (initial_labels.size() != rank) { + return absl::InternalError( + tensorstore::StrCat("Initial labels rank (", initial_labels.size(), + ") does not match effective rank (", rank, ")")); + } + final_labels.assign(initial_labels.begin(), initial_labels.end()); + } + + // 3. Build Initial Domain (with final labels for merge compatibility) + IndexDomainBuilder initial_builder(rank); + initial_builder.shape(initial_shape); + initial_builder.labels(final_labels); + initial_builder.implicit_lower_bounds(false); + initial_builder.implicit_upper_bounds(false); + TENSORSTORE_ASSIGN_OR_RETURN(auto initial_domain, initial_builder.Finalize()); + + // 4. 
Build Effective Schema Domain (with final labels) + IndexDomain<> effective_schema_domain; + if (schema.domain().valid()) { + IndexDomainBuilder schema_builder(rank); + schema_builder.origin(schema.domain().origin()); + schema_builder.shape(schema.domain().shape()); + schema_builder.labels(final_labels); + schema_builder.implicit_lower_bounds( + schema.domain().implicit_lower_bounds()); + schema_builder.implicit_upper_bounds( + schema.domain().implicit_upper_bounds()); + TENSORSTORE_ASSIGN_OR_RETURN(effective_schema_domain, + schema_builder.Finalize()); + } else { + TENSORSTORE_ASSIGN_OR_RETURN( + effective_schema_domain, + IndexDomainBuilder(rank).labels(final_labels).Finalize()); + } + + // 5. Merge Domains + TENSORSTORE_ASSIGN_OR_RETURN( + IndexDomain<> merged_domain_bounds_only, + MergeIndexDomains(effective_schema_domain, initial_domain), + tensorstore::MaybeAnnotateStatus(_, + "Mismatch between TIFF-derived domain " + "and schema domain bounds/shape")); + + return std::make_pair(std::move(merged_domain_bounds_only), + std::move(final_labels)); +} + +Result> GetEffectiveDomain( + const TiffMetadataConstraints& constraints, const Schema& schema) { + DimensionIndex rank = schema.rank().rank; + if (constraints.rank != dynamic_rank) { + if (rank != dynamic_rank && rank != constraints.rank) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Rank specified in schema (", rank, + ") conflicts with rank specified in metadata constraints (", + constraints.rank, ")")); + } + rank = constraints.rank; + } + if (rank == dynamic_rank && constraints.shape.has_value()) { + rank = constraints.shape->size(); + } + if (rank == dynamic_rank && schema.domain().valid()) { + rank = schema.domain().rank(); + } + // If rank is still dynamic after checking all available sources in the spec + // and constraints, return a dynamic_rank domain. 
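+  // For example, constraints of {"shape": [100, 200]} with an otherwise
+  // unconstrained schema yield rank 2 and explicit bounds below, whereas no
+  // rank, shape, or schema domain at all leaves the rank dynamic and an
+  // unconstrained IndexDomain is returned.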
+ if (rank == dynamic_rank) { + return IndexDomain<>(); + } + + IndexDomainBuilder builder(rank); + if (constraints.shape) { + if (constraints.shape->size() != rank) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Internal error: Metadata constraints shape rank (", + constraints.shape->size(), ") conflicts with effective rank (", rank, + ")")); + } + builder.shape(*constraints.shape); + builder.implicit_lower_bounds(false); + builder.implicit_upper_bounds(false); + } else { + builder.implicit_lower_bounds(true); + builder.implicit_upper_bounds(true); + } + + if (schema.domain().valid() && !schema.domain().labels().empty()) { + if (static_cast(schema.domain().labels().size()) != rank) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Schema domain labels rank (", schema.domain().labels().size(), + ") does not match effective rank (", rank, ")")); + } + builder.labels(schema.domain().labels()); + } + + TENSORSTORE_ASSIGN_OR_RETURN(auto domain_from_constraints, + builder.Finalize()); + + TENSORSTORE_ASSIGN_OR_RETURN( + IndexDomain<> merged_domain, + MergeIndexDomains(schema.domain(), domain_from_constraints), + tensorstore::MaybeAnnotateStatus( + _, "Conflict between schema domain and metadata constraints")); + + return merged_domain; +} + +Result GetEffectiveChunkLayout(ChunkLayout initial_layout, + const Schema& schema) { + ChunkLayout merged_layout = schema.chunk_layout(); + TENSORSTORE_RETURN_IF_ERROR(SetChunkLayoutFromTiffMetadata( + initial_layout.rank(), initial_layout, merged_layout)); + return merged_layout; +} + +Result GetEffectiveDimensionUnits( + DimensionIndex rank, /* const DimensionUnitsVector& initial_units, */ + const Schema& schema) { + // Currently, no initial_units are derived from standard TIFF. + // Start with schema units. + DimensionUnitsVector final_units(schema.dimension_units()); + + if (final_units.empty() && rank != dynamic_rank) { + final_units.resize(rank); + } else if (!final_units.empty() && + static_cast(final_units.size()) != rank) { + return absl::InvalidArgumentError( + tensorstore::StrCat("Schema dimension_units rank (", final_units.size(), + ") conflicts with TIFF-derived rank (", rank, ")")); + } + + // TODO: When OME-XML or other sources provide initial_units, merge here: + // TENSORSTORE_RETURN_IF_ERROR(MergeDimensionUnits(final_units, + // initial_units)); + + return final_units; +} + +Result GetInitialChunkLayout( + const internal_tiff_kvstore::ImageDirectory& base_ifd, + DimensionIndex initial_rank, span initial_labels, + internal_tiff_kvstore::PlanarConfigType initial_planar_config, + uint16_t initial_samples_per_pixel, std::string_view sample_label) { + ChunkLayout layout; + TENSORSTORE_RETURN_IF_ERROR(layout.Set(RankConstraint{initial_rank})); + if (initial_rank == dynamic_rank || initial_rank == 0) { + TENSORSTORE_RETURN_IF_ERROR( + layout.Set(ChunkLayout::ChunkShape({}, /*hard=*/true))); + TENSORSTORE_RETURN_IF_ERROR( + layout.Set(ChunkLayout::CodecChunkShape({}, /*hard=*/true))); + TENSORSTORE_RETURN_IF_ERROR( + layout.Set(ChunkLayout::GridOrigin({}, /*hard=*/true))); + TENSORSTORE_RETURN_IF_ERROR( + layout.Set(ChunkLayout::InnerOrder({}, /*hard=*/false))); + return layout; + } + + // 1. Set Grid Origin (Hard Constraint) + DimensionSet all_dims_hard = DimensionSet::UpTo(initial_rank); + TENSORSTORE_RETURN_IF_ERROR(layout.Set(ChunkLayout::GridOrigin( + GetConstantVector(initial_rank), all_dims_hard))); + + // 2. 
Set Default Inner Order (Soft Constraint) + std::vector default_inner_order(initial_rank); + std::iota(default_inner_order.begin(), default_inner_order.end(), 0); + TENSORSTORE_RETURN_IF_ERROR(layout.Set( + ChunkLayout::InnerOrder(default_inner_order, /*hard_constraint=*/false))); + + // 3. Determine Initial Chunk Shape (Hard Constraint) + std::vector initial_chunk_shape(initial_rank); + + absl::flat_hash_map label_to_index; + for (DimensionIndex i = 0; i < initial_rank; ++i) { + label_to_index[initial_labels[i]] = i; + } + + // Find indices corresponding to conceptual Y, X, and sample dimensions + DimensionIndex y_dim_idx = -1; + DimensionIndex x_dim_idx = -1; + DimensionIndex sample_dim_idx = -1; + + if (auto it = label_to_index.find("y"); it != label_to_index.end()) { + y_dim_idx = it->second; + } else if (initial_rank >= 2) { + return absl::InternalError( + "Conceptual 'y' dimension label not found in initial labels"); + } + + if (auto it = label_to_index.find("x"); it != label_to_index.end()) { + x_dim_idx = it->second; + } else if (initial_rank >= 1) { + return absl::InternalError( + "Conceptual 'x' dimension label not found in initial labels"); + } + + if (initial_samples_per_pixel > 1) { + if (auto it = label_to_index.find(sample_label); + it != label_to_index.end()) { + sample_dim_idx = it->second; + } else { + return absl::InternalError(tensorstore::StrCat( + "Sample dimension label '", sample_label, + "' not found in initial labels, but SamplesPerPixel=", + initial_samples_per_pixel)); + } + } + + // Assign chunk sizes based on dimension type + for (DimensionIndex i = 0; i < initial_rank; ++i) { + if (i == y_dim_idx) { + initial_chunk_shape[i] = base_ifd.chunk_height; + if (initial_chunk_shape[i] <= 0) + return absl::InvalidArgumentError( + "TIFF TileLength/RowsPerStrip must be positive"); + } else if (i == x_dim_idx) { + initial_chunk_shape[i] = base_ifd.chunk_width; + if (initial_chunk_shape[i] <= 0) + return absl::InvalidArgumentError( + "TIFF TileWidth must be positive (or image width for strips)"); + } else if (i == sample_dim_idx) { + if (initial_planar_config == + internal_tiff_kvstore::PlanarConfigType::kChunky) { + initial_chunk_shape[i] = initial_samples_per_pixel; + } else { // Planar + initial_chunk_shape[i] = 1; + } + if (initial_chunk_shape[i] <= 0) + return absl::InvalidArgumentError("SamplesPerPixel must be positive"); + } else { + initial_chunk_shape[i] = 1; // Assume stacked dims are chunked at size 1 + } + } + + TENSORSTORE_RETURN_IF_ERROR( + layout.Set(ChunkLayout::ChunkShape(initial_chunk_shape, all_dims_hard))); + TENSORSTORE_RETURN_IF_ERROR(layout.Set( + ChunkLayout::CodecChunkShape(initial_chunk_shape, all_dims_hard))); + + // 4. 
Set Other Defaults (Soft Constraints) + TENSORSTORE_RETURN_IF_ERROR(layout.Set(ChunkLayout::WriteChunkElements( + ChunkLayout::kDefaultShapeValue, /*hard=*/false))); + TENSORSTORE_RETURN_IF_ERROR(layout.Set(ChunkLayout::ReadChunkElements( + ChunkLayout::kDefaultShapeValue, /*hard=*/false))); + TENSORSTORE_RETURN_IF_ERROR(layout.Set(ChunkLayout::CodecChunkElements( + ChunkLayout::kDefaultShapeValue, /*hard=*/false))); + + std::vector default_aspect_ratio( + initial_rank, ChunkLayout::kDefaultAspectRatioValue); + tensorstore::span default_aspect_ratio_span = + default_aspect_ratio; + + TENSORSTORE_RETURN_IF_ERROR(layout.Set(ChunkLayout::WriteChunkAspectRatio( + default_aspect_ratio_span, /*hard=*/false))); + TENSORSTORE_RETURN_IF_ERROR(layout.Set(ChunkLayout::ReadChunkAspectRatio( + default_aspect_ratio_span, /*hard=*/false))); + TENSORSTORE_RETURN_IF_ERROR(layout.Set(ChunkLayout::CodecChunkAspectRatio( + default_aspect_ratio_span, /*hard=*/false))); + + return layout; +} + +Result> DecodeChunk(const TiffMetadata& metadata, + absl::Cord buffer) { + riegeli::CordReader<> base_reader(&buffer); + riegeli::Reader* data_reader = &base_reader; + + std::unique_ptr decompressor_reader; + if (metadata.compressor) { + decompressor_reader = + metadata.compressor->GetReader(base_reader, metadata.dtype.size()); + if (!decompressor_reader) { + return absl::InvalidArgumentError(StrCat( + "Failed to create decompressor reader for TIFF compression type: ", + static_cast(metadata.compression_type))); + } + data_reader = decompressor_reader.get(); + ABSL_LOG_IF(INFO, tiff_metadata_logging) + << "Applied decompressor for type " + << static_cast(metadata.compression_type); + } else { + ABSL_LOG_IF(INFO, tiff_metadata_logging) + << "No decompression needed (raw)."; + } + + tensorstore::span chunk_shape = + metadata.chunk_layout.read_chunk_shape(); + + std::vector buffer_data_shape_vec; + buffer_data_shape_vec.reserve(metadata.rank); + if (metadata.planar_config == PlanarConfigType::kPlanar) { + // Find sample dimension index from mapping + DimensionIndex sample_dim = + metadata.dimension_mapping.ts_sample_dim.value_or(-1); + if (sample_dim == -1 && metadata.samples_per_pixel > 1) + return absl::InternalError( + "Planar config with spp > 1 requires a sample dimension in mapping"); + // Assume chunk shape from layout reflects the grid {1, stack..., h, w} + buffer_data_shape_vec.assign(chunk_shape.begin(), chunk_shape.end()); + } else { // Chunky or single sample + // Grid chunk shape is {stack..., h, w}. Component shape has spp at the end. + buffer_data_shape_vec.assign(chunk_shape.begin(), chunk_shape.end()); + if (static_cast(buffer_data_shape_vec.size()) != + metadata.rank) { + return absl::InternalError(StrCat( + "Internal consistency error: Buffer data shape rank (", + buffer_data_shape_vec.size(), ") does not match component rank (", + metadata.rank, ") in chunky mode")); + } + } + tensorstore::span buffer_data_shape = buffer_data_shape_vec; + + endian source_endian = + (metadata.endian == internal_tiff_kvstore::Endian::kLittle) + ? endian::little + : endian::big; + + TENSORSTORE_ASSIGN_OR_RETURN( + auto decoded_array, internal::DecodeArrayEndian( + *data_reader, metadata.dtype, buffer_data_shape, + source_endian, metadata.layout_order)); + + if (!data_reader->VerifyEndAndClose()) { + return absl::DataLossError( + StrCat("Error reading chunk data: ", data_reader->status().message())); + } + + return decoded_array; +} + +// Validates that dtype is supported by the TIFF driver implementation. 
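+// For example, uint8 or float32 pass this check, while data types with no
+// TIFF SampleFormat/BitsPerSample encoding (e.g. string or json) are rejected
+// with an InvalidArgument error listing the supported data types.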
+absl::Status ValidateDataType(DataType dtype) { + ABSL_CHECK(dtype.valid()); + if (!absl::c_linear_search(kSupportedDataTypes, dtype.id())) { + return absl::InvalidArgumentError(tensorstore::StrCat( + dtype, " data type is not one of the supported TIFF data types: ", + GetSupportedDataTypes())); + } + return absl::OkStatus(); +} + +} // namespace internal_tiff +} // namespace tensorstore + +TENSORSTORE_DEFINE_SERIALIZER_SPECIALIZATION( + tensorstore::internal_tiff::TiffSpecOptions::IfdStackingOptions, + tensorstore::serialization::JsonBindableSerializer< + tensorstore::internal_tiff::TiffSpecOptions::IfdStackingOptions>()) + +TENSORSTORE_DEFINE_SERIALIZER_SPECIALIZATION( + tensorstore::internal_tiff::TiffSpecOptions, + tensorstore::serialization::JsonBindableSerializer< + tensorstore::internal_tiff::TiffSpecOptions>()) + +TENSORSTORE_DEFINE_SERIALIZER_SPECIALIZATION( + tensorstore::internal_tiff::TiffMetadataConstraints, + tensorstore::serialization::JsonBindableSerializer< + tensorstore::internal_tiff::TiffMetadataConstraints>()) diff --git a/tensorstore/driver/tiff/metadata.h b/tensorstore/driver/tiff/metadata.h new file mode 100644 index 000000000..1ffa5a245 --- /dev/null +++ b/tensorstore/driver/tiff/metadata.h @@ -0,0 +1,334 @@ +// Copyright 2025 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORSTORE_DRIVER_TIFF_METADATA_H_ +#define TENSORSTORE_DRIVER_TIFF_METADATA_H_ + +#include +#include +#include +#include + +#include "tensorstore/array.h" +#include "tensorstore/chunk_layout.h" +#include "tensorstore/codec_spec.h" +#include "tensorstore/data_type.h" +#include "tensorstore/driver/tiff/compressor.h" +#include "tensorstore/index.h" +#include "tensorstore/index_space/dimension_units.h" +#include "tensorstore/kvstore/tiff/tiff_details.h" +#include "tensorstore/kvstore/tiff/tiff_dir_cache.h" +#include "tensorstore/rank.h" +#include "tensorstore/schema.h" +#include "tensorstore/util/endian.h" +#include "tensorstore/util/result.h" + +namespace tensorstore { +namespace internal_tiff { + +/// Options specified in the `TiffDriverSpec` that guide interpretation. +struct TiffSpecOptions { + /// Options specific to multi-IFD stacking mode. + struct IfdStackingOptions { + // Specifies the labels for the dimensions represented by the IFD sequence. + // Required if `ifd_stacking` is specified. + std::vector dimensions; + + // Explicitly defines the size of each corresponding dimension in + // `dimensions`. Must have the same length as `dimensions`. Required if + // `dimensions.size() > 1` and OME-XML is not used/found. Optional if + // `dimensions.size() == 1` (can use `ifd_count` instead). + std::optional> dimension_sizes; + + // Specifies the total number of IFDs involved in the stack OR the size of + // the single dimension if `dimensions.size() == 1` and `dimension_sizes` + // is absent. If specified along with `dimension_sizes`, their product must + // match `ifd_count`. 
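+    //
+    // For example, a spec fragment of
+    //   "ifd_stacking": {"dimensions": ["t", "c"],
+    //                    "dimension_sizes": [5, 3],
+    //                    "ifd_count": 15}
+    // stacks 15 IFDs into two additional dimensions of sizes 5 and 3.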
+ std::optional ifd_count; + + // Specifies the order of stacked dimensions within the flat IFD sequence. + // Must be a permutation of `dimensions`. Defaults to the order in + // `dimensions` with the last dimension varying fastest. + std::optional> ifd_sequence_order; + + TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER(IfdStackingOptions, + internal_json_binding::NoOptions, + tensorstore::IncludeDefaults) + + constexpr static auto ApplyMembers = [](auto&& x, auto f) { + return f(x.dimensions, x.dimension_sizes, x.ifd_count, + x.ifd_sequence_order); + }; + }; + + // Option A: Single IFD Mode (default behavior if ifd_stacking is absent) + // Specifies which IFD to open. + uint32_t ifd_index = 0; + + // Option B: Multi-IFD Stacking Mode + // Interprets a sequence of IFDs as additional TensorStore dimensions. + std::optional ifd_stacking; + + // Optional Sample Dimension Label + // Specifies the conceptual label for the dimension derived from + // SamplesPerPixel when SamplesPerPixel > 1. If omitted, a default ('c') is + // used internally. + std::optional sample_dimension_label; + + // Future: OME-XML Control + // bool use_ome_xml = true; + + TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER(TiffSpecOptions, + internal_json_binding::NoOptions, + tensorstore::IncludeDefaults) + + constexpr static auto ApplyMembers = [](auto&& x, auto f) { + return f(x.ifd_index, x.ifd_stacking, x.sample_dimension_label); + }; +}; + +/// Stores information about the mapping between final TensorStore dimensions. +struct TiffDimensionMapping { + /// TensorStore dimension index corresponding to logical height (Y). + std::optional ts_y_dim; + /// TensorStore dimension index corresponding to logical width (X). + std::optional ts_x_dim; + /// TensorStore dimension index corresponding to the sample dimension (if spp + /// > 1). + std::optional ts_sample_dim; + + /// Maps stacked dimension labels (from ifd_stacking.dimensions) to their + /// corresponding TensorStore dimension indices. + std::map ts_stacked_dims; + + /// Maps TensorStore dimension indices back to conceptual labels (e.g., "z", + /// "t", "y", "x", "c") Useful for debugging or potentially reconstructing + /// spec. + std::vector labels_by_ts_dim; +}; + +/// Represents the resolved and interpreted metadata for a TIFF TensorStore. +struct TiffMetadata { + // Which IFD was used as the base (0 unless single IFD mode requested specific + // one). + uint32_t base_ifd_index; + + // Number of IFDs used (1 for single IFD mode, >1 for stacked mode). + uint32_t num_ifds_read = 1; + + // Parsed stacking options, if multi-IFD mode was used. + std::optional stacking_info; + + // Core TensorStore Schema components + DimensionIndex rank = dynamic_rank; + + // Derived shape + std::vector shape; + + DataType dtype; + + // Derived chunk layout including order. + ChunkLayout chunk_layout; + + // Represents compression + Compressor compressor; + + // From user spec or default + SharedArray fill_value; + + // Derived from TIFF/OME/user spec + DimensionUnitsVector dimension_units; + + std::vector dimension_labels; + + // Dimension mapping. + TiffDimensionMapping dimension_mapping; + + // Information retained from TIFF for reference/logic + internal_tiff_kvstore::Endian endian; + internal_tiff_kvstore::CompressionType compression_type; + internal_tiff_kvstore::PlanarConfigType planar_config; + uint16_t samples_per_pixel; + + // Chunk sizes from base IFD. + uint32_t ifd0_chunk_width; + uint32_t ifd0_chunk_height; + + // Whether the IFD is tiled or not. 
+ bool is_tiled = false; + + // Pre-calculated layout order enum + ContiguousLayoutOrder layout_order = ContiguousLayoutOrder::c; + + // Returns `true` if a byte‑swap is required on this platform. + bool NeedByteSwap() const { + constexpr bool kHostIsBig = + (tensorstore::endian::native == tensorstore::endian::big); + + return (endian == internal_tiff_kvstore::Endian::kBig) ^ kHostIsBig; + } + + // TODO: Add fields for parsed OME-XML metadata if needed in the future. + // std::shared_ptr ome_metadata; + + TiffMetadata() = default; +}; + +/// Specifies constraints on the TIFF metadata required when opening. +struct TiffMetadataConstraints { + std::optional dtype; + std::optional> shape; + DimensionIndex rank = dynamic_rank; + + TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER(TiffMetadataConstraints, + internal_json_binding::NoOptions, + tensorstore::IncludeDefaults) +}; + +// Codec specification specifically for the TIFF driver. +class TiffCodecSpec : public internal::CodecDriverSpec { + public: + constexpr static char id[] = "tiff"; + + // Stores the compressor constraint, potentially including parameters. + Compressor compressor; + + CodecSpec Clone() const override; + absl::Status DoMergeFrom( + const internal::CodecDriverSpec& other_base) override; + + TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER(TiffCodecSpec, FromJsonOptions, + ToJsonOptions, + ::nlohmann::json::object_t) + + friend bool operator==(const TiffCodecSpec& a, const TiffCodecSpec& b); +}; + +inline bool operator!=(const TiffCodecSpec& a, const TiffCodecSpec& b) { + return !(a == b); +} + +/// Resolves the final metadata by interpreting parsed TIFF data according +/// to spec options and merging with schema constraints. +/// +/// \param source The parsed TIFF directory structure. +/// \param options User-specified interpretation options from the driver spec. +/// \param schema General TensorStore schema constraints. +/// \returns The final, resolved metadata for the driver. +Result> ResolveMetadata( + const internal_tiff_kvstore::TiffParseResult& source, + const TiffSpecOptions& options, const Schema& schema); + +/// Validates the final resolved metadata against explicit user constraints +/// provided in the driver spec. +/// +/// \param resolved_metadata The final metadata produced by `ResolveMetadata`. +/// \param user_constraints Constraints provided by the user in the spec. +absl::Status ValidateResolvedMetadata( + const TiffMetadata& resolved_metadata, + const TiffMetadataConstraints& user_constraints); + +/// Computes the effective data type based on constraints and schema. +/// +/// \param constraints User constraints on the final metadata (e.g., dtype). +/// \param schema General schema constraints (e.g., dtype). +/// \returns The effective data type. Returns `DataType()` (invalid) if neither +/// input specifies a data type. Returns an error if constraints conflict. +Result GetEffectiveDataType( + const TiffMetadataConstraints& constraints, const Schema& schema); + +/// Merges initial domain properties with schema constraints. +/// \returns A pair containing the merged IndexDomain and the final vector of +/// dimension labels. +Result, std::vector>> GetEffectiveDomain( + DimensionIndex initial_rank, span initial_shape, + span initial_labels, const Schema& schema); + +Result> GetEffectiveDomain( + const TiffMetadataConstraints& constraints, const Schema& schema); + +/// Merges an initial ChunkLayout derived from TIFF properties with schema +/// constraints. 
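+///
+/// The TIFF-derived chunk shape and zero grid origin are applied as hard
+/// constraints, while the default C inner order is soft; for example, a plain
+/// 2-D image tiled as 256x256 fixes the read chunk shape to {256, 256}, but a
+/// schema may still override the inner order.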
+Result GetEffectiveChunkLayout(ChunkLayout initial_layout, + const Schema& schema); + +/// Computes the effective dimension units, merging potential initial units +/// (e.g., from OME-XML in the future) with schema constraints. +Result GetEffectiveDimensionUnits( + DimensionIndex rank, /* const DimensionUnitsVector& initial_units, */ + const Schema& schema); + +/// Creates an initial ChunkLayout based on TIFF tags before schema merging. +Result GetInitialChunkLayout( + const internal_tiff_kvstore::ImageDirectory& base_ifd, + DimensionIndex initial_rank, + internal_tiff_kvstore::PlanarConfigType planar_config, + uint16_t samples_per_pixel); + +/// Creates an initial ChunkLayout based on TIFF tags and the initial +/// structure, before merging with schema constraints. +/// +/// This determines the chunk shape, grid origin (always {0,...}), and default +/// inner order (C-order) based on the representative IFD and the initial +/// dimension structure derived from stacking/sample options. Shape and origin +/// are hard constraints; inner order is a soft constraint. +/// +/// \param base_ifd The representative Image File Directory. +/// \param initial_rank The total rank determined from IFD+stacking+samples. +/// \param initial_labels The conceptual dimension labels determined initially +/// (e.g., {"z", "y", "x", "c"}). Needed to map Y/X/Sample dimensions. +/// \param initial_planar_config The planar configuration from the IFD. +/// \param initial_samples_per_pixel SamplesPerPixel from the IFD. +/// \param sample_label The actual label used for the sample dimension (if any). +/// \returns The initial ChunkLayout. +Result GetInitialChunkLayout( + const internal_tiff_kvstore::ImageDirectory& base_ifd, + DimensionIndex initial_rank, span initial_labels, + internal_tiff_kvstore::PlanarConfigType initial_planar_config, + uint16_t initial_samples_per_pixel, std::string_view sample_label); + +/// Decodes a raw chunk buffer based on TIFF metadata. +/// +/// \param metadata The resolved metadata for the TIFF dataset. +/// \param buffer The raw Cord containing the bytes for a single tile/strip. +/// \returns The decoded chunk as a SharedArray, or an error. +Result> DecodeChunk(const TiffMetadata& metadata, + absl::Cord buffer); + +/// Validates that `dtype` is supported by the TIFF driver. +/// +/// Checks if the data type corresponds to a standard TIFF SampleFormat +/// and BitsPerSample combination (uint8/16/32/64, int8/16/32/64, float32/64). 
+absl::Status ValidateDataType(DataType dtype); + +} // namespace internal_tiff +} // namespace tensorstore + +TENSORSTORE_DECLARE_SERIALIZER_SPECIALIZATION( + tensorstore::internal_tiff::TiffSpecOptions::IfdStackingOptions) +TENSORSTORE_DECLARE_GARBAGE_COLLECTION_NOT_REQUIRED( + tensorstore::internal_tiff::TiffSpecOptions::IfdStackingOptions) + +TENSORSTORE_DECLARE_SERIALIZER_SPECIALIZATION( + tensorstore::internal_tiff::TiffSpecOptions) +TENSORSTORE_DECLARE_GARBAGE_COLLECTION_NOT_REQUIRED( + tensorstore::internal_tiff::TiffSpecOptions) + +TENSORSTORE_DECLARE_SERIALIZER_SPECIALIZATION( + tensorstore::internal_tiff::TiffMetadataConstraints) + +TENSORSTORE_DECLARE_GARBAGE_COLLECTION_NOT_REQUIRED( + tensorstore::internal_tiff::TiffMetadataConstraints) + +#endif // TENSORSTORE_DRIVER_TIFF_METADATA_H_ diff --git a/tensorstore/driver/tiff/metadata_test.cc b/tensorstore/driver/tiff/metadata_test.cc new file mode 100644 index 000000000..a8a6197d6 --- /dev/null +++ b/tensorstore/driver/tiff/metadata_test.cc @@ -0,0 +1,1444 @@ +// Copyright 2025 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/driver/tiff/metadata.h" + +#include +#include + +#include +#include + +#include "riegeli/bytes/cord_reader.h" +#include "riegeli/bytes/cord_writer.h" +#include "tensorstore/chunk_layout.h" +#include "tensorstore/codec_spec.h" +#include "tensorstore/data_type.h" +#include "tensorstore/driver/tiff/compressor.h" +#include "tensorstore/index.h" +#include "tensorstore/index_space/dimension_units.h" +#include "tensorstore/index_space/index_domain_builder.h" +#include "tensorstore/internal/json_binding/gtest.h" +#include "tensorstore/internal/json_gtest.h" +#include "tensorstore/internal/riegeli/array_endian_codec.h" +#include "tensorstore/kvstore/tiff/tiff_details.h" +#include "tensorstore/kvstore/tiff/tiff_dir_cache.h" +#include "tensorstore/schema.h" +#include "tensorstore/util/endian.h" +#include "tensorstore/util/result.h" +#include "tensorstore/util/status_testutil.h" + +namespace { + +namespace jb = tensorstore::internal_json_binding; + +using ::tensorstore::AllocateArray; +using ::tensorstore::Box; +using ::tensorstore::ChunkLayout; +using ::tensorstore::CodecSpec; +using ::tensorstore::ContiguousLayoutOrder; +using ::tensorstore::DataType; +using ::tensorstore::DimensionIndex; +using ::tensorstore::DimensionSet; +using ::tensorstore::dtype_v; +using ::tensorstore::dynamic_rank; +using ::tensorstore::endian; +using ::tensorstore::GetConstantVector; +using ::tensorstore::Index; +using ::tensorstore::IndexDomain; +using ::tensorstore::IndexDomainBuilder; +using ::tensorstore::MakeArray; +using ::tensorstore::MatchesStatus; +using ::tensorstore::RankConstraint; +using ::tensorstore::Result; +using ::tensorstore::Schema; +using ::tensorstore::SharedArray; +using ::tensorstore::SharedArrayView; +using ::tensorstore::span; +using ::tensorstore::TestJsonBinderRoundTrip; +using ::tensorstore::TestJsonBinderRoundTripJsonOnly; +using 
::tensorstore::internal::CodecDriverSpec; +using ::tensorstore::internal_tiff::Compressor; +using ::tensorstore::internal_tiff::GetEffectiveChunkLayout; +using ::tensorstore::internal_tiff::GetEffectiveDimensionUnits; +using ::tensorstore::internal_tiff::GetEffectiveDomain; +using ::tensorstore::internal_tiff::GetInitialChunkLayout; +using ::tensorstore::internal_tiff::ResolveMetadata; +using ::tensorstore::internal_tiff::TiffCodecSpec; +using ::tensorstore::internal_tiff::TiffMetadata; +using ::tensorstore::internal_tiff::TiffMetadataConstraints; +using ::tensorstore::internal_tiff::TiffSpecOptions; +using ::tensorstore::internal_tiff_kvstore::CompressionType; +using ::tensorstore::internal_tiff_kvstore::Endian; +using ::tensorstore::internal_tiff_kvstore::ImageDirectory; +using ::tensorstore::internal_tiff_kvstore::PlanarConfigType; +using ::tensorstore::internal_tiff_kvstore::SampleFormatType; +using ::tensorstore::internal_tiff_kvstore::TiffParseResult; +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; +using ::testing::Optional; + +// Helper to calculate the number of chunks/tiles/strips +std::tuple CalculateChunkCounts( + uint32_t image_width, uint32_t image_height, uint32_t chunk_width, + uint32_t chunk_height) { + if (chunk_width == 0 || chunk_height == 0) { + return {0, 0, 0}; + } + uint32_t num_cols = (image_width + chunk_width - 1) / chunk_width; + uint32_t num_rows = (image_height + chunk_height - 1) / chunk_height; + uint64_t num_chunks = static_cast(num_rows) * num_cols; + return {num_chunks, num_rows, num_cols}; +} + +// Creates a basic valid ImageDirectory. +ImageDirectory MakeImageDirectory( + uint32_t width = 100, uint32_t height = 80, uint32_t chunk_width = 16, + uint32_t chunk_height = 16, bool is_tiled = true, + uint16_t samples_per_pixel = 1, uint16_t bits_per_sample = 8, + SampleFormatType sample_format = SampleFormatType::kUnsignedInteger, + CompressionType compression = CompressionType::kNone, + PlanarConfigType planar_config = PlanarConfigType::kChunky) { + ImageDirectory dir; + dir.width = width; + dir.height = height; + dir.is_tiled = is_tiled; + if (is_tiled) { + dir.chunk_width = chunk_width; + dir.chunk_height = chunk_height; + } else { + dir.chunk_width = width; + dir.chunk_height = chunk_height; + } + dir.samples_per_pixel = samples_per_pixel; + dir.compression = static_cast(compression); + dir.photometric = (samples_per_pixel >= 3) ? 
2 : 1; + dir.planar_config = static_cast(planar_config); + dir.bits_per_sample.assign(samples_per_pixel, bits_per_sample); + dir.sample_format.assign(samples_per_pixel, + static_cast(sample_format)); + + uint64_t num_chunks; + uint32_t num_rows, num_cols; + std::tie(num_chunks, num_rows, num_cols) = CalculateChunkCounts( + dir.width, dir.height, dir.chunk_width, dir.chunk_height); + + // For planar, the count is per plane + if (planar_config == PlanarConfigType::kPlanar && samples_per_pixel > 1) { + num_chunks *= samples_per_pixel; + } + + dir.chunk_offsets.assign(num_chunks, 1000); + dir.chunk_bytecounts.assign( + num_chunks, dir.chunk_width * dir.chunk_height * bits_per_sample / 8); + + return dir; +} + +// Creates a TiffParseResult containing the given directories +TiffParseResult MakeParseResult(std::vector dirs, + Endian endian = Endian::kLittle) { + TiffParseResult result; + result.image_directories = std::move(dirs); + result.endian = endian; + result.full_read = true; + return result; +} +// --- Tests for TiffSpecOptions --- +TEST(SpecOptionsTest, JsonBindingDefault) { + TestJsonBinderRoundTripJsonOnly( + { + /*expected_json=*/{{"ifd", 0}}, + }, + jb::DefaultBinder<>, tensorstore::IncludeDefaults{true}); + TestJsonBinderRoundTripJsonOnly( + { + /*expected_json=*/::nlohmann::json::object(), + }, + jb::DefaultBinder<>, tensorstore::IncludeDefaults{false}); +} + +TEST(SpecOptionsTest, JsonBindingSingleIfdExplicit) { + TestJsonBinderRoundTripJsonOnly({ + {{"ifd", 5}}, // Explicit IFD + }); +} + +TEST(SpecOptionsTest, JsonBindingStackingSimple) { + TestJsonBinderRoundTripJsonOnly({ + {{"ifd_stacking", {{"dimensions", {"z"}}, {"ifd_count", 10}}}}, + }); + TestJsonBinderRoundTripJsonOnly({ + {{"ifd_stacking", {{"dimensions", {"z"}}, {"dimension_sizes", {10}}}}}, + }); +} + +TEST(SpecOptionsTest, JsonBindingStackingMultiDim) { + TestJsonBinderRoundTripJsonOnly({ + {{"ifd_stacking", + {{"dimensions", {"t", "c"}}, {"dimension_sizes", {5, 3}}}}}, + }); +} + +TEST(SpecOptionsTest, JsonBindingStackingMultiDimWithCount) { + TestJsonBinderRoundTripJsonOnly({ + {{"ifd_stacking", + {{"dimensions", {"t", "c"}}, + {"dimension_sizes", {5, 3}}, + {"ifd_count", 15}}}}, + }); +} + +TEST(SpecOptionsTest, JsonBindingStackingWithSequenceOrder) { + TestJsonBinderRoundTripJsonOnly({ + {{"ifd_stacking", + {{"dimensions", {"t", "c"}}, + {"dimension_sizes", {5, 3}}, + {"ifd_sequence_order", {"c", "t"}}}}}, + }); +} + +TEST(SpecOptionsTest, JsonBindingWithSampleLabel) { + TestJsonBinderRoundTripJsonOnly({ + {{"ifd", 3}, {"sample_dimension_label", "channel"}}, + }); + TestJsonBinderRoundTripJsonOnly({ + {{"ifd_stacking", {{"dimensions", {"z"}}, {"ifd_count", 10}}}, + {"sample_dimension_label", "rgba"}}, + }); +} + +TEST(SpecOptionsTest, JsonBindingInvalidIfdNegative) { + EXPECT_THAT(TiffSpecOptions::FromJson({{"ifd", -1}}), + MatchesStatus(absl::StatusCode::kInvalidArgument)); +} + +TEST(SpecOptionsTest, JsonBindingInvalidStackingMissingDims) { + EXPECT_THAT( + TiffSpecOptions::FromJson({{"ifd_stacking", {{"ifd_count", 10}}}}), + MatchesStatus(absl::StatusCode::kInvalidArgument, + ".*\"dimensions\".*missing.*")); +} + +TEST(SpecOptionsTest, JsonBindingInvalidStackingEmptyDims) { + EXPECT_THAT( + TiffSpecOptions::FromJson( + {{"ifd_stacking", + {{"dimensions", nlohmann::json::array()}, {"ifd_count", 10}}}}), + MatchesStatus(absl::StatusCode::kInvalidArgument, + ".*\"dimensions\" must not be empty.*")); +} + +TEST(SpecOptionsTest, JsonBindingInvalidStackingSizeMismatch) { + // dim_sizes length mismatch + 
EXPECT_THAT(TiffSpecOptions::FromJson( + {{"ifd_stacking", + {{"dimensions", {"t", "c"}}, {"dimension_sizes", {5}}}}}), + MatchesStatus(absl::StatusCode::kInvalidArgument, + ".*\"dimension_sizes\" length \\(1\\) must match " + "\"dimensions\" length \\(2\\).*")); + // ifd_count mismatch with dim_sizes product + EXPECT_THAT( + TiffSpecOptions::FromJson({{"ifd_stacking", + {{"dimensions", {"t", "c"}}, + {"dimension_sizes", {5, 3}}, + {"ifd_count", 16}}}}), + MatchesStatus(absl::StatusCode::kInvalidArgument, + ".*Product of \"dimension_sizes\" \\(15\\) does not " + "match specified \"ifd_count\" \\(16\\).*")); +} + +TEST(SpecOptionsTest, JsonBindingInvalidStackingMissingSizeInfo) { + // Rank 1 stack needs either dimension_sizes or ifd_count + EXPECT_THAT( + TiffSpecOptions::FromJson({{"ifd_stacking", {{"dimensions", {"z"}}}}}), + MatchesStatus( + absl::StatusCode::kInvalidArgument, + ".*Either \"dimension_sizes\" or \"ifd_count\" must be specified.*")); + // Rank > 1 stack needs dimension_sizes + EXPECT_THAT( + TiffSpecOptions::FromJson( + {{"ifd_stacking", {{"dimensions", {"z", "t"}}, {"ifd_count", 10}}}}), + MatchesStatus(absl::StatusCode::kInvalidArgument, + ".*\"dimension_sizes\" must be specified when.*")); +} + +TEST(SpecOptionsTest, JsonBindingInvalidStackingSequenceOrder) { + // Sequence order wrong length + EXPECT_THAT( + TiffSpecOptions::FromJson({{"ifd_stacking", + {{"dimensions", {"t", "c"}}, + {"dimension_sizes", {5, 3}}, + {"ifd_sequence_order", {"t"}}}}}), + MatchesStatus(absl::StatusCode::kInvalidArgument, + ".*\"ifd_sequence_order\" length \\(1\\) must match " + "\"dimensions\" length \\(2\\).*")); + // Sequence order not a permutation + EXPECT_THAT( + TiffSpecOptions::FromJson( + {{"ifd_stacking", + { + {"dimensions", {"t", "c"}}, + {"dimension_sizes", {5, 3}}, + {"ifd_sequence_order", {"t", "z"}} // "z" not in dimensions + }}}), + MatchesStatus(absl::StatusCode::kInvalidArgument, + ".*\"ifd_sequence_order\" must be a permutation of " + "\"dimensions\".*")); +} + +TEST(SpecOptionsTest, JsonBindingInvalidStackingDuplicateDimLabel) { + EXPECT_THAT(TiffSpecOptions::FromJson({{"ifd_stacking", + {{"dimensions", {"z", "z"}}, + {"dimension_sizes", {5, 3}}}}}), + MatchesStatus(absl::StatusCode::kInvalidArgument, + ".*Duplicate dimension label \"z\".*")); +} + +// --- Tests for TiffMetadataConstraints --- +TEST(MetadataConstraintsTest, JsonBinding) { + TestJsonBinderRoundTripJsonOnly({ + ::nlohmann::json::object(), // Empty constraints + {{"dtype", "float32"}}, + {{"shape", {100, 200}}}, + {{"dtype", "int16"}, {"shape", {50, 60, 70}}}, + }); + + EXPECT_THAT(TiffMetadataConstraints::FromJson({{"dtype", 123}}), + MatchesStatus(absl::StatusCode::kInvalidArgument)); + EXPECT_THAT(TiffMetadataConstraints::FromJson({{"shape", {10, "a"}}}), + MatchesStatus(absl::StatusCode::kInvalidArgument)); +} + +// --- Tests for Compressor --- +TEST(CompressorFromJsonTest, CreateRaw) { + ::nlohmann::json raw_json = {{"type", "raw"}}; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(Compressor compressor, + Compressor::FromJson(raw_json)); + + EXPECT_THAT(compressor, testing::IsNull()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto round_trip_json, + jb::ToJson(compressor)); + EXPECT_THAT(round_trip_json, tensorstore::MatchesJson(raw_json)); +} + +TEST(CompressorFromJsonTest, CreateZstd) { + ::nlohmann::json zstd_json = {{"type", "zstd"}}; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(Compressor compressor, + Compressor::FromJson(zstd_json)); + + EXPECT_THAT(compressor, testing::NotNull()); + + 
TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto round_trip_json, + jb::ToJson(compressor)); + EXPECT_THAT(round_trip_json, tensorstore::MatchesJson({{"type", "zstd"}})); +} + +TEST(CompressorFromJsonTest, CreateUnsupported) { + ::nlohmann::json unknown_json = {{"type", "nonexistent_compressor"}}; + + Result result = Compressor::FromJson(unknown_json); + + // Expect an error because the type isn't registered + EXPECT_FALSE(result.ok()); + EXPECT_THAT( + result.status(), + MatchesStatus(absl::StatusCode::kInvalidArgument, + ".*\"nonexistent_compressor\".* is not registered.*")); +} + +// --- Tests for GetInitialChunkLayout --- +TEST(GetInitialChunkLayoutTest, TiledChunkySpp1) { + ImageDirectory ifd = + MakeImageDirectory(/*width=*/60, /*height=*/40, + /*chunk_width=*/16, /*chunk_height=*/8, + /*is_tiled=*/true, /*spp=*/1); + DimensionIndex initial_rank = 2; + std::vector initial_labels = {"y", "x"}; + std::string sample_label = "c"; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + ChunkLayout layout, + GetInitialChunkLayout(ifd, initial_rank, initial_labels, + PlanarConfigType::kChunky, 1, sample_label)); + + EXPECT_EQ(layout.rank(), 2); + auto expected_hard_constraints = DimensionSet::UpTo(initial_rank); + + EXPECT_THAT(layout.grid_origin(), ElementsAre(0, 0)); + EXPECT_EQ(layout.grid_origin().hard_constraint, expected_hard_constraints); + + EXPECT_THAT(span(layout.read_chunk_shape()), + ElementsAre(8, 16)); // {y, x} order + EXPECT_EQ(layout.read_chunk_shape().hard_constraint, + expected_hard_constraints); + + EXPECT_THAT(span(layout.write_chunk_shape()), + ElementsAre(8, 16)); + EXPECT_EQ(layout.write_chunk_shape().hard_constraint, + expected_hard_constraints); + + EXPECT_THAT(span(layout.codec_chunk_shape()), + ElementsAre(8, 16)); + EXPECT_EQ(layout.codec_chunk_shape().hard_constraint, + expected_hard_constraints); + + EXPECT_THAT(layout.inner_order(), ElementsAre(0, 1)); // Default C + EXPECT_FALSE(layout.inner_order().hard_constraint); +} + +TEST(GetInitialChunkLayoutTest, StrippedChunkySpp1) { + ImageDirectory ifd = + MakeImageDirectory(/*width=*/50, /*height=*/35, + /*chunk_width=*/0, /*chunk_height=*/10, + /*is_tiled=*/false, /*spp=*/1); + DimensionIndex initial_rank = 2; + std::vector initial_labels = {"y", "x"}; + std::string sample_label = "c"; + auto expected_hard_constraints = DimensionSet::UpTo(initial_rank); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + ChunkLayout layout, + GetInitialChunkLayout(ifd, initial_rank, initial_labels, + PlanarConfigType::kChunky, 1, sample_label)); + + EXPECT_EQ(layout.rank(), 2); + EXPECT_THAT(layout.grid_origin(), ElementsAre(0, 0)); + EXPECT_EQ(layout.grid_origin().hard_constraint, expected_hard_constraints); + EXPECT_THAT(span(layout.read_chunk_shape()), + ElementsAre(10, 50)); + EXPECT_EQ(layout.read_chunk_shape().hard_constraint, + expected_hard_constraints); + EXPECT_THAT(span(layout.write_chunk_shape()), + ElementsAre(10, 50)); + EXPECT_EQ(layout.write_chunk_shape().hard_constraint, + expected_hard_constraints); + EXPECT_THAT(span(layout.codec_chunk_shape()), + ElementsAre(10, 50)); + EXPECT_EQ(layout.codec_chunk_shape().hard_constraint, + expected_hard_constraints); + + EXPECT_THAT(layout.inner_order(), ElementsAre(0, 1)); + EXPECT_FALSE(layout.inner_order().hard_constraint); +} + +TEST(GetInitialChunkLayoutTest, TiledChunkySpp3) { + ImageDirectory ifd = + MakeImageDirectory(/*width=*/60, /*height=*/40, + /*chunk_width=*/16, /*chunk_height=*/8, + /*is_tiled=*/true, /*spp=*/3); + DimensionIndex initial_rank = 3; + std::vector initial_labels = {"y", "x", "c"}; + 
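// Chunky SPP=3: the sample dimension "c" comes after "y" and "x" in the initial layout. +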
std::string sample_label = "c"; + auto expected_hard_constraints = DimensionSet::UpTo(initial_rank); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + ChunkLayout layout, + GetInitialChunkLayout(ifd, initial_rank, initial_labels, + PlanarConfigType::kChunky, 3, sample_label)); + + EXPECT_EQ(layout.rank(), 3); + EXPECT_THAT(layout.grid_origin(), ElementsAre(0, 0, 0)); + EXPECT_EQ(layout.grid_origin().hard_constraint, expected_hard_constraints); + EXPECT_THAT(span(layout.read_chunk_shape()), + ElementsAre(8, 16, 3)); + EXPECT_EQ(layout.read_chunk_shape().hard_constraint, + expected_hard_constraints); + EXPECT_THAT(span(layout.write_chunk_shape()), + ElementsAre(8, 16, 3)); + EXPECT_EQ(layout.write_chunk_shape().hard_constraint, + expected_hard_constraints); + EXPECT_THAT(span(layout.codec_chunk_shape()), + ElementsAre(8, 16, 3)); + EXPECT_EQ(layout.codec_chunk_shape().hard_constraint, + expected_hard_constraints); + + EXPECT_THAT(layout.inner_order(), ElementsAre(0, 1, 2)); + EXPECT_FALSE(layout.inner_order().hard_constraint); +} + +TEST(GetInitialChunkLayoutTest, TiledChunkySpp3YXOrder) { + ImageDirectory ifd = + MakeImageDirectory(/*width=*/60, /*height=*/40, + /*chunk_width=*/16, /*chunk_height=*/8, + /*is_tiled=*/true, /*spp=*/3); + DimensionIndex initial_rank = 3; + std::vector initial_labels = {"c", "y", "x"}; + std::string sample_label = "c"; + auto expected_hard_constraints = DimensionSet::UpTo(initial_rank); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + ChunkLayout layout, + GetInitialChunkLayout(ifd, initial_rank, initial_labels, + PlanarConfigType::kChunky, 3, sample_label)); + + EXPECT_EQ(layout.rank(), 3); + EXPECT_THAT(layout.grid_origin(), ElementsAre(0, 0, 0)); + EXPECT_EQ(layout.grid_origin().hard_constraint, expected_hard_constraints); + EXPECT_THAT(span(layout.read_chunk_shape()), + ElementsAre(3, 8, 16)); + EXPECT_EQ(layout.read_chunk_shape().hard_constraint, + expected_hard_constraints); + EXPECT_THAT(span(layout.write_chunk_shape()), + ElementsAre(3, 8, 16)); + EXPECT_EQ(layout.write_chunk_shape().hard_constraint, + expected_hard_constraints); + EXPECT_THAT(span(layout.codec_chunk_shape()), + ElementsAre(3, 8, 16)); + EXPECT_EQ(layout.codec_chunk_shape().hard_constraint, + expected_hard_constraints); + + EXPECT_THAT(layout.inner_order(), ElementsAre(0, 1, 2)); + EXPECT_FALSE(layout.inner_order().hard_constraint); +} + +TEST(GetInitialChunkLayoutTest, TiledPlanarSpp3) { + ImageDirectory ifd = MakeImageDirectory( + /*width=*/60, /*height=*/40, + /*chunk_width=*/16, /*chunk_height=*/8, + /*is_tiled=*/true, /*spp=*/3, + /*bits=*/8, SampleFormatType::kUnsignedInteger, CompressionType::kNone, + /*planar=*/PlanarConfigType::kPlanar); + DimensionIndex initial_rank = 3; + std::vector initial_labels = {"c", "y", "x"}; + std::string sample_label = "c"; + auto expected_hard_constraints = DimensionSet::UpTo(initial_rank); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + ChunkLayout layout, + GetInitialChunkLayout(ifd, initial_rank, initial_labels, + PlanarConfigType::kPlanar, 3, sample_label)); + + EXPECT_EQ(layout.rank(), 3); + EXPECT_THAT(layout.grid_origin(), ElementsAre(0, 0, 0)); + EXPECT_EQ(layout.grid_origin().hard_constraint, expected_hard_constraints); + EXPECT_THAT(span(layout.read_chunk_shape()), + ElementsAre(1, 8, 16)); + EXPECT_EQ(layout.read_chunk_shape().hard_constraint, + expected_hard_constraints); + EXPECT_THAT(span(layout.write_chunk_shape()), + ElementsAre(1, 8, 16)); + EXPECT_EQ(layout.write_chunk_shape().hard_constraint, + expected_hard_constraints); + 
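// Planar data keeps each sample plane in its own chunk, so the "c" extent is 1. +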
EXPECT_THAT(span(layout.codec_chunk_shape()), + ElementsAre(1, 8, 16)); + EXPECT_EQ(layout.codec_chunk_shape().hard_constraint, + expected_hard_constraints); + + EXPECT_THAT(layout.inner_order(), ElementsAre(0, 1, 2)); + EXPECT_FALSE(layout.inner_order().hard_constraint); +} + +TEST(GetInitialChunkLayoutTest, StackedTiledChunkySpp1) { + ImageDirectory ifd = + MakeImageDirectory(/*width=*/60, /*height=*/40, + /*chunk_width=*/16, /*chunk_height=*/8, + /*is_tiled=*/true, /*spp=*/1); + DimensionIndex initial_rank = 3; + std::vector initial_labels = {"z", "y", "x"}; + std::string sample_label = "c"; + auto expected_hard_constraints = DimensionSet::UpTo(initial_rank); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + ChunkLayout layout, + GetInitialChunkLayout(ifd, initial_rank, initial_labels, + PlanarConfigType::kChunky, 1, sample_label)); + + EXPECT_EQ(layout.rank(), 3); + EXPECT_THAT(layout.grid_origin(), ElementsAre(0, 0, 0)); + EXPECT_EQ(layout.grid_origin().hard_constraint, expected_hard_constraints); + EXPECT_THAT(span(layout.read_chunk_shape()), + ElementsAre(1, 8, 16)); + EXPECT_EQ(layout.read_chunk_shape().hard_constraint, + expected_hard_constraints); + EXPECT_THAT(span(layout.write_chunk_shape()), + ElementsAre(1, 8, 16)); + EXPECT_EQ(layout.write_chunk_shape().hard_constraint, + expected_hard_constraints); + EXPECT_THAT(span(layout.codec_chunk_shape()), + ElementsAre(1, 8, 16)); + EXPECT_EQ(layout.codec_chunk_shape().hard_constraint, + expected_hard_constraints); + + EXPECT_THAT(layout.inner_order(), ElementsAre(0, 1, 2)); + EXPECT_FALSE(layout.inner_order().hard_constraint); +} + +// --- Tests for GetEffectiveChunkLayout --- +TEST(GetEffectiveChunkLayoutTest, InitialOnly) { + ImageDirectory ifd = + MakeImageDirectory(/*width=*/60, /*height=*/40, + /*chunk_width=*/16, /*chunk_height=*/8); + DimensionIndex rank = 2; + std::vector labels = {"y", "x"}; + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + ChunkLayout initial_layout, + GetInitialChunkLayout(ifd, rank, labels, PlanarConfigType::kChunky, 1, + "c")); + Schema schema; + DimensionSet expected_hard_constraints = DimensionSet::UpTo(rank); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + ChunkLayout effective_layout, + GetEffectiveChunkLayout(initial_layout, schema)); + + EXPECT_EQ(effective_layout.rank(), 2); + EXPECT_THAT(span(effective_layout.read_chunk_shape()), + ElementsAre(8, 16)); + EXPECT_EQ(effective_layout.read_chunk_shape().hard_constraint, + expected_hard_constraints); + EXPECT_THAT(effective_layout.grid_origin(), ElementsAre(0, 0)); + EXPECT_EQ(effective_layout.grid_origin().hard_constraint, + expected_hard_constraints); + EXPECT_THAT(effective_layout.inner_order(), ElementsAre(0, 1)); + EXPECT_EQ(effective_layout.inner_order().hard_constraint, + initial_layout.inner_order().hard_constraint); +} + +TEST(GetEffectiveChunkLayoutTest, SchemaHardInnerOrder) { + ImageDirectory ifd = + MakeImageDirectory(/*width=*/60, /*height=*/40, + /*chunk_width=*/16, /*chunk_height=*/8); + DimensionIndex rank = 2; + std::vector labels = {"y", "x"}; + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + ChunkLayout initial_layout, + GetInitialChunkLayout(ifd, rank, labels, PlanarConfigType::kChunky, 1, + "c")); + Schema schema; + TENSORSTORE_ASSERT_OK( + schema.Set(ChunkLayout::InnerOrder({1, 0}, /*hard=*/true))); + DimensionSet expected_hard_constraints = DimensionSet::UpTo(rank); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + ChunkLayout effective_layout, + GetEffectiveChunkLayout(initial_layout, schema)); + + EXPECT_THAT(span(effective_layout.read_chunk_shape()), + ElementsAre(8, 16)); + 
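// The tile-derived chunk shape is preserved; only the inner order comes from the schema. +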
EXPECT_EQ(effective_layout.read_chunk_shape().hard_constraint, + expected_hard_constraints); + EXPECT_THAT(effective_layout.grid_origin(), ElementsAre(0, 0)); + EXPECT_EQ(effective_layout.grid_origin().hard_constraint, + expected_hard_constraints); + EXPECT_THAT(effective_layout.inner_order(), ElementsAre(1, 0)); + EXPECT_TRUE(effective_layout.inner_order().hard_constraint); +} + +TEST(GetEffectiveChunkLayoutTest, SchemaSoftInnerOrder) { + ImageDirectory ifd = + MakeImageDirectory(/*width=*/60, /*height=*/40, + /*chunk_width=*/16, /*chunk_height=*/8); + DimensionIndex rank = 2; + std::vector labels = {"y", "x"}; + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + ChunkLayout initial_layout, + GetInitialChunkLayout(ifd, rank, labels, PlanarConfigType::kChunky, 1, + "c")); + Schema schema; + TENSORSTORE_ASSERT_OK( + schema.Set(ChunkLayout::InnerOrder({1, 0}, /*hard=*/false))); + DimensionSet expected_hard_constraints = DimensionSet::UpTo(rank); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + ChunkLayout effective_layout, + GetEffectiveChunkLayout(initial_layout, schema)); + + EXPECT_THAT(span(effective_layout.read_chunk_shape()), + ElementsAre(8, 16)); + EXPECT_EQ(effective_layout.read_chunk_shape().hard_constraint, + expected_hard_constraints); + EXPECT_THAT(effective_layout.grid_origin(), ElementsAre(0, 0)); + EXPECT_EQ(effective_layout.grid_origin().hard_constraint, + expected_hard_constraints); + EXPECT_THAT(effective_layout.inner_order(), ElementsAre(1, 0)); + EXPECT_FALSE(effective_layout.inner_order().hard_constraint); // Still soft +} + +TEST(GetEffectiveChunkLayoutTest, SchemaSoftChunkShape) { + ImageDirectory ifd = + MakeImageDirectory(/*width=*/60, /*height=*/40, + /*chunk_width=*/16, /*chunk_height=*/8); + DimensionIndex rank = 2; + std::vector labels = {"y", "x"}; + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + ChunkLayout initial_layout, + GetInitialChunkLayout(ifd, rank, labels, PlanarConfigType::kChunky, 1, + "c")); + Schema schema; + TENSORSTORE_ASSERT_OK( + schema.Set(ChunkLayout::ReadChunkShape({10, 20}, /*hard=*/false))); + DimensionSet expected_hard_constraints = DimensionSet::UpTo(rank); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + ChunkLayout effective_layout, + GetEffectiveChunkLayout(initial_layout, schema)); + + EXPECT_THAT(span(effective_layout.read_chunk_shape()), + ElementsAre(8, 16)); // Still TIFF shape + EXPECT_EQ(effective_layout.read_chunk_shape().hard_constraint, + expected_hard_constraints); // Still hard +} + +// --- GetEffective... 
tests --- +TEST(GetEffectiveDomainTest, InitialOnly) { + DimensionIndex rank = 3; + std::vector shape = {10, 20, 30}; + std::vector labels = {"z", "y", "x"}; + Schema schema; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto result, GetEffectiveDomain(rank, shape, labels, schema)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto expected_domain, + IndexDomainBuilder(3).shape(shape).labels(labels).Finalize()); + + EXPECT_EQ(result.first, expected_domain); + EXPECT_EQ(result.second, labels); +} + +TEST(GetEffectiveDomainTest, SchemaRankOnly) { + DimensionIndex rank = 3; + std::vector shape = {10, 20, 30}; + std::vector labels = {"z", "y", "x"}; + Schema schema; + TENSORSTORE_ASSERT_OK(schema.Set(RankConstraint{3})); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto result, GetEffectiveDomain(rank, shape, labels, schema)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto expected_domain, + IndexDomainBuilder(3).shape(shape).labels(labels).Finalize()); + + EXPECT_EQ(result.first, expected_domain); + EXPECT_EQ(result.second, labels); +} + +TEST(GetEffectiveDomainTest, SchemaDomainOverridesLabels) { + DimensionIndex rank = 3; + std::vector shape = {10, 20, 30}; + std::vector initial_labels = {"z", "y", "x"}; + Schema schema; + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto schema_domain, + IndexDomainBuilder(3).shape(shape).labels({"Z", "Y", "X"}).Finalize()); + TENSORSTORE_ASSERT_OK(schema.Set(schema_domain)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto result, GetEffectiveDomain(rank, shape, initial_labels, schema)); + + EXPECT_EQ(result.first, schema_domain); // Domain from schema + EXPECT_THAT(result.second, ElementsAre("Z", "Y", "X")); // Labels from schema +} + +TEST(GetEffectiveDomainTest, SchemaDomainIncompatibleShape) { + DimensionIndex rank = 3; + std::vector initial_shape = {10, 20, 30}; + std::vector initial_labels = {"z", "y", "x"}; + Schema schema; + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto schema_domain, + IndexDomainBuilder(3) + .shape({10, 20, 31}) + .labels(initial_labels) + .Finalize()); + TENSORSTORE_ASSERT_OK(schema.Set(schema_domain)); + + EXPECT_THAT(GetEffectiveDomain(rank, initial_shape, initial_labels, schema), + MatchesStatus(absl::StatusCode::kInvalidArgument, + ".*Mismatch in dimension 2:.*")); +} + +TEST(GetEffectiveDomainTest, SchemaRankIncompatible) { + DimensionIndex rank = 3; + std::vector initial_shape = {10, 20, 30}; + std::vector initial_labels = {"z", "y", "x"}; + Schema schema; + TENSORSTORE_ASSERT_OK(schema.Set(RankConstraint{2})); // Rank mismatch + + EXPECT_THAT(GetEffectiveDomain(rank, initial_shape, initial_labels, schema), + MatchesStatus(absl::StatusCode::kFailedPrecondition, + ".*rank constraint 2 is incompatible.*rank 3.*")); +} + +TEST(GetEffectiveDimensionUnitsTest, InitialOnly) { + DimensionIndex rank = 3; + Schema schema; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto units, + GetEffectiveDimensionUnits(rank, schema)); + ASSERT_EQ(units.size(), 3); + EXPECT_THAT(units, ElementsAre(std::nullopt, std::nullopt, std::nullopt)); +} + +TEST(GetEffectiveDimensionUnitsTest, SchemaOnly) { + DimensionIndex rank = 2; + Schema schema; + TENSORSTORE_ASSERT_OK(schema.Set(Schema::DimensionUnits({"nm", "um"}))); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto units, + GetEffectiveDimensionUnits(rank, schema)); + ASSERT_EQ(units.size(), 2); + EXPECT_THAT(units[0], Optional(tensorstore::Unit("nm"))); + EXPECT_THAT(units[1], Optional(tensorstore::Unit("um"))); +} + +TEST(GetEffectiveDimensionUnitsTest, SchemaRankMismatch) { + DimensionIndex rank = 3; // TIFF implies rank 3 + Schema schema; + 
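// Two dimension units imply rank 2, which conflicts with the rank-3 TIFF and should be rejected. +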
TENSORSTORE_ASSERT_OK( + schema.Set(Schema::DimensionUnits({"nm", "um"}))); // Implies rank 2 + + EXPECT_THAT(GetEffectiveDimensionUnits(rank, schema), + MatchesStatus(absl::StatusCode::kInvalidArgument, + ".*Schema dimension_units rank.*")); +} + +TEST(GetEffectiveDataTypeTest, ManyChecks) { + TiffMetadataConstraints constraints; + Schema schema; + EXPECT_FALSE(GetEffectiveDataType(constraints, schema).value().valid()); + TENSORSTORE_ASSERT_OK(schema.Set(dtype_v)); + EXPECT_THAT(GetEffectiveDataType(constraints, schema), + Optional(dtype_v)); + schema = Schema(); + constraints.dtype = dtype_v; + EXPECT_THAT(GetEffectiveDataType(constraints, schema), + Optional(dtype_v)); + TENSORSTORE_ASSERT_OK(schema.Set(dtype_v)); + EXPECT_THAT(GetEffectiveDataType(constraints, schema), + Optional(dtype_v)); +} + +// --- Tests for ResolveMetadata --- +// Helper to check basic metadata properties +void CheckBaseMetadata( + const TiffMetadata& md, uint32_t expected_ifd, uint32_t expected_num_ifds, + DimensionIndex expected_rank, const std::vector& expected_shape, + DataType expected_dtype, uint16_t expected_spp, + CompressionType expected_comp, PlanarConfigType expected_planar, + const std::vector& expected_read_chunk_shape, + const std::vector& expected_inner_order) { + EXPECT_EQ(md.base_ifd_index, expected_ifd); + EXPECT_EQ(md.num_ifds_read, expected_num_ifds); + EXPECT_EQ(md.rank, expected_rank); + EXPECT_THAT(md.shape, ElementsAreArray(expected_shape)); + EXPECT_EQ(md.dtype, expected_dtype); + EXPECT_EQ(md.samples_per_pixel, expected_spp); + EXPECT_EQ(md.compression_type, expected_comp); + EXPECT_EQ(md.planar_config, expected_planar); + EXPECT_THAT(md.chunk_layout.read_chunk_shape(), + ElementsAreArray(expected_read_chunk_shape)); + EXPECT_THAT(md.chunk_layout.inner_order(), + ElementsAreArray(expected_inner_order)); + EXPECT_EQ(md.dimension_mapping.labels_by_ts_dim.size(), expected_rank); +} + +TEST(ResolveMetadataTest, BasicSuccessTileChunkySpp1) { + auto parse_result = + MakeParseResult({MakeImageDirectory(100, 80, 16, 16, true, 1)}); + TiffSpecOptions options; // ifd_index = 0 + Schema schema; + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto metadata, ResolveMetadata(parse_result, options, schema)); + + CheckBaseMetadata(*metadata, 0, 1, 2, {80, 100}, dtype_v, 1, + CompressionType::kNone, PlanarConfigType::kChunky, {16, 16}, + {0, 1}); + + EXPECT_THAT(metadata->dimension_labels, ElementsAre("y", "x")); + EXPECT_THAT(metadata->dimension_mapping.ts_y_dim, Optional(0)); + EXPECT_THAT(metadata->dimension_mapping.ts_x_dim, Optional(1)); + EXPECT_FALSE(metadata->dimension_mapping.ts_sample_dim.has_value()); + EXPECT_TRUE(metadata->dimension_mapping.ts_stacked_dims.empty()); + EXPECT_THAT(metadata->dimension_mapping.labels_by_ts_dim, + ElementsAre("y", "x")); +} + +TEST(ResolveMetadataTest, BasicSuccessStripChunkySpp1) { + ImageDirectory img_dir = MakeImageDirectory(100, 80, 0, 10, false, 1); + auto parse_result = MakeParseResult({img_dir}); + TiffSpecOptions options; + Schema schema; + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto metadata, ResolveMetadata(parse_result, options, schema)); + + CheckBaseMetadata(*metadata, 0, 1, 2, {80, 100}, dtype_v, 1, + CompressionType::kNone, PlanarConfigType::kChunky, + {10, 100}, {0, 1}); + + EXPECT_THAT(metadata->dimension_labels, ElementsAre("y", "x")); + EXPECT_THAT(metadata->dimension_mapping.ts_y_dim, Optional(0)); + EXPECT_THAT(metadata->dimension_mapping.ts_x_dim, Optional(1)); +} + +TEST(ResolveMetadataTest, BasicSuccessTileChunkySpp3) { + ImageDirectory img_dir = 
MakeImageDirectory(100, 80, 16, 16, true, 3); + auto parse_result = MakeParseResult({img_dir}); + TiffSpecOptions options; + Schema schema; + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto metadata, ResolveMetadata(parse_result, options, schema)); + + CheckBaseMetadata(*metadata, 0, 1, 3, {80, 100, 3}, dtype_v, 3, + CompressionType::kNone, PlanarConfigType::kChunky, + {16, 16, 3}, {0, 1, 2}); + EXPECT_THAT(metadata->dimension_labels, ElementsAre("y", "x", "c")); + EXPECT_THAT(metadata->dimension_mapping.ts_y_dim, Optional(0)); + EXPECT_THAT(metadata->dimension_mapping.ts_x_dim, Optional(1)); + EXPECT_THAT(metadata->dimension_mapping.ts_sample_dim, Optional(2)); + EXPECT_TRUE(metadata->dimension_mapping.ts_stacked_dims.empty()); + EXPECT_THAT(metadata->dimension_mapping.labels_by_ts_dim, + ElementsAre("y", "x", "c")); +} + +TEST(ResolveMetadataTest, SelectIfd) { + auto parse_result = + MakeParseResult({MakeImageDirectory(100, 80, 16, 16, true, 1, 8), + MakeImageDirectory(50, 40, 8, 8, true, 3, 16)}); + TiffSpecOptions options; + options.ifd_index = 1; + Schema schema; + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto metadata, ResolveMetadata(parse_result, options, schema)); + + CheckBaseMetadata(*metadata, 1, 1, 3, {40, 50, 3}, dtype_v, 3, + CompressionType::kNone, PlanarConfigType::kChunky, + {8, 8, 3}, {0, 1, 2}); + + EXPECT_THAT(metadata->dimension_labels, ElementsAre("y", "x", "c")); +} + +TEST(ResolveMetadataTest, InvalidIfdIndex) { + auto parse_result = MakeParseResult({MakeImageDirectory()}); + TiffSpecOptions options; + options.ifd_index = 1; + Schema schema; + EXPECT_THAT(ResolveMetadata(parse_result, options, schema), + MatchesStatus(absl::StatusCode::kNotFound, + ".*Requested IFD index 1 not found.*")); +} + +TEST(ResolveMetadataTest, SchemaMergeChunkShapeConflict) { + auto parse_result = MakeParseResult({MakeImageDirectory(100, 80, 16, 16)}); + TiffSpecOptions options; + Schema schema; + ChunkLayout schema_layout; + TENSORSTORE_ASSERT_OK(schema_layout.Set(ChunkLayout::ChunkShape({32, 32}))); + TENSORSTORE_ASSERT_OK(schema.Set(schema_layout)); + EXPECT_THAT(ResolveMetadata(parse_result, options, schema), + MatchesStatus(absl::StatusCode::kInvalidArgument, + ".*New hard constraint .*16.* does not match " + "existing hard constraint .*32.*.*")); +} + +TEST(ResolveMetadataTest, SchemaMergeInnerOrder) { + auto parse_result = + MakeParseResult({MakeImageDirectory(100, 80, 16, 16, true, 1)}); + TiffSpecOptions options; + Schema schema; + ChunkLayout schema_layout; + TENSORSTORE_ASSERT_OK(schema_layout.Set(ChunkLayout::InnerOrder({1, 0}))); + TENSORSTORE_ASSERT_OK(schema.Set(schema_layout)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto metadata, ResolveMetadata(parse_result, options, schema)); + + // Schema hard constraint overrides TIFF default soft constraint + EXPECT_THAT(metadata->chunk_layout.inner_order(), ElementsAre(1, 0)); + EXPECT_EQ(metadata->layout_order, ContiguousLayoutOrder::fortran); + EXPECT_THAT(metadata->chunk_layout.read_chunk_shape(), ElementsAre(16, 16)); +} + +TEST(ResolveMetadataTest, SchemaOverrideLabels) { + // Image is 80x100, spp=3 -> initial order/labels: y, x, c + auto parse_result = + MakeParseResult({MakeImageDirectory(100, 80, 16, 16, true, 3)}); + TiffSpecOptions options; + Schema schema; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto desired_domain, + IndexDomainBuilder(3) + .shape({80, 100, 3}) + .labels({"height", "width", "channel"}) + .Finalize()); + + TENSORSTORE_ASSERT_OK(schema.Set(desired_domain)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto metadata, 
ResolveMetadata(parse_result, options, schema)); + + // Check that ResolveMetadata respected the schema's domain labels + EXPECT_THAT(metadata->dimension_labels, + ElementsAre("height", "width", "channel")); + + // Check mapping based on conceptual labels ('y', 'x', 'c') matching the + // *final* labels + EXPECT_THAT(metadata->dimension_mapping.ts_y_dim, + Optional(0)); // 'y' matched 'height' at index 0 + EXPECT_THAT(metadata->dimension_mapping.ts_x_dim, + Optional(1)); // 'x' matched 'width' at index 1 + EXPECT_THAT(metadata->dimension_mapping.ts_sample_dim, + Optional(2)); // 'c' matched 'channel' at index 2 + EXPECT_THAT(metadata->dimension_mapping.labels_by_ts_dim, + ElementsAre("y", "x", "c")); // Conceptual order still y,x,c + + // Check that chunk layout inner order reflects the final dimension order + // The default soft inner order is still {0, 1, 2} relative to the final + // axes + EXPECT_THAT(metadata->chunk_layout.inner_order(), ElementsAre(0, 1, 2)); +} + +TEST(ResolveMetadataTest, SchemaUseSampleDimensionLabel) { + auto parse_result = + MakeParseResult({MakeImageDirectory(100, 80, 16, 16, true, 3)}); + TiffSpecOptions options; + options.sample_dimension_label = "comp"; + Schema schema; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto desired_domain, + IndexDomainBuilder(3) + .shape({80, 100, 3}) + .labels({"y", "x", "comp"}) + .Finalize()); + TENSORSTORE_ASSERT_OK(schema.Set(desired_domain)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto metadata, ResolveMetadata(parse_result, options, schema)); + + EXPECT_THAT(metadata->dimension_labels, ElementsAre("y", "x", "comp")); + EXPECT_THAT(metadata->dimension_mapping.ts_y_dim, Optional(0)); + EXPECT_THAT(metadata->dimension_mapping.ts_x_dim, Optional(1)); + EXPECT_THAT(metadata->dimension_mapping.ts_sample_dim, Optional(2)); + EXPECT_THAT(metadata->dimension_mapping.labels_by_ts_dim, + ElementsAre("y", "x", "comp")); +} + +TEST(ResolveMetadataTest, StackZ_Spp1) { + std::vector ifds; + for (int i = 0; i < 5; ++i) + ifds.push_back(MakeImageDirectory(32, 64, 8, 16, true, 1)); + auto parse_result = MakeParseResult(ifds); + TiffSpecOptions options; + options.ifd_stacking.emplace(); + options.ifd_stacking->dimensions = {"z"}; + options.ifd_stacking->ifd_count = 5; + Schema schema; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto metadata, ResolveMetadata(parse_result, options, schema)); + + // Default order: Z, Y, X + CheckBaseMetadata(*metadata, 0, 5, 3, {5, 64, 32}, dtype_v, 1, + CompressionType::kNone, PlanarConfigType::kChunky, + {1, 16, 8}, {0, 1, 2}); + + EXPECT_THAT(metadata->dimension_labels, ElementsAre("z", "y", "x")); + EXPECT_THAT(metadata->dimension_mapping.ts_y_dim, Optional(1)); + EXPECT_THAT(metadata->dimension_mapping.ts_x_dim, Optional(2)); + EXPECT_FALSE(metadata->dimension_mapping.ts_sample_dim.has_value()); + EXPECT_THAT(metadata->dimension_mapping.ts_stacked_dims, + ElementsAre(testing::Pair("z", 0))); + EXPECT_THAT(metadata->dimension_mapping.labels_by_ts_dim, + ElementsAre("z", "y", "x")); +} + +TEST(ResolveMetadataTest, StackTC_Spp3_Chunky) { + std::vector ifds; + // 2 time points, 3 channels = 6 IFDs + for (int i = 0; i < 6; ++i) + ifds.push_back(MakeImageDirectory(32, 64, 8, 16, true, 3)); + auto parse_result = MakeParseResult(ifds); + TiffSpecOptions options; + options.ifd_stacking.emplace(); + options.ifd_stacking->dimensions = {"t", "channel"}; + options.ifd_stacking->dimension_sizes = {2, 3}; // t=2, channel=3 -> 6 IFDs + options.sample_dimension_label = "rgb"; // Label the SPP dim + Schema schema; + + 
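// 2*3 stacked IFDs consume all 6 directories in the parse result. +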
TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto metadata, ResolveMetadata(parse_result, options, schema)); + + // Default order: T, Channel, Y, X, RGB + CheckBaseMetadata(*metadata, 0, 6, 5, {2, 3, 64, 32, 3}, dtype_v, 3, + CompressionType::kNone, PlanarConfigType::kChunky, + {1, 1, 16, 8, 3}, {0, 1, 2, 3, 4}); + + EXPECT_THAT(metadata->dimension_labels, + ElementsAre("t", "channel", "y", "x", "rgb")); + EXPECT_THAT(metadata->dimension_mapping.ts_y_dim, Optional(2)); + EXPECT_THAT(metadata->dimension_mapping.ts_x_dim, Optional(3)); + EXPECT_THAT(metadata->dimension_mapping.ts_sample_dim, Optional(4)); + EXPECT_THAT(metadata->dimension_mapping.ts_stacked_dims, + ::testing::UnorderedElementsAre(testing::Pair("t", 0), + testing::Pair("channel", 1))); + EXPECT_THAT(metadata->dimension_mapping.labels_by_ts_dim, + ElementsAre("t", "channel", "y", "x", "rgb")); +} + +TEST(ResolveMetadataTest, StackNonUniformIFDs) { + std::vector ifds; + ifds.push_back(MakeImageDirectory(32, 64, 8, 16, true, 1)); + ifds.push_back(MakeImageDirectory(32, 64, 8, 16, true, 1)); + ifds.push_back( + MakeImageDirectory(32, 65, 8, 16, true, 1)); // Different height + auto parse_result = MakeParseResult(ifds); + TiffSpecOptions options; + options.ifd_stacking.emplace(); + options.ifd_stacking->dimensions = {"z"}; + options.ifd_stacking->ifd_count = 3; + Schema schema; + + EXPECT_THAT( + ResolveMetadata(parse_result, options, schema), + MatchesStatus(absl::StatusCode::kInvalidArgument, + ".*IFD 2 dimensions \\(32 x 65\\) do not match IFD 0.*")); +} + +// --- Tests for resolving compression --- +TEST(ResolveMetadataCompressionTest, TiffRawSchemaNone) { + auto parse_result = MakeParseResult({MakeImageDirectory( + 100, 80, 16, 16, true, 1, 8, SampleFormatType::kUnsignedInteger, + CompressionType::kNone, PlanarConfigType::kChunky)}); + TiffSpecOptions options; + Schema schema; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto metadata, ResolveMetadata(parse_result, options, schema)); + + EXPECT_EQ(metadata->compressor, nullptr); + EXPECT_EQ(metadata->compression_type, CompressionType::kNone); +} + +TEST(ResolveMetadataCompressionTest, TiffDeflateUnsupportedSchemaNone) { + auto parse_result = MakeParseResult({MakeImageDirectory( + 100, 80, 16, 16, true, 1, 8, SampleFormatType::kUnsignedInteger, + CompressionType::kCCITTGroup4, PlanarConfigType::kChunky)}); + TiffSpecOptions options; + Schema schema; + + EXPECT_THAT(ResolveMetadata(parse_result, options, schema), + MatchesStatus(absl::StatusCode::kUnimplemented, + ".*Unsupported TIFF compression type tag: 3.*")); +} + +TEST(ResolveMetadataCompressionTest, TiffRawSchemaZstd) { + auto parse_result = MakeParseResult({MakeImageDirectory( + 100, 80, 16, 16, true, 1, 8, SampleFormatType::kUnsignedInteger, + CompressionType::kNone, PlanarConfigType::kChunky)}); + TiffSpecOptions options; + Schema schema; + TENSORSTORE_ASSERT_OK( + schema.Set(CodecSpec::FromJson( + {{"driver", "tiff"}, {"compression", {{"type", "zstd"}}}}) + .value())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto metadata, ResolveMetadata(parse_result, options, schema)); + + // Expect Zstd compressor (schema overrides raw) but original tag type. 
+ ASSERT_NE(metadata->compressor, nullptr); + EXPECT_EQ(metadata->compression_type, + CompressionType::kNone); // Original tag + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto json_repr, + jb::ToJson(metadata->compressor)); + EXPECT_THAT(json_repr, tensorstore::MatchesJson({{"type", "zstd"}})); +} + +// --- Tests for ValidateResolvedMetadata --- +TEST(ValidateResolvedMetadataTest, CompatibleConstraints) { + auto parse_result = MakeParseResult({MakeImageDirectory(100, 80, 16, 16)}); + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto metadata, + ResolveMetadata(parse_result, {}, {})); + TiffMetadataConstraints constraints; + + TENSORSTORE_EXPECT_OK(ValidateResolvedMetadata(*metadata, constraints)); + constraints.rank = 2; + TENSORSTORE_EXPECT_OK(ValidateResolvedMetadata(*metadata, constraints)); + constraints.rank = dynamic_rank; + constraints.dtype = dtype_v; + TENSORSTORE_EXPECT_OK(ValidateResolvedMetadata(*metadata, constraints)); + constraints.dtype = std::nullopt; + constraints.shape = {{80, 100}}; + TENSORSTORE_EXPECT_OK(ValidateResolvedMetadata(*metadata, constraints)); +} + +TEST(ValidateResolvedMetadataTest, IncompatibleRank) { + auto parse_result = MakeParseResult({MakeImageDirectory(100, 80, 16, 16)}); + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto metadata, + ResolveMetadata(parse_result, {}, {})); + TiffMetadataConstraints constraints; + constraints.rank = 3; + EXPECT_THAT(ValidateResolvedMetadata(*metadata, constraints), + MatchesStatus(absl::StatusCode::kFailedPrecondition, + ".*Resolved TIFF rank \\(2\\) does not match user " + "constraint rank \\(3\\).*")); +} + +// Helper function to encode an array to a Cord for testing DecodeChunk +Result EncodeArrayToCord(SharedArrayView array, + tensorstore::endian source_endian, + ContiguousLayoutOrder order) { + absl::Cord cord; + riegeli::CordWriter<> writer(&cord); + if (!tensorstore::internal::EncodeArrayEndian(array, source_endian, order, + writer)) { + return writer.status(); + } + if (!writer.Close()) { + return writer.status(); + } + return cord; +} + +// Test fixture for DecodeChunk tests +class DecodeChunkTest : public ::testing::Test { + protected: + TiffMetadata CreateMetadata( + DataType dtype, span shape, + span grid_chunk_shape, + ContiguousLayoutOrder layout_order = ContiguousLayoutOrder::c, + Endian endian = Endian::kLittle, + CompressionType compression = CompressionType::kNone, + uint16_t samples_per_pixel = 1, + PlanarConfigType planar_config = PlanarConfigType::kChunky) { + TiffMetadata metadata; + metadata.dtype = dtype; + metadata.rank = shape.size(); + metadata.shape.assign(shape.begin(), shape.end()); + metadata.endian = endian; + metadata.compression_type = compression; + metadata.samples_per_pixel = samples_per_pixel; + metadata.planar_config = planar_config; + metadata.compressor = Compressor{nullptr}; + + TENSORSTORE_CHECK_OK( + metadata.chunk_layout.Set(RankConstraint{metadata.rank})); + TENSORSTORE_CHECK_OK(metadata.chunk_layout.Set( + ChunkLayout::ChunkShape(grid_chunk_shape, /*hard=*/true))); + TENSORSTORE_CHECK_OK(metadata.chunk_layout.Set(ChunkLayout::GridOrigin( + GetConstantVector(metadata.rank), /*hard=*/true))); + std::vector inner_order(metadata.rank); + tensorstore::SetPermutation(layout_order, span(inner_order)); + TENSORSTORE_CHECK_OK(metadata.chunk_layout.Set( + ChunkLayout::InnerOrder(inner_order, /*hard=*/true))); + TENSORSTORE_CHECK_OK(metadata.chunk_layout.Finalize()); + + metadata.layout_order = layout_order; + + if (!grid_chunk_shape.empty()) { + metadata.ifd0_chunk_height = + (metadata.rank > 0) ? 
grid_chunk_shape[metadata.rank - 2] : 0; + // Assuming X is last + metadata.ifd0_chunk_width = + (metadata.rank > 0) ? grid_chunk_shape.back() : 0; + if (planar_config == PlanarConfigType::kPlanar && metadata.rank > 0) { + metadata.ifd0_chunk_height = + (metadata.rank > 1) ? grid_chunk_shape[metadata.rank - 2] : 0; // Y + metadata.ifd0_chunk_width = + (metadata.rank > 0) ? grid_chunk_shape.back() : 0; // X + } + } + + return metadata; + } +}; + +TEST_F(DecodeChunkTest, UncompressedUint8CorderLittleEndianChunkySpp1) { + const Index shape[] = {2, 3}; + const Index grid_chunk_shape[] = {2, 3}; // Grid shape matches image shape + auto metadata = CreateMetadata( + dtype_v, shape, grid_chunk_shape, ContiguousLayoutOrder::c, + Endian::kLittle, CompressionType::kNone, 1, PlanarConfigType::kChunky); + auto expected_array = MakeArray({{1, 2, 3}, {4, 5, 6}}); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto input_cord, EncodeArrayToCord(expected_array, endian::little, + ContiguousLayoutOrder::c)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto decoded_array_void, + DecodeChunk(metadata, input_cord)); + SharedArray decoded_array( + std::static_pointer_cast(decoded_array_void.pointer()), + expected_array.layout()); + + EXPECT_EQ(decoded_array, expected_array); +} + +TEST_F(DecodeChunkTest, UncompressedUint8CorderLittleEndianChunkySpp3) { + const Index shape[] = {2, 3, 3}; // Y, X, C + const Index grid_chunk_shape[] = {2, 3, 3}; // Grid shape is Y, X + const uint16_t spp = 3; + auto metadata = CreateMetadata( + dtype_v, shape, grid_chunk_shape, ContiguousLayoutOrder::c, + Endian::kLittle, CompressionType::kNone, spp, PlanarConfigType::kChunky); + + auto expected_array = MakeArray( + {{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}, + {{11, 12, 13}, {14, 15, 16}, {17, 18, 19}}}); // Y=2, X=3, C=3 + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto input_cord, EncodeArrayToCord(expected_array, endian::little, + ContiguousLayoutOrder::c)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto decoded_array_void, + DecodeChunk(metadata, input_cord)); + SharedArray decoded_array( + std::static_pointer_cast(decoded_array_void.pointer()), + expected_array.layout()); + + EXPECT_THAT(decoded_array.shape(), ElementsAre(2, 3, 3)); + EXPECT_EQ(decoded_array, expected_array); +} + +TEST_F(DecodeChunkTest, UncompressedUint16FortranOrderBigEndian) { + const Index shape[] = {2, 3}; + const Index grid_chunk_shape[] = {2, 3}; + auto metadata = CreateMetadata(dtype_v, shape, grid_chunk_shape, + ContiguousLayoutOrder::fortran, Endian::kBig); + auto expected_array = tensorstore::MakeCopy( + MakeArray({{100, 200, 300}, {400, 500, 600}}), + ContiguousLayoutOrder::fortran); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto input_cord, EncodeArrayToCord(expected_array, endian::big, + ContiguousLayoutOrder::fortran)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto decoded_array_void, + DecodeChunk(metadata, input_cord)); + SharedArray decoded_array( + std::static_pointer_cast(decoded_array_void.pointer()), + expected_array.layout()); + + EXPECT_EQ(decoded_array, expected_array); +} + +TEST_F(DecodeChunkTest, UncompressedFloat32CorderBigEndianToNative) { + const Index shape[] = {2, 2}; + // Native endian might be little, source is big + auto metadata = CreateMetadata(dtype_v, shape, shape, + ContiguousLayoutOrder::c, Endian::kBig); + auto expected_array = MakeArray({{1.0f, 2.5f}, {-3.0f, 4.75f}}); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto input_cord, + EncodeArrayToCord(expected_array, endian::big, ContiguousLayoutOrder::c)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto 
decoded_array_void, + DecodeChunk(metadata, input_cord)); + SharedArray decoded_array( + std::static_pointer_cast(decoded_array_void.pointer()), + expected_array.layout()); + + EXPECT_EQ(decoded_array, expected_array); +} + +TEST_F(DecodeChunkTest, UncompressedRank3) { + const Index shape[] = {2, 3, 2}; // Y, X, C + auto metadata = CreateMetadata(dtype_v, shape, shape, + ContiguousLayoutOrder::c, Endian::kLittle); + auto expected_array = MakeArray( + {{{1, 2}, {3, 4}, {5, 6}}, {{7, 8}, {9, 10}, {11, 12}}}); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto input_cord, EncodeArrayToCord(expected_array, endian::little, + ContiguousLayoutOrder::c)); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto decoded_array_void, + DecodeChunk(metadata, input_cord)); + SharedArray decoded_array( + std::static_pointer_cast(decoded_array_void.pointer()), + expected_array.layout()); + + EXPECT_EQ(decoded_array, expected_array); +} + +TEST_F(DecodeChunkTest, ErrorInputTooSmall) { + const Index shape[] = {2, 3}; + auto metadata = CreateMetadata(dtype_v, shape, shape, + ContiguousLayoutOrder::c, Endian::kLittle); + auto expected_array = MakeArray({{1, 2, 3}, {4, 5, 6}}); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto input_cord, EncodeArrayToCord(expected_array, endian::little, + ContiguousLayoutOrder::c)); + + // Truncate the cord + absl::Cord truncated_cord = input_cord.Subcord(0, input_cord.size() - 1); + + EXPECT_THAT( + DecodeChunk(metadata, truncated_cord), + MatchesStatus(absl::StatusCode::kInvalidArgument, ".*Not enough data.*")); +} + +TEST_F(DecodeChunkTest, ErrorExcessData) { + const Index shape[] = {2, 3}; + auto metadata = CreateMetadata(dtype_v, shape, shape, + ContiguousLayoutOrder::c, Endian::kLittle); + auto expected_array = MakeArray({{1, 2, 3}, {4, 5, 6}}); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto input_cord, EncodeArrayToCord(expected_array, endian::little, + ContiguousLayoutOrder::c)); + + // Add extra data + input_cord.Append("extra"); + + EXPECT_THAT(DecodeChunk(metadata, input_cord), + MatchesStatus(absl::StatusCode::kInvalidArgument, + ".*End of data expected.*")); +} + +// --- Placeholder Tests for Compression --- +// These require compressor implementations to be registered and +// potentially pre-compressed "golden" data. +TEST_F(DecodeChunkTest, DISABLED_CompressedDeflate) { + // 1. Register Deflate compressor (implementation needed separately) + // RegisterTiffCompressor("deflate", ...); + + // 2. Create metadata with deflate compression + const Index shape[] = {4, 5}; + auto metadata = + CreateMetadata(dtype_v, shape, shape, ContiguousLayoutOrder::c, + Endian::kLittle, CompressionType::kDeflate); + // Get compressor instance via ResolveMetadata or manually for test + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + metadata.compressor, + Compressor::FromJson({{"type", "deflate"}})); // Assumes registration + + // 3. Create expected *decoded* array + auto expected_array = AllocateArray(shape, ContiguousLayoutOrder::c, + tensorstore::value_init); + // Fill with some data... + for (Index i = 0; i < 4; ++i) + for (Index j = 0; j < 5; ++j) expected_array(i, j) = i * 10 + j; + + // 4. 
Create *compressed* input cord (requires deflate implementation or + // golden data) Example using golden data (replace hex string with actual + // compressed bytes) std::string compressed_hex = "789c..."; + // TENSORSTORE_ASSERT_OK_AND_ASSIGN(absl::Cord input_cord, + // HexToCord(compressed_hex)); + absl::Cord input_cord; // Placeholder - needs real compressed data + GTEST_SKIP() + << "Skipping compressed test until compressor impl/data is available."; + + // 5. Call DecodeChunk and verify + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto decoded_array_void, + DecodeChunk(metadata, input_cord)); + // Cast the void result to the expected type, preserving layout + SharedArray decoded_array( + std::static_pointer_cast(decoded_array_void.pointer()), + expected_array.layout()); + + EXPECT_EQ(decoded_array, expected_array); +} + +} // namespace \ No newline at end of file diff --git a/tensorstore/driver/tiff/schema.yml b/tensorstore/driver/tiff/schema.yml new file mode 100644 index 000000000..06b2b9d61 --- /dev/null +++ b/tensorstore/driver/tiff/schema.yml @@ -0,0 +1,166 @@ +$schema: http://json-schema.org/draft-07/schema# +$id: driver/tiff +title: Read-only TIFF driver +description: | + Provides read-only access to TIFF files or sequences of IFDs backed by a KeyValueStore. + Creation of new TIFF files is not supported. +allOf: +- $ref: KeyValueStoreBackedChunkDriver +- type: object + properties: + driver: + const: tiff + description: Specifies the use of the read-only TIFF driver. + metadata: + title: TIFF dataset metadata constraints. + description: | + Specifies constraints that are validated against the metadata read + from the existing TIFF file. All members are optional. If specified, + they must match the corresponding properties inferred from the TIFF tags + and interpretation options. + allOf: + - type: object + properties: + dtype: + $ref: DataType + title: Data type constraint. + description: Constrains the expected data type of the TIFF dataset. + shape: + type: array + items: + type: integer + minimum: 0 + title: Shape constraint. + description: | + Constrains the expected shape (dimensions) of the TIFF dataset. Length must match the rank. + examples: + - [ 1024, 1024, 50 ] + tiff: + title: TIFF-specific interpretation options. + description: Options controlling how the existing TIFF file or IFD sequence is interpreted. + type: object + properties: + ifd: + type: integer + minimum: 0 + default: 0 + title: Image File Directory (IFD) index. + description: | + Specifies which IFD to open when not using IFD stacking. Defaults to 0. + Cannot be non-zero if `ifd_stacking` is also specified. + ifd_stacking: + type: object + title: IFD Stacking Options. + description: | + Configures interpretation of a sequence of IFDs as additional TensorStore dimensions. + Cannot be specified if `ifd` is non-zero. + properties: + dimensions: + type: array + items: + type: string + minItems: 1 + uniqueItems: true + title: Labels for stacked dimensions. + description: | + Required if `ifd_stacking` is used. Specifies the labels for the + dimensions represented by the IFD sequence (e.g., ["z", "t"]). + dimension_sizes: + type: array + items: + type: integer + minimum: 1 + title: Sizes of stacked dimensions. + description: | + Optional. Must match the length of `dimensions`. Required if `dimensions` + has more than one entry. If only one dimension is specified, `ifd_count` can + be used instead. If both are specified, their product must match `ifd_count`. 
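+          # Illustrative example: dimensions ["t", "c"] with dimension_sizes [5, 3] describes 15 IFDs, with "c" varying fastest by default.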
+ ifd_count: + type: integer + minimum: 1 + title: Total number of IFDs in the stack. + description: | + Optional. Specifies the total number of IFDs involved in the stack. + Required if `dimension_sizes` is not specified for a single stack dimension. + If specified along with `dimension_sizes`, their product must match `ifd_count`. + ifd_sequence_order: + type: array + items: + type: string + uniqueItems: true + title: IFD sequence iteration order. + description: | + Optional. Specifies the order of stacked dimensions within the flat IFD sequence. + Must be a permutation of `dimensions`. Defaults to the order in `dimensions`, with + the last dimension varying fastest. + required: + - dimensions + sample_dimension_label: + type: string + minLength: 1 + title: Label for the sample dimension. + description: | + Optional. Specifies the label for the dimension derived from the TIFF SamplesPerPixel + tag when it's greater than 1 (e.g., "c", "channel", "rgba"). Defaults internally to "c". + Cannot conflict with labels in `ifd_stacking.dimensions`. + additionalProperties: false + +# Definitions for TIFF-specific codec components +definitions: + codec-properties: + $id: '#codec-properties' + type: object + properties: + compression: + $ref: '#/definitions/compression' + title: Specifies the expected chunk compression method. + codec: + $id: 'driver/tiff/Codec' + allOf: + - $ref: Codec + - type: object + properties: + driver: + const: "tiff" + - $ref: "#/definitions/codec-properties" + compression: + $id: 'driver/tiff/Compression' + type: object + description: | + The `.type` member identifies the compression method found in the TIFF file. + The remaining members are specific to the compression method. Corresponds to + the TIFF Compression tag. + properties: + type: + type: string + description: Identifies the compressor used in the TIFF file. + required: + - type + compression-raw: + $id: 'driver/tiff/Compression/raw' + description: Chunks are stored uncompressed (TIFF Compression=1 or None). + allOf: + - $ref: "#/definitions/compression" + - type: object + properties: + type: + const: raw + compression-zlib: + $id: 'driver/tiff/Compression/zlib' + description: | + Specifies `zlib`/deflate compression (TIFF Compression=8 or 32946). + allOf: + - $ref: "#/definitions/compression" + - type: object + properties: + type: + const: zlib + compression-zstd: + $id: 'driver/tiff/Compression/zstd' + description: Specifies `zstd` compression (TIFF Compression=50000). 
+ allOf: + - $ref: "#/definitions/compression" + - type: object + properties: + type: + const: zstd diff --git a/tensorstore/driver/tiff/testdata/generate.py b/tensorstore/driver/tiff/testdata/generate.py new file mode 100644 index 000000000..4acb1a154 --- /dev/null +++ b/tensorstore/driver/tiff/testdata/generate.py @@ -0,0 +1,243 @@ +import numpy as np +import tifffile +import os +from pathlib import Path +import logging + +# Configure logging +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) + +OUTPUT_DIR = Path("single") +BASE_HEIGHT = 32 +BASE_WIDTH = 48 +TILE_SHAPE = (16, 16) + + +def generate_coordinate_array(shape, dtype=np.uint16): + shape = tuple(shape) + arr = np.zeros(shape, dtype=dtype) + it = np.nditer(arr, flags=["multi_index"], op_flags=["readwrite"]) + count = 1 + while not it.finished: + if np.issubdtype(dtype, np.integer): + iinfo = np.iinfo(dtype) + modulo_base = int(iinfo.max) + 1 + if modulo_base > 0: + current_val = count % modulo_base + else: + current_val = count + else: + current_val = count + + arr[it.multi_index] = current_val + count += 1 + it.iternext() + return arr + + +def write_tiff( + filename: Path, + base_shape: tuple, + dtype: np.dtype, + stack_dims: dict | None = None, + spp: int = 1, + planar_config_str: str = "contig", + tile_shape: tuple | None = TILE_SHAPE, + ifd_sequence_order: list[str] | None = None, + photometric: str | None = None, + extrasamples: tuple | None = None, + compression: str | None = None, + description: str | None = None, +): + filename = Path(filename) + filename.parent.mkdir(parents=True, exist_ok=True) + logging.info(f"Generating TIFF: {filename.name}") + logging.info( + f" Stack: {stack_dims or 'None'}, SPP: {spp}, Planar: {planar_config_str}, Dtype: {dtype.__name__}, Tile: {tile_shape}" + ) + + stack_dims = stack_dims or {} + + if not stack_dims: + stack_labels_numpy_order = [] + stack_shape_numpy_order = [] + elif ifd_sequence_order: + stack_labels_numpy_order = ifd_sequence_order # Slowest -> Fastest + stack_shape_numpy_order = [ + stack_dims[label] for label in stack_labels_numpy_order + ] + else: + # Default order: alphabetical for consistency if not specified + stack_labels_numpy_order = sorted(stack_dims.keys()) + stack_shape_numpy_order = [ + stack_dims[label] for label in stack_labels_numpy_order + ] + logging.warning( + f" ifd_sequence_order not specified for {filename.name}, using default alphabetical order: {stack_labels_numpy_order}" + ) + + numpy_shape_list = list(stack_shape_numpy_order) + height, width = base_shape + + if spp > 1 and planar_config_str == "separate": + numpy_shape_list.append(spp) + + numpy_shape_list.extend([height, width]) + + if spp > 1 and planar_config_str == "contig": + numpy_shape_list.append(spp) + + full_shape = tuple(numpy_shape_list) + logging.info(f" Generating numpy data with shape: {full_shape}") + + full_data = generate_coordinate_array(full_shape, dtype=dtype) + + # Reshape for IFD slicing + num_ifds = np.prod(stack_shape_numpy_order or [1]) + flat_ifd_data = ( + full_data.reshape((num_ifds, height, width, spp)) + if spp > 1 and planar_config_str == "contig" + else full_data.reshape((num_ifds, height, width)) + ) + + tifffile_kwargs = { + "planarconfig": planar_config_str, + "dtype": dtype, + "shape": ( + (height, width, spp) + if spp > 1 and planar_config_str == "contig" + else (height, width) + ), + } + + if photometric: + tifffile_kwargs["photometric"] = photometric + if extrasamples: + tifffile_kwargs["extrasamples"] = 
extrasamples + if tile_shape: + tifffile_kwargs["tile"] = tile_shape + if compression: + tifffile_kwargs["compression"] = compression + if description: + tifffile_kwargs["description"] = description + + try: + for i in range(num_ifds): + tifffile.imwrite( + filename, + flat_ifd_data[i], + append=i > 0, + **tifffile_kwargs, + ) + logging.info(f" Successfully wrote {filename.name}") + except Exception as e: + logging.error(f" Failed to write {filename.name}: {e}") + if filename.exists(): + os.remove(filename) + + OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + logging.info(f"Starting TIFF file generation in {OUTPUT_DIR}") + logging.info( + f"Using Base Shape: {BASE_HEIGHT}x{BASE_WIDTH}, Tile Shape: {TILE_SHAPE}" + ) + + +# --- Test Case 1: Simple Z-Stack (5 planes), SPP=1 --- +write_tiff( + filename=OUTPUT_DIR / "stack_z5_spp1_uint8.tif", + base_shape=(BASE_HEIGHT, BASE_WIDTH), + dtype=np.uint8, + stack_dims={"z": 5}, + description="Z=5, SPP=1, uint8, Contig, Tile=16x16", +) + +# --- Test Case 2: Z-Stack (4 planes), SPP=3 (RGB), Contig --- +write_tiff( + filename=OUTPUT_DIR / "stack_z4_spp3_rgb_uint16.tif", + base_shape=(BASE_HEIGHT, BASE_WIDTH), + dtype=np.uint16, + stack_dims={"z": 4}, + spp=3, + planar_config_str="contig", + photometric="rgb", # Explicitly RGB + description="Z=4, SPP=3, uint16, Contig, Tile=16x16", +) + +# --- Test Case 3: Time (2) x Channel (3) Stack, SPP=1 --- +# Default IFD order: C fastest, then T (alphabetical: c, t) +write_tiff( + filename=OUTPUT_DIR / "stack_t2_c3_spp1_float32.tif", + base_shape=(BASE_HEIGHT, BASE_WIDTH), + dtype=np.float32, + stack_dims={"t": 2, "c": 3}, + description="T=2, C=3, SPP=1, float32, Contig, Tile=16x16. Default IFD order (C fastest)", +) + +# --- Test Case 4: Time (2) x Channel (3) Stack, SPP=1, T fastest in file --- +# Specify IFD sequence order: ['c', 't'] means C varies slowest, T fastest +write_tiff( + filename=OUTPUT_DIR / "stack_c3_t2_spp1_t_fastest.tif", + base_shape=(BASE_HEIGHT, BASE_WIDTH), + dtype=np.uint8, + stack_dims={"c": 3, "t": 2}, + ifd_sequence_order=["c", "t"], # C slowest, T fastest + description="C=3, T=2, SPP=1, uint8, Contig, Tile=16x16. T fastest IFD order", +) + +# --- Test Case 5: Stripped Z-Stack (3 planes), SPP=1 --- +write_tiff( + filename=OUTPUT_DIR / "stack_z3_spp1_uint8_stripped.tif", + base_shape=(BASE_HEIGHT, BASE_WIDTH), + dtype=np.uint8, + stack_dims={"z": 3}, + tile_shape=None, # Stripped + description="Z=3, SPP=1, uint8, Contig, Stripped", +) + +# --- Test Case 6: Single IFD, but SPP=4 (RGBA example) --- +write_tiff( + filename=OUTPUT_DIR / "single_spp4_rgba_uint8.tif", + base_shape=(BASE_HEIGHT, BASE_WIDTH), + dtype=np.uint8, + stack_dims=None, # Single IFD + spp=4, + planar_config_str="contig", + photometric="rgb", # Use 'rgb' + extrasamples=(1,), # Specify associated alpha + description="Single IFD, SPP=4 (RGBA), uint8, Contig, Tile=16x16", +) + +# --- Test Case 7: Z (2) x T (3) stack, SPP=1, Different Dtype --- +# IFD order Z, T (T fastest) +write_tiff( + filename=OUTPUT_DIR / "stack_z2_t3_spp1_int16.tif", + base_shape=(BASE_HEIGHT, BASE_WIDTH), + dtype=np.int16, + stack_dims={"z": 2, "t": 3}, + ifd_sequence_order=["z", "t"], # T fastest + description="Z=2, T=3, SPP=1, int16, Contig, Tile=16x16. 
T fastest IFD order",
+)
+
+# --- Test Case 8: single-image, Zstd-compressed ---
+write_tiff(
+    filename=OUTPUT_DIR / "single_zstd_uint8.tif",
+    base_shape=(BASE_HEIGHT, BASE_WIDTH),
+    dtype=np.uint8,
+    stack_dims=None,
+    compression="zstd",
+    description="Single IFD, uint8, Zstd compression, Tile=16x16",
+)
+
+# --- Test Case 9: single-image, zlib-compressed ---
+write_tiff(
+    filename=OUTPUT_DIR / "single_zlib_uint8.tif",
+    base_shape=(BASE_HEIGHT, BASE_WIDTH),
+    dtype=np.uint8,
+    stack_dims=None,
+    compression="zlib",
+    description="Single IFD, uint8, zlib compression, Tile=16x16",
+)
+
+logging.info(f"Finished generating TIFF files in {OUTPUT_DIR}")
diff --git a/tensorstore/driver/tiff/zlib_compressor.cc b/tensorstore/driver/tiff/zlib_compressor.cc
new file mode 100644
index 000000000..6e7b34235
--- /dev/null
+++ b/tensorstore/driver/tiff/zlib_compressor.cc
@@ -0,0 +1,36 @@
+// Copyright 2025 The TensorStore Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/// Defines the "zlib" compressor for the Tiff driver.
+#include "tensorstore/driver/tiff/compressor.h"
+#include "tensorstore/driver/tiff/compressor_registry.h"
+#include "tensorstore/internal/compression/zlib_compressor.h"
+#include "tensorstore/internal/json_binding/json_binding.h"
+
+namespace tensorstore {
+namespace internal_tiff {
+namespace {
+
+using ::tensorstore::internal::ZlibCompressor;
+namespace jb = ::tensorstore::internal_json_binding;
+
+struct Registration {
+  Registration() {
+    RegisterCompressor<ZlibCompressor>("zlib", jb::Object());
+  }
+} registration;
+
+}  // namespace
+}  // namespace internal_tiff
+}  // namespace tensorstore
\ No newline at end of file
diff --git a/tensorstore/driver/tiff/zstd_compressor.cc b/tensorstore/driver/tiff/zstd_compressor.cc
new file mode 100644
index 000000000..28961fddf
--- /dev/null
+++ b/tensorstore/driver/tiff/zstd_compressor.cc
@@ -0,0 +1,35 @@
+// Copyright 2025 The TensorStore Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/// Defines the "zstd" compressor for the Tiff driver.
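+/// Registered under the TIFF codec "compression" type "zstd".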
+#include "tensorstore/internal/compression/zstd_compressor.h" + +#include "tensorstore/driver/tiff/compressor.h" +#include "tensorstore/driver/tiff/compressor_registry.h" +#include "tensorstore/internal/json_binding/json_binding.h" + +namespace tensorstore { +namespace internal_tiff { +namespace { + +using ::tensorstore::internal::ZstdCompressor; +namespace jb = ::tensorstore::internal_json_binding; + +struct Registration { + Registration() { RegisterCompressor("zstd", jb::Object()); } +} registration; + +} // namespace +} // namespace internal_tiff +} // namespace tensorstore \ No newline at end of file diff --git a/tensorstore/kvstore/tiff/BUILD b/tensorstore/kvstore/tiff/BUILD new file mode 100644 index 000000000..8d16adf12 --- /dev/null +++ b/tensorstore/kvstore/tiff/BUILD @@ -0,0 +1,126 @@ +load("//bazel:tensorstore.bzl", "tensorstore_cc_library", "tensorstore_cc_test") + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +tensorstore_cc_library( + name = "tiff_key_value_store", + srcs = [ + "tiff_key_value_store.cc", + ], + hdrs = [ + "tiff_key_value_store.h", + ], + deps = [ + ":tiff_details", + ":tiff_dir_cache", + "//tensorstore/internal:data_copy_concurrency_resource", + "//tensorstore/internal:intrusive_ptr", + "//tensorstore/internal/cache", + "//tensorstore/internal/cache:async_cache", + "//tensorstore/internal/cache:cache_pool_resource", + "//tensorstore/kvstore", + "//tensorstore/util:future", + "@com_google_absl//absl/functional:function_ref", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_riegeli//riegeli/bytes:cord_reader", + ], +) + +tensorstore_cc_test( + name = "tiff_key_value_store_test", + srcs = ["tiff_key_value_store_test.cc"], + deps = [ + ":tiff_key_value_store", + ":tiff_test_util", + "//tensorstore/kvstore", + "//tensorstore/kvstore:test_util", + "//tensorstore/kvstore/memory", + "//tensorstore/util:future", + "@com_google_googletest//:gtest_main", + ], +) + +tensorstore_cc_library( + name = "tiff_dir_cache", + srcs = ["tiff_dir_cache.cc"], + hdrs = ["tiff_dir_cache.h"], + deps = [ + ":tiff_details", + "//tensorstore/internal/cache", + "//tensorstore/internal/cache:async_cache", + "//tensorstore/internal/cache:async_initialized_cache_mixin", + "//tensorstore/internal/cache_key", + "//tensorstore/internal/estimate_heap_usage", + "//tensorstore/kvstore", + "//tensorstore/kvstore:byte_range", + "//tensorstore/util:executor", + "//tensorstore/util:future", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/hash", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings:cord", + "@com_google_riegeli//riegeli/bytes:cord_reader", + ], +) + +tensorstore_cc_test( + name = "tiff_dir_cache_test", + srcs = ["tiff_dir_cache_test.cc"], + deps = [ + ":tiff_dir_cache", + ":tiff_test_util", + "//tensorstore:context", + "//tensorstore/internal/cache", + "//tensorstore/internal/cache:cache_pool_resource", + "//tensorstore/kvstore", + "//tensorstore/kvstore:test_util", + "//tensorstore/kvstore/memory", + "//tensorstore/util:executor", + "//tensorstore/util:status_testutil", + "@com_google_absl//absl/strings:cord", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +tensorstore_cc_library( + name = "tiff_details", + srcs = ["tiff_details.cc"], + hdrs = ["tiff_details.h"], + deps = [ + 
"//tensorstore/internal/log:verbose_flag", + "//tensorstore/util:status", + "//tensorstore/util:str_cat", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings:str_format", + "@com_google_riegeli//riegeli/bytes:reader", + "@com_google_riegeli//riegeli/endian:endian_reading", + ], +) + +tensorstore_cc_test( + name = "tiff_details_test", + size = "small", + srcs = ["tiff_details_test.cc"], + deps = [ + ":tiff_details", + "//tensorstore/util:status_testutil", + "@com_google_absl//absl/status", + "@com_google_googletest//:gtest_main", + "@com_google_riegeli//riegeli/bytes:cord_reader", + "@com_google_riegeli//riegeli/bytes:string_reader", + ], +) + +tensorstore_cc_library( + name = "tiff_test_util", + srcs = ["tiff_test_util.cc"], + hdrs = ["tiff_test_util.h"], +) diff --git a/tensorstore/kvstore/tiff/index.rst b/tensorstore/kvstore/tiff/index.rst new file mode 100644 index 000000000..ad85771db --- /dev/null +++ b/tensorstore/kvstore/tiff/index.rst @@ -0,0 +1,54 @@ +.. _tiff-kvstore-driver: + +``tiff`` Key-Value Store driver +====================================================== + +The ``tiff`` driver implements support for reading from +`TIFF `_ format +files on top of a base key-value store. It provides access to individual tiles or strips +within TIFF images in a standardized key-value format. + +.. json:schema:: kvstore/tiff + +Example JSON specifications +--------------------------- + +.. code-block:: json + + { + "driver": "tiff", + "base": "gs://my-bucket/path/to/file.tiff" + } + +.. code-block:: json + + { + "driver": "tiff", + "base": { + "driver": "file", + "path": "/path/to/image.tiff" + } + } + + +Key Format +---------- + +Keys are formatted as: ``chunk//`` + +* ````: The Image File Directory (IFD) index (0-based). +* ````: Linear index for the tile/strip (0-based) + +For example, the key ``chunk/0/3`` refers to the third tile/strip in the first IFD. + +Features +-------- + +* Support for both tiled and stripped TIFF formats +* Multi-page TIFF support via IFD indices +* Handles various bit depths and sample formats + +Limitations +----------- + +* Writing is not supported (read-only) and not all TIFF features are supported. \ No newline at end of file diff --git a/tensorstore/kvstore/tiff/schema.yml b/tensorstore/kvstore/tiff/schema.yml new file mode 100644 index 000000000..de729872f --- /dev/null +++ b/tensorstore/kvstore/tiff/schema.yml @@ -0,0 +1,33 @@ +$schema: http://json-schema.org/draft-07/schema# +$id: kvstore/tiff +title: Read-only adapter for accessing tiles/strips within TIFF images. +description: JSON specification of the TIFF key-value store. +allOf: +- $ref: KvStore +- type: object + properties: + driver: + const: tiff + base: + $ref: KvStore + title: Underlying key-value store with path to a TIFF file. + description: |- + Key-value store that provides access to the TIFF file. + Each key in this store corresponds to a TIFF file. + cache_pool: + $ref: ContextResource + description: |- + Specifies or references a previously defined `Context.cache_pool`. It + is typically more convenient to specify a default `~Context.cache_pool` + in the `.context`. + default: cache_pool + data_copy_concurrency: + $ref: ContextResource + description: |- + Specifies or references a previously defined + `Context.data_copy_concurrency`. It is typically more + convenient to specify a default `~Context.data_copy_concurrency` in + the `.context`. 
+ default: data_copy_concurrency + required: + - base \ No newline at end of file diff --git a/tensorstore/kvstore/tiff/tiff_details.cc b/tensorstore/kvstore/tiff/tiff_details.cc new file mode 100644 index 000000000..b86fe5726 --- /dev/null +++ b/tensorstore/kvstore/tiff/tiff_details.cc @@ -0,0 +1,607 @@ +// Copyright 2025 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/kvstore/tiff/tiff_details.h" + +#include +#include + +#include +#include + +#include "absl/log/absl_log.h" +#include "absl/status/status.h" +#include "absl/strings/str_format.h" +#include "riegeli/bytes/reader.h" +#include "riegeli/endian/endian_reading.h" +#include "tensorstore/internal/log/verbose_flag.h" +#include "tensorstore/util/status.h" +#include "tensorstore/util/str_cat.h" + +namespace tensorstore { +namespace internal_tiff_kvstore { + +namespace { + +using ::riegeli::ReadBigEndian16; +using ::riegeli::ReadBigEndian32; +using ::riegeli::ReadBigEndian64; +using ::riegeli::ReadLittleEndian16; +using ::riegeli::ReadLittleEndian32; +using ::riegeli::ReadLittleEndian64; + +ABSL_CONST_INIT internal_log::VerboseFlag tiff_logging("tiff_details"); + +// Helper function to read a value based on endianness +template +bool ReadEndian(riegeli::Reader& reader, Endian endian, T& value) { + if (endian == Endian::kLittle) { + if constexpr (sizeof(T) == 2) return ReadLittleEndian16(reader, value); + if constexpr (sizeof(T) == 4) return ReadLittleEndian32(reader, value); + if constexpr (sizeof(T) == 8) return ReadLittleEndian64(reader, value); + } else { + if constexpr (sizeof(T) == 2) return ReadBigEndian16(reader, value); + if constexpr (sizeof(T) == 4) return ReadBigEndian32(reader, value); + if constexpr (sizeof(T) == 8) return ReadBigEndian64(reader, value); + } + return false; +} + +// Helper to find an IFD entry by tag +const IfdEntry* GetIfdEntry(Tag tag, const std::vector& entries) { + const IfdEntry* found = nullptr; + for (const auto& entry : entries) { + if (entry.tag == tag) { + if (found) { + return nullptr; // Duplicate tag + } + found = &entry; + } + } + return found; +} + +// Helper to parse a uint32 value from an IFD entry +absl::Status ParseUint32Value(const IfdEntry* entry, uint32_t& out) { + if (!entry) { + return absl::NotFoundError("Required tag missing"); + } + if (entry->count != 1) { + return absl::InvalidArgumentError("Expected count of 1"); + } + if (entry->type != TiffDataType::kShort && + entry->type != TiffDataType::kLong) { + return absl::InvalidArgumentError("Expected SHORT or LONG type"); + } + out = static_cast(entry->value_or_offset); + return absl::OkStatus(); +} + +// Helper to parse array of uint64 values from an IFD entry +absl::Status ParseUint64Array(const IfdEntry* entry, + std::vector& out) { + if (!entry) { + return absl::NotFoundError("Required tag missing"); + } + + if (entry->type != TiffDataType::kShort && + entry->type != TiffDataType::kLong && + entry->type != TiffDataType::kLong8) { + return 
absl::InvalidArgumentError("Expected SHORT, LONG, or LONG8 type"); + } + + // If this is an external array, it must be loaded separately + if (entry->is_external_array) { + out.resize(entry->count); + return absl::OkStatus(); + } else { + // Inline value - parse it directly + out.resize(entry->count); + if (entry->count == 1) { + out[0] = entry->value_or_offset; + return absl::OkStatus(); + } else { + return absl::InternalError( + "Inconsistent state: multi-value array marked as inline"); + } + } +} + +// Helper to parse a uint16 value from an IFD entry +absl::Status ParseUint16Value(const IfdEntry* entry, uint16_t& out) { + if (!entry) { + return absl::NotFoundError("Required tag missing"); + } + if (entry->count != 1) { + return absl::InvalidArgumentError("Expected count of 1"); + } + if (entry->type != TiffDataType::kShort) { + return absl::InvalidArgumentError("Expected SHORT type"); + } + out = static_cast(entry->value_or_offset); + return absl::OkStatus(); +} + +// Helper function to parse array of uint16 values from an IFD entry +absl::Status ParseUint16Array(const IfdEntry* entry, + std::vector& out) { + if (!entry) { + return absl::NotFoundError("Required tag missing"); + } + + if (entry->type != TiffDataType::kShort) { + return absl::InvalidArgumentError("Expected SHORT type"); + } + + // If this is an external array, it must be loaded separately + if (entry->is_external_array) { + out.resize(entry->count); + return absl::OkStatus(); + } else { + // Inline value - parse it directly + out.resize(entry->count); + if (entry->count == 1) { + out[0] = static_cast(entry->value_or_offset); + return absl::OkStatus(); + } else { + return absl::InternalError( + "Inconsistent state: multi-value array marked as inline"); + } + } +} + +// Helper to calculate the number of chunks/tiles/strips +std::tuple CalculateChunkCounts( + uint32_t image_width, uint32_t image_height, uint32_t chunk_width, + uint32_t chunk_height) { + if (chunk_width == 0 || chunk_height == 0) { + return {0, 0, 0}; + } + uint32_t num_cols = (image_width + chunk_width - 1) / chunk_width; + uint32_t num_rows = (image_height + chunk_height - 1) / chunk_height; + uint64_t num_chunks = static_cast(num_rows) * num_cols; + return {num_chunks, num_rows, num_cols}; +} + +} // namespace + +absl::Status ParseUint16Array(riegeli::Reader& reader, Endian endian, + uint64_t offset, uint64_t count, + std::vector& out) { + out.resize(count); + + if (!reader.Seek(offset)) { + return absl::InvalidArgumentError(absl::StrFormat( + "Failed to seek to external array at offset %llu", offset)); + } + + for (uint64_t i = 0; i < count; ++i) { + uint16_t value; + if (!ReadEndian(reader, endian, value)) { + return absl::DataLossError(absl::StrFormat( + "Failed to read SHORT value %llu in external array", i)); + } + out[i] = value; + } + + ABSL_LOG_IF(INFO, tiff_logging) << absl::StrFormat( + "Read uint16 external array: offset=%llu, count=%llu", offset, count); + + return absl::OkStatus(); +} + +// Get the size in bytes for a given TIFF data type +size_t GetTiffDataTypeSize(TiffDataType type) { + switch (type) { + case TiffDataType::kByte: + case TiffDataType::kAscii: + case TiffDataType::kSbyte: + case TiffDataType::kUndefined: + return 1; + case TiffDataType::kShort: + case TiffDataType::kSshort: + return 2; + case TiffDataType::kLong: + case TiffDataType::kSlong: + case TiffDataType::kFloat: + case TiffDataType::kIfd: + return 4; + case TiffDataType::kRational: + case TiffDataType::kSrational: + case TiffDataType::kDouble: + case 
TiffDataType::kLong8: + case TiffDataType::kSlong8: + case TiffDataType::kIfd8: + return 8; + default: + return 0; // Unknown type + } +} + +// Determine if an entry represents an external array based on type and count +bool IsExternalArray(TiffDataType type, uint64_t count) { + size_t type_size = GetTiffDataTypeSize(type); + size_t total_size = type_size * count; + + // If the total size is more than 4 bytes, it's stored externally + // (4 bytes is the size of the value_or_offset field in standard TIFF) + return (total_size > 4); +} + +absl::Status ParseTiffHeader(riegeli::Reader& reader, Endian& endian, + uint64_t& first_ifd_offset) { + // Pull first 8 bytes which contain the header info + if (!reader.Pull(8)) { + return absl::InvalidArgumentError( + "Failed to read TIFF header: insufficient data"); + } + + // Read byte order mark (II or MM) + char byte_order[2]; + if (!reader.Read(2, byte_order)) { + return absl::InvalidArgumentError( + "Failed to read TIFF header byte order mark"); + } + + if (byte_order[0] == 'I' && byte_order[1] == 'I') { + endian = Endian::kLittle; + } else if (byte_order[0] == 'M' && byte_order[1] == 'M') { + endian = Endian::kBig; + } else { + return absl::InvalidArgumentError("Invalid TIFF byte order mark"); + } + + // Read magic number (42 for standard TIFF) + uint16_t magic; + if (!ReadEndian(reader, endian, magic) || magic != 42) { + return absl::InvalidArgumentError("Invalid TIFF magic number"); + } + + // Read offset to first IFD + uint32_t offset32; + if (!ReadEndian(reader, endian, offset32)) { + return absl::InvalidArgumentError("Failed to read first IFD offset"); + } + first_ifd_offset = offset32; + + ABSL_LOG_IF(INFO, tiff_logging) + << "TIFF header: endian=" + << (endian == Endian::kLittle ? "little" : "big") + << " first_ifd_offset=" << first_ifd_offset; + + return absl::OkStatus(); +} + +absl::Status ParseTiffDirectory(riegeli::Reader& reader, Endian endian, + uint64_t directory_offset, + size_t available_size, TiffDirectory& out) { + if (!reader.Seek(directory_offset)) { + return absl::InvalidArgumentError(absl::StrFormat( + "Failed to seek to IFD at offset %d", directory_offset)); + } + + if (available_size < 2) { + return absl::DataLossError("Insufficient data to read IFD entry count"); + } + + uint16_t num_entries; + if (!ReadEndian(reader, endian, num_entries)) { + return absl::InvalidArgumentError("Failed to read IFD entry count"); + } + + // Each entry is 12 bytes, plus 2 bytes for count and 4 bytes for next IFD + // offset + size_t required_size = 2 + (num_entries * 12) + 4; + if (available_size < required_size) { + return absl::DataLossError(absl::StrFormat( + "Insufficient data to read complete IFD: need %d bytes, have %d", + required_size, available_size)); + } + + out.endian = endian; + out.directory_offset = directory_offset; + out.entries.clear(); + out.entries.reserve(num_entries); + + for (uint16_t i = 0; i < num_entries; ++i) { + IfdEntry entry; + + // Read tag + uint16_t tag_value; + if (!ReadEndian(reader, endian, tag_value)) { + return absl::InvalidArgumentError( + absl::StrFormat("Failed to read tag for IFD entry %d", i)); + } + entry.tag = static_cast(tag_value); + + // Read type + uint16_t type_raw; + if (!ReadEndian(reader, endian, type_raw)) { + return absl::InvalidArgumentError( + absl::StrFormat("Failed to read type for IFD entry %d", i)); + } + entry.type = static_cast(type_raw); + + // Read count + uint32_t count32; + if (!ReadEndian(reader, endian, count32)) { + return absl::InvalidArgumentError( + 
absl::StrFormat("Failed to read count for IFD entry %d", i)); + } + entry.count = count32; + + // Read value/offset + uint32_t value32; + if (!ReadEndian(reader, endian, value32)) { + return absl::InvalidArgumentError( + absl::StrFormat("Failed to read value/offset for IFD entry %d", i)); + } + entry.value_or_offset = value32; + + // Determine if this is an external array + entry.is_external_array = IsExternalArray(entry.type, entry.count); + + ABSL_LOG_IF(INFO, tiff_logging) << absl::StrFormat( + "IFD entry %d: tag=0x%x type=%d count=%d value=%d external=%d", i, + entry.tag, static_cast(entry.type), entry.count, + entry.value_or_offset, entry.is_external_array); + + out.entries.push_back(entry); + } + + // Read offset to next IFD + uint32_t next_ifd; + if (!ReadEndian(reader, endian, next_ifd)) { + return absl::InvalidArgumentError("Failed to read next IFD offset"); + } + out.next_ifd_offset = next_ifd; + + ABSL_LOG_IF(INFO, tiff_logging) + << "Read IFD with " << num_entries << " entries" + << ", next_ifd_offset=" << out.next_ifd_offset; + + return absl::OkStatus(); +} + +absl::Status ParseExternalArray(riegeli::Reader& reader, Endian endian, + uint64_t offset, uint64_t count, + TiffDataType data_type, + std::vector& out) { + out.resize(count); + + if (!reader.Seek(offset)) { + return absl::InvalidArgumentError(absl::StrFormat( + "Failed to seek to external array at offset %llu", offset)); + } + + for (uint64_t i = 0; i < count; ++i) { + switch (data_type) { + case TiffDataType::kShort: { + uint16_t value; + if (!ReadEndian(reader, endian, value)) { + return absl::DataLossError(absl::StrFormat( + "Failed to read SHORT value %llu in external array", i)); + } + out[i] = value; + break; + } + case TiffDataType::kLong: { + uint32_t value; + if (!ReadEndian(reader, endian, value)) { + return absl::DataLossError(absl::StrFormat( + "Failed to read LONG value %llu in external array", i)); + } + out[i] = value; + break; + } + case TiffDataType::kLong8: { + uint64_t value; + if (!ReadEndian(reader, endian, value)) { + return absl::DataLossError(absl::StrFormat( + "Failed to read LONG8 value %llu in external array", i)); + } + out[i] = value; + break; + } + default: + return absl::InvalidArgumentError( + absl::StrFormat("Unsupported data type %d for external array", + static_cast(data_type))); + } + } + + ABSL_LOG_IF(INFO, tiff_logging) << absl::StrFormat( + "Read external array: offset=%llu, count=%llu", offset, count); + + return absl::OkStatus(); +} + +absl::Status ParseImageDirectory(const std::vector& entries, + ImageDirectory& out) { + // Required fields for all TIFF files + TENSORSTORE_RETURN_IF_ERROR( + ParseUint32Value(GetIfdEntry(Tag::kImageWidth, entries), out.width)); + TENSORSTORE_RETURN_IF_ERROR( + ParseUint32Value(GetIfdEntry(Tag::kImageLength, entries), out.height)); + + // Parse optional fields + + // Samples Per Pixel (defaults to 1 if missing) + const IfdEntry* spp_entry = GetIfdEntry(Tag::kSamplesPerPixel, entries); + if (spp_entry) { + TENSORSTORE_RETURN_IF_ERROR( + ParseUint16Value(spp_entry, out.samples_per_pixel)); + } else { + out.samples_per_pixel = 1; + } + + // Bits Per Sample (defaults to 1 bit per sample if missing) + const IfdEntry* bps_entry = GetIfdEntry(Tag::kBitsPerSample, entries); + if (bps_entry) { + TENSORSTORE_RETURN_IF_ERROR( + ParseUint16Array(bps_entry, out.bits_per_sample)); + if (out.bits_per_sample.size() != out.samples_per_pixel && + out.bits_per_sample.size() != + 1) { // Allow single value for all samples + return absl::InvalidArgumentError( + 
"BitsPerSample count does not match SamplesPerPixel"); + } + // If only one value provided, replicate it for all samples + if (out.bits_per_sample.size() == 1 && out.samples_per_pixel > 1) { + out.bits_per_sample.resize(out.samples_per_pixel, out.bits_per_sample[0]); + } + } else { + out.bits_per_sample.assign(out.samples_per_pixel, 1); + } + + // Compression (defaults to None if missing) + const IfdEntry* comp_entry = GetIfdEntry(Tag::kCompression, entries); + if (comp_entry) { + TENSORSTORE_RETURN_IF_ERROR(ParseUint16Value(comp_entry, out.compression)); + } else { + out.compression = static_cast(CompressionType::kNone); + } + + // Photometric Interpretation (defaults to 0 if missing) + const IfdEntry* photo_entry = GetIfdEntry(Tag::kPhotometric, entries); + if (photo_entry) { + TENSORSTORE_RETURN_IF_ERROR(ParseUint16Value(photo_entry, out.photometric)); + } else { + out.photometric = 0; // Default WhiteIsZero + } + + // Planar Configuration (defaults to Chunky if missing) + const IfdEntry* planar_entry = GetIfdEntry(Tag::kPlanarConfig, entries); + if (planar_entry) { + TENSORSTORE_RETURN_IF_ERROR( + ParseUint16Value(planar_entry, out.planar_config)); + } else { + out.planar_config = static_cast(PlanarConfigType::kChunky); + } + + // Sample Format (defaults to uint if missing) + const IfdEntry* format_entry = GetIfdEntry(Tag::kSampleFormat, entries); + if (format_entry) { + TENSORSTORE_RETURN_IF_ERROR( + ParseUint16Array(format_entry, out.sample_format)); + // Validate size matches SamplesPerPixel + if (out.sample_format.size() != out.samples_per_pixel && + out.sample_format.size() != 1) { + return absl::InvalidArgumentError( + "SampleFormat count does not match SamplesPerPixel"); + } + // If only one value provided, replicate it for all samples + if (out.sample_format.size() == 1 && out.samples_per_pixel > 1) { + out.sample_format.resize(out.samples_per_pixel, out.sample_format[0]); + } + } else { + out.sample_format.assign( + out.samples_per_pixel, + static_cast(SampleFormatType::kUnsignedInteger)); + } + + // Determine tiled vs. 
stripped and parse chunk info + const IfdEntry* tile_width_entry = GetIfdEntry(Tag::kTileWidth, entries); + const IfdEntry* rows_per_strip_entry = + GetIfdEntry(Tag::kRowsPerStrip, entries); + + if (tile_width_entry) { + out.is_tiled = true; + if (rows_per_strip_entry) { + ABSL_LOG_IF(WARNING, tiff_logging) + << "Both TileWidth and RowsPerStrip present; ignoring RowsPerStrip."; + } + + TENSORSTORE_RETURN_IF_ERROR( + ParseUint32Value(tile_width_entry, out.chunk_width)); + TENSORSTORE_RETURN_IF_ERROR(ParseUint32Value( + GetIfdEntry(Tag::kTileLength, entries), out.chunk_height)); + + const IfdEntry* offsets_entry = GetIfdEntry(Tag::kTileOffsets, entries); + const IfdEntry* counts_entry = GetIfdEntry(Tag::kTileByteCounts, entries); + + if (!offsets_entry) + return absl::NotFoundError("TileOffsets tag missing for tiled image"); + if (!counts_entry) + return absl::NotFoundError("TileByteCounts tag missing for tiled image"); + + TENSORSTORE_RETURN_IF_ERROR( + ParseUint64Array(offsets_entry, out.chunk_offsets)); + TENSORSTORE_RETURN_IF_ERROR( + ParseUint64Array(counts_entry, out.chunk_bytecounts)); + + // Validate counts + auto [num_chunks, num_rows, num_cols] = CalculateChunkCounts( + out.width, out.height, out.chunk_width, out.chunk_height); + if (out.chunk_offsets.size() != num_chunks) { + return absl::InvalidArgumentError(absl::StrFormat( + "TileOffsets count (%d) does not match expected number of tiles (%d)", + out.chunk_offsets.size(), num_chunks)); + } + if (out.chunk_bytecounts.size() != num_chunks) { + return absl::InvalidArgumentError( + absl::StrFormat("TileByteCounts count (%d) does not match expected " + "number of tiles (%d)", + out.chunk_bytecounts.size(), num_chunks)); + } + + } else { + // Stripped Mode + out.is_tiled = false; + if (!rows_per_strip_entry) { + // Neither TileWidth nor RowsPerStrip found + return absl::NotFoundError( + "Neither TileWidth nor RowsPerStrip tag found"); + } + + TENSORSTORE_RETURN_IF_ERROR( + ParseUint32Value(rows_per_strip_entry, out.chunk_height)); + // Strip width is always the image width + out.chunk_width = out.width; + + const IfdEntry* offsets_entry = GetIfdEntry(Tag::kStripOffsets, entries); + const IfdEntry* counts_entry = GetIfdEntry(Tag::kStripByteCounts, entries); + + if (!offsets_entry) + return absl::NotFoundError("StripOffsets tag missing for stripped image"); + if (!counts_entry) + return absl::NotFoundError( + "StripByteCounts tag missing for stripped image"); + + TENSORSTORE_RETURN_IF_ERROR( + ParseUint64Array(offsets_entry, out.chunk_offsets)); + TENSORSTORE_RETURN_IF_ERROR( + ParseUint64Array(counts_entry, out.chunk_bytecounts)); + + // Validate counts + auto [num_chunks, num_rows, num_cols] = CalculateChunkCounts( + out.width, out.height, out.chunk_width, out.chunk_height); + + if (out.chunk_offsets.size() != out.chunk_bytecounts.size()) { + return absl::InvalidArgumentError( + "StripOffsets and StripByteCounts have different counts"); + } + if (out.chunk_offsets.size() != num_chunks) { + ABSL_LOG_IF(WARNING, tiff_logging) << absl::StrFormat( + "StripOffsets/Counts size (%d) does not match expected number of " + "strips (%d) based on RowsPerStrip", + out.chunk_offsets.size(), num_chunks); + } + } + + return absl::OkStatus(); +} + +} // namespace internal_tiff_kvstore +} // namespace tensorstore \ No newline at end of file diff --git a/tensorstore/kvstore/tiff/tiff_details.h b/tensorstore/kvstore/tiff/tiff_details.h new file mode 100644 index 000000000..b2feed445 --- /dev/null +++ b/tensorstore/kvstore/tiff/tiff_details.h @@ -0,0 
+1,202 @@ +// Copyright 2025 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORSTORE_KVSTORE_TIFF_TIFF_DETAILS_H_ +#define TENSORSTORE_KVSTORE_TIFF_TIFF_DETAILS_H_ + +#include +#include + +#include +#include +#include + +#include "absl/status/status.h" +#include "riegeli/bytes/reader.h" + +namespace tensorstore { +namespace internal_tiff_kvstore { + +enum class Endian { + kLittle, + kBig, +}; + +enum Tag : uint16_t { + kImageWidth = 256, + kImageLength = 257, + kBitsPerSample = 258, + kCompression = 259, + kPhotometric = 262, + kSamplesPerPixel = 277, + kRowsPerStrip = 278, + kStripOffsets = 273, + kStripByteCounts = 279, + kPlanarConfig = 284, + kTileWidth = 322, + kTileLength = 323, + kTileOffsets = 324, + kTileByteCounts = 325, + kSampleFormat = 339, +}; + +// Common compression types +enum class CompressionType : uint16_t { + kNone = 1, + kCCITTGroup3 = 2, + kCCITTGroup4 = 3, + kLZW = 5, + kJPEG = 6, + kDeflate = 8, + kZStd = 50000, + kPackBits = 32773, +}; + +// Photometric interpretations +enum class PhotometricType : uint16_t { + kWhiteIsZero = 0, + kBlackIsZero = 1, + kRGB = 2, + kPalette = 3, + kTransparencyMask = 4, + kCMYK = 5, + kYCbCr = 6, + kCIELab = 8, +}; + +// Planar configurations +enum class PlanarConfigType : uint16_t { + kChunky = 1, // RGBRGBRGB... + kPlanar = 2, // RRR...GGG...BBB... +}; + +// Sample formats +enum class SampleFormatType : uint16_t { + kUnsignedInteger = 1, + kSignedInteger = 2, + kIEEEFloat = 3, + kUndefined = 4, +}; + +// TIFF data types +enum class TiffDataType : uint16_t { + kByte = 1, // 8-bit unsigned integer + kAscii = 2, // 8-bit bytes with last byte null + kShort = 3, // 16-bit unsigned integer + kLong = 4, // 32-bit unsigned integer + kRational = 5, // Two 32-bit unsigned integers + kSbyte = 6, // 8-bit signed integer + kUndefined = 7, // 8-bit byte + kSshort = 8, // 16-bit signed integer + kSlong = 9, // 32-bit signed integer + kSrational = 10, // Two 32-bit signed integers + kFloat = 11, // 32-bit IEEE floating point + kDouble = 12, // 64-bit IEEE floating point + kIfd = 13, // 32-bit unsigned integer (offset) + kLong8 = 16, // BigTIFF 64-bit unsigned integer + kSlong8 = 17, // BigTIFF 64-bit signed integer + kIfd8 = 18, // BigTIFF 64-bit unsigned integer (offset) +}; + +// IFD entry in a TIFF file +struct IfdEntry { + Tag tag; + TiffDataType type; + uint64_t count; + // For values that fit in 4/8 bytes, this is the value. + // Otherwise, this is an offset to the data. 
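  // Worked example of the 4-byte rule applied by IsExternalArray(): a SHORT
  // (2-byte) tag with count 2 occupies exactly 4 bytes and stays inline here,
  // whereas a LONG (4-byte) tag with count 2 needs 8 bytes, so this field
  // then holds the file offset of the external array instead of the value.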
+ uint64_t value_or_offset; + + // Flag to indicate if this entry references an external array + bool is_external_array = false; + + constexpr static auto ApplyMembers = [](auto&& x, auto f) { + return f(x.tag, x.type, x.count, x.value_or_offset, x.is_external_array); + }; +}; + +// Represents a TIFF Image File Directory (IFD) +struct TiffDirectory { + // Basic header info + Endian endian; + uint64_t directory_offset; // Offset to this IFD from start of file + uint64_t next_ifd_offset; // Offset to next IFD (0 if none) + + // Entries in this IFD + std::vector entries; + + constexpr static auto ApplyMembers = [](auto&& x, auto f) { + return f(x.endian, x.directory_offset, x.next_ifd_offset, x.entries); + }; +}; + +struct ImageDirectory { + uint32_t width = 0; + uint32_t height = 0; + uint32_t chunk_width = 0; + uint32_t chunk_height = 0; + uint16_t samples_per_pixel = 1; + uint16_t compression = static_cast(CompressionType::kNone); + uint16_t photometric = 0; + uint16_t planar_config = static_cast(PlanarConfigType::kChunky); + std::vector bits_per_sample; + std::vector sample_format; + std::vector chunk_offsets; + std::vector chunk_bytecounts; + + bool is_tiled = false; + + constexpr static auto ApplyMembers = [](auto&& x, auto f) { + return f(x.width, x.height, x.chunk_width, x.chunk_height, + x.samples_per_pixel, x.compression, x.photometric, x.planar_config, + x.bits_per_sample, x.sample_format, x.chunk_offsets, + x.chunk_bytecounts, x.is_tiled); + }; +}; + +// Parse the TIFF header at the current position +absl::Status ParseTiffHeader(riegeli::Reader& reader, Endian& endian, + uint64_t& first_ifd_offset); + +// Parse a TIFF directory at the given offset +absl::Status ParseTiffDirectory(riegeli::Reader& reader, Endian endian, + uint64_t directory_offset, + size_t available_size, TiffDirectory& out); + +// Parse IFD entries into an ImageDirectory structure +absl::Status ParseImageDirectory(const std::vector& entries, + ImageDirectory& out); + +// Parse an external array from a reader +absl::Status ParseExternalArray(riegeli::Reader& reader, Endian endian, + uint64_t offset, uint64_t count, + TiffDataType data_type, + std::vector& out); + +// Parse a uint16_t array from an IFD entry +absl::Status ParseUint16Array(riegeli::Reader& reader, Endian endian, + uint64_t offset, uint64_t count, + std::vector& out); + +// Determine if an IFD entry represents an external array based on type and +// count +bool IsExternalArray(TiffDataType type, uint64_t count); + +// Get the size in bytes for a given TIFF data type +size_t GetTiffDataTypeSize(TiffDataType type); + +} // namespace internal_tiff_kvstore +} // namespace tensorstore + +#endif // TENSORSTORE_KVSTORE_TIFF_TIFF_DETAILS_H_ \ No newline at end of file diff --git a/tensorstore/kvstore/tiff/tiff_details_test.cc b/tensorstore/kvstore/tiff/tiff_details_test.cc new file mode 100644 index 000000000..e2b2f6f7c --- /dev/null +++ b/tensorstore/kvstore/tiff/tiff_details_test.cc @@ -0,0 +1,367 @@ +// Copyright 2025 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/kvstore/tiff/tiff_details.h" + +#include +#include +#include +#include + +#include +#include +#include + +#include "absl/status/status.h" +#include "riegeli/bytes/cord_reader.h" +#include "riegeli/bytes/string_reader.h" +#include "tensorstore/util/status_testutil.h" + +namespace { + +using ::tensorstore::internal_tiff_kvstore::Endian; +using ::tensorstore::internal_tiff_kvstore::GetTiffDataTypeSize; +using ::tensorstore::internal_tiff_kvstore::IfdEntry; +using ::tensorstore::internal_tiff_kvstore::ImageDirectory; +using ::tensorstore::internal_tiff_kvstore::IsExternalArray; +using ::tensorstore::internal_tiff_kvstore::ParseExternalArray; +using ::tensorstore::internal_tiff_kvstore::ParseImageDirectory; +using ::tensorstore::internal_tiff_kvstore::ParseTiffDirectory; +using ::tensorstore::internal_tiff_kvstore::ParseTiffHeader; +using ::tensorstore::internal_tiff_kvstore::ParseUint16Array; +using ::tensorstore::internal_tiff_kvstore::Tag; +using ::tensorstore::internal_tiff_kvstore::TiffDataType; +using ::tensorstore::internal_tiff_kvstore::TiffDirectory; + +TEST(TiffDetailsTest, ParseValidTiffHeader) { + // Create a minimal valid TIFF header (II, 42, offset 8) + static constexpr unsigned char kHeader[] = { + 'I', 'I', // Little endian + 42, 0, // Magic number (little endian) + 8, 0, 0, 0, // Offset to first IFD (little endian) + }; + + riegeli::StringReader reader(std::string_view( + reinterpret_cast(kHeader), sizeof(kHeader))); + + Endian endian; + uint64_t first_ifd_offset; + ASSERT_THAT(ParseTiffHeader(reader, endian, first_ifd_offset), + ::tensorstore::IsOk()); + EXPECT_EQ(endian, Endian::kLittle); + EXPECT_EQ(first_ifd_offset, 8); +} + +TEST(TiffDetailsTest, ParseBadByteOrder) { + // Create an invalid TIFF header with wrong byte order marker + static constexpr unsigned char kHeader[] = { + 'X', 'X', // Invalid byte order + 42, 0, // Magic number + 8, 0, 0, 0, // Offset to first IFD + }; + + riegeli::StringReader reader(std::string_view( + reinterpret_cast(kHeader), sizeof(kHeader))); + + Endian endian; + uint64_t first_ifd_offset; + EXPECT_THAT(ParseTiffHeader(reader, endian, first_ifd_offset), + ::tensorstore::MatchesStatus(absl::StatusCode::kInvalidArgument)); +} + +TEST(TiffDetailsTest, ParseBadMagic) { + // Create an invalid TIFF header with wrong magic number + static constexpr unsigned char kHeader[] = { + 'I', 'I', // Little endian + 43, 0, // Wrong magic number + 8, 0, 0, 0, // Offset to first IFD + }; + + riegeli::StringReader reader(std::string_view( + reinterpret_cast(kHeader), sizeof(kHeader))); + + Endian endian; + uint64_t first_ifd_offset; + EXPECT_THAT(ParseTiffHeader(reader, endian, first_ifd_offset), + ::tensorstore::MatchesStatus(absl::StatusCode::kInvalidArgument)); +} + +TEST(TiffDetailsTest, ParseValidDirectory) { + // Create a minimal valid IFD with one entry + static constexpr unsigned char kIfd[] = { + 1, 0, // Number of entries + 0, 1, // Tag (ImageWidth = 256) + 3, 0, // Type (SHORT) + 1, 0, 0, 0, // Count + 100, 0, 0, 0, // Value (100) + 0, 0, 0, 0, // Next IFD offset (0 = no more) + }; + + riegeli::StringReader reader( + std::string_view(reinterpret_cast(kIfd), sizeof(kIfd))); + + TiffDirectory dir; + ASSERT_THAT(ParseTiffDirectory(reader, Endian::kLittle, 0, sizeof(kIfd), dir), + ::tensorstore::IsOk()); + + EXPECT_EQ(dir.entries.size(), 1); + EXPECT_EQ(dir.next_ifd_offset, 0); + + const auto& entry = dir.entries[0]; + 
EXPECT_EQ(entry.tag, Tag::kImageWidth); + EXPECT_EQ(entry.type, TiffDataType::kShort); + EXPECT_EQ(entry.count, 1); + EXPECT_EQ(entry.value_or_offset, 100); +} + +TEST(TiffDetailsTest, ParseTruncatedDirectory) { + // Create a truncated IFD + static constexpr unsigned char kTruncatedIfd[] = { + 1, 0, // Number of entries + 1, 0, // Tag (partial entry) + }; + + riegeli::StringReader reader(std::string_view( + reinterpret_cast(kTruncatedIfd), sizeof(kTruncatedIfd))); + + TiffDirectory dir; + EXPECT_THAT(ParseTiffDirectory(reader, Endian::kLittle, 0, + sizeof(kTruncatedIfd), dir), + ::tensorstore::MatchesStatus(absl::StatusCode::kDataLoss)); +} + +TEST(TiffDetailsTest, ParseImageDirectory_Tiled_InlineOffsets_Success) { + std::vector entries = { + {Tag::kImageWidth, TiffDataType::kLong, 1, 256}, // ImageWidth + {Tag::kImageLength, TiffDataType::kLong, 1, 256}, // ImageLength + {Tag::kTileWidth, TiffDataType::kLong, 1, 256}, // TileWidth + {Tag::kTileLength, TiffDataType::kLong, 1, 256}, // TileLength + {Tag::kTileOffsets, TiffDataType::kLong, 1, 1000}, // TileOffsets + {Tag::kTileByteCounts, TiffDataType::kLong, 1, 65536}, // TileByteCounts + }; + + ImageDirectory dir; + ASSERT_THAT(ParseImageDirectory(entries, dir), ::tensorstore::IsOk()); + + EXPECT_EQ(dir.width, 256); + EXPECT_EQ(dir.height, 256); + EXPECT_EQ(dir.chunk_width, 256); + EXPECT_EQ(dir.chunk_height, 256); + ASSERT_EQ(dir.chunk_offsets.size(), 1); + EXPECT_EQ(dir.chunk_offsets[0], 1000); + ASSERT_EQ(dir.chunk_bytecounts.size(), 1); + EXPECT_EQ(dir.chunk_bytecounts[0], 65536); +} + +TEST(TiffDetailsTest, ParseImageDirectory_Stripped_InlineOffsets_Success) { + std::vector entries = { + {Tag::kImageWidth, TiffDataType::kLong, 1, 800}, // ImageWidth + {Tag::kImageLength, TiffDataType::kLong, 1, 600}, // ImageLength + {Tag::kRowsPerStrip, TiffDataType::kLong, 1, 100}, // RowsPerStrip + {Tag::kStripOffsets, TiffDataType::kLong, 1, 1000}, // StripOffsets + {Tag::kStripByteCounts, TiffDataType::kLong, 1, 8192}, // StripByteCounts + }; + + ImageDirectory dir; + ASSERT_THAT(ParseImageDirectory(entries, dir), ::tensorstore::IsOk()); + + EXPECT_EQ(dir.width, 800); + EXPECT_EQ(dir.height, 600); + EXPECT_FALSE(dir.is_tiled); + EXPECT_EQ(dir.chunk_height, 100); + ASSERT_EQ(dir.chunk_offsets.size(), 1); + EXPECT_EQ(dir.chunk_offsets[0], 1000); + ASSERT_EQ(dir.chunk_bytecounts.size(), 1); + EXPECT_EQ(dir.chunk_bytecounts[0], 8192); +} + +TEST(TiffDetailsTest, ParseImageDirectory_DuplicateTags) { + std::vector entries = { + {Tag::kImageWidth, TiffDataType::kLong, 1, 800}, // ImageWidth + {Tag::kImageWidth, TiffDataType::kLong, 1, 1024}, // Duplicate ImageWidth + {Tag::kImageLength, TiffDataType::kLong, 1, 600}, // ImageLength + }; + + ImageDirectory dir; + EXPECT_THAT(ParseImageDirectory(entries, dir), + ::tensorstore::MatchesStatus(absl::StatusCode::kNotFound)); +} + +TEST(TiffDetailsTest, GetTiffDataTypeSize) { + EXPECT_EQ(GetTiffDataTypeSize(TiffDataType::kByte), 1); + EXPECT_EQ(GetTiffDataTypeSize(TiffDataType::kAscii), 1); + EXPECT_EQ(GetTiffDataTypeSize(TiffDataType::kShort), 2); + EXPECT_EQ(GetTiffDataTypeSize(TiffDataType::kLong), 4); + EXPECT_EQ(GetTiffDataTypeSize(TiffDataType::kRational), 8); + EXPECT_EQ(GetTiffDataTypeSize(TiffDataType::kSbyte), 1); + EXPECT_EQ(GetTiffDataTypeSize(TiffDataType::kUndefined), 1); + EXPECT_EQ(GetTiffDataTypeSize(TiffDataType::kSshort), 2); + EXPECT_EQ(GetTiffDataTypeSize(TiffDataType::kSlong), 4); + EXPECT_EQ(GetTiffDataTypeSize(TiffDataType::kSrational), 8); + 
EXPECT_EQ(GetTiffDataTypeSize(TiffDataType::kFloat), 4); + EXPECT_EQ(GetTiffDataTypeSize(TiffDataType::kDouble), 8); + EXPECT_EQ(GetTiffDataTypeSize(TiffDataType::kIfd), 4); + EXPECT_EQ(GetTiffDataTypeSize(TiffDataType::kLong8), 8); + EXPECT_EQ(GetTiffDataTypeSize(TiffDataType::kSlong8), 8); + EXPECT_EQ(GetTiffDataTypeSize(TiffDataType::kIfd8), 8); + + // Test with invalid type + EXPECT_EQ(GetTiffDataTypeSize(static_cast(999)), 0); +} + +TEST(TiffDetailsTest, IsExternalArray) { + // Test with data that fits in 4 bytes (inline) + EXPECT_FALSE(IsExternalArray(TiffDataType::kLong, 1)); + EXPECT_FALSE(IsExternalArray(TiffDataType::kShort, 2)); + EXPECT_FALSE(IsExternalArray(TiffDataType::kByte, 4)); + + // Test with data that doesn't fit in 4 bytes (external) + EXPECT_TRUE(IsExternalArray(TiffDataType::kLong, 2)); + EXPECT_TRUE(IsExternalArray(TiffDataType::kShort, 3)); + EXPECT_TRUE(IsExternalArray(TiffDataType::kByte, 5)); + EXPECT_TRUE(IsExternalArray(TiffDataType::kRational, 1)); +} + +TEST(TiffDetailsTest, ParseExternalArray) { + // Create a buffer with four uint32 values in little-endian format + static constexpr unsigned char kBuffer[] = { + 100, 0, 0, 0, 200, 0, 0, 0, 150, 0, 0, 0, 250, 0, 0, 0, + }; + + riegeli::StringReader reader(std::string_view( + reinterpret_cast(kBuffer), sizeof(kBuffer))); + + std::vector values; + ASSERT_THAT(ParseExternalArray(reader, Endian::kLittle, 0, 4, + TiffDataType::kLong, values), + ::tensorstore::IsOk()); + + ASSERT_EQ(values.size(), 4); + EXPECT_EQ(values[0], 100); + EXPECT_EQ(values[1], 200); + EXPECT_EQ(values[2], 150); + EXPECT_EQ(values[3], 250); +} + +TEST(TiffDetailsTest, ParseExternalArray_SeekFail) { + // Create a small buffer to test seek failure + static constexpr unsigned char kBuffer[] = {1, 2, 3, 4}; + + riegeli::StringReader reader(std::string_view( + reinterpret_cast(kBuffer), sizeof(kBuffer))); + + std::vector values; + // Try to seek beyond the buffer size + EXPECT_THAT(ParseExternalArray(reader, Endian::kLittle, 100, 1, + TiffDataType::kLong, values), + ::tensorstore::MatchesStatus(absl::StatusCode::kInvalidArgument)); +} + +TEST(TiffDetailsTest, ParseExternalArray_ReadFail) { + // Create a buffer with incomplete data + static constexpr unsigned char kBuffer[] = {100, 0, 0}; + + riegeli::StringReader reader(std::string_view( + reinterpret_cast(kBuffer), sizeof(kBuffer))); + + std::vector values; + EXPECT_THAT(ParseExternalArray(reader, Endian::kLittle, 0, 1, + TiffDataType::kLong, values), + ::tensorstore::MatchesStatus(absl::StatusCode::kDataLoss)); +} + +TEST(TiffDetailsTest, ParseExternalArray_InvalidType) { + static constexpr unsigned char kBuffer[] = {1, 2, 3, 4}; + + riegeli::StringReader reader(std::string_view( + reinterpret_cast(kBuffer), sizeof(kBuffer))); + + std::vector values; + EXPECT_THAT(ParseExternalArray(reader, Endian::kLittle, 0, 1, + TiffDataType::kRational, values), + ::tensorstore::MatchesStatus(absl::StatusCode::kInvalidArgument)); +} + +TEST(TiffDetailsTest, ParseUint16Array) { + static constexpr unsigned char kBuffer[] = { + 100, 0, 200, 0, 150, 0, 250, 0, + }; + + riegeli::StringReader reader(std::string_view( + reinterpret_cast(kBuffer), sizeof(kBuffer))); + + std::vector values; + ASSERT_THAT(ParseUint16Array(reader, Endian::kLittle, 0, 4, values), + ::tensorstore::IsOk()); + + ASSERT_EQ(values.size(), 4); + EXPECT_EQ(values[0], 100); + EXPECT_EQ(values[1], 200); + EXPECT_EQ(values[2], 150); + EXPECT_EQ(values[3], 250); +} + +TEST(TiffDetailsTest, ParseUint16Array_SeekFail) { + static constexpr unsigned 
char kBuffer[] = {1, 2, 3, 4}; + + riegeli::StringReader reader(std::string_view( + reinterpret_cast(kBuffer), sizeof(kBuffer))); + + std::vector values; + // Try to seek beyond the buffer size + EXPECT_THAT(ParseUint16Array(reader, Endian::kLittle, 100, 1, values), + ::tensorstore::MatchesStatus(absl::StatusCode::kInvalidArgument)); +} + +TEST(TiffDetailsTest, ParseUint16Array_ReadFail) { + // Create a buffer with incomplete data + static constexpr unsigned char kBuffer[] = {100}; + + riegeli::StringReader reader(std::string_view( + reinterpret_cast(kBuffer), sizeof(kBuffer))); + + std::vector values; + EXPECT_THAT(ParseUint16Array(reader, Endian::kLittle, 0, 1, values), + ::tensorstore::MatchesStatus(absl::StatusCode::kDataLoss)); +} + +TEST(TiffDetailsTest, ParseImageDirectory_ExternalArrays) { + std::vector entries = { + {Tag::kImageWidth, TiffDataType::kLong, 1, 512}, + {Tag::kImageLength, TiffDataType::kLong, 1, 512}, + {Tag::kTileWidth, TiffDataType::kLong, 1, 256}, + {Tag::kTileLength, TiffDataType::kLong, 1, 256}, + {Tag::kTileOffsets, TiffDataType::kLong, 4, 1000, true}, + {Tag::kTileByteCounts, TiffDataType::kLong, 4, 2000, true}, + {Tag::kBitsPerSample, TiffDataType::kShort, 3, 3000, true}, + {Tag::kSamplesPerPixel, TiffDataType::kShort, 1, 3}, + }; + + ImageDirectory dir; + ASSERT_THAT(ParseImageDirectory(entries, dir), ::tensorstore::IsOk()); + + EXPECT_EQ(dir.width, 512); + EXPECT_EQ(dir.height, 512); + EXPECT_TRUE(dir.is_tiled); + EXPECT_EQ(dir.chunk_width, 256); + EXPECT_EQ(dir.chunk_height, 256); + EXPECT_EQ(dir.samples_per_pixel, 3); + + // External arrays should have the correct size but not be loaded yet + ASSERT_EQ(dir.chunk_offsets.size(), 4); + ASSERT_EQ(dir.chunk_bytecounts.size(), 4); + ASSERT_EQ(dir.bits_per_sample.size(), 3); +} + +} // namespace \ No newline at end of file diff --git a/tensorstore/kvstore/tiff/tiff_dir_cache.cc b/tensorstore/kvstore/tiff/tiff_dir_cache.cc new file mode 100644 index 000000000..77976f879 --- /dev/null +++ b/tensorstore/kvstore/tiff/tiff_dir_cache.cc @@ -0,0 +1,643 @@ +// Copyright 2025 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
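// Implementation note (summary of the state machine below): DoRead() first
// issues a ranged read of the first kInitialReadBytes (1 KiB) of the file and
// parses the TIFF header, then walks the IFD chain, issuing additional ranged
// reads only when the next IFD lies outside the bytes already buffered, and
// finally issues one ranged read per external tag array (tile/strip offsets,
// byte counts, etc.). A ranged read that overshoots the end of the file
// triggers a retry that reads the remainder of the file in a single request.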
+ +#include "tensorstore/kvstore/tiff/tiff_dir_cache.h" + +#include + +#include "absl/base/attributes.h" +#include "absl/log/absl_log.h" +#include "absl/status/status.h" +#include "riegeli/bytes/cord_reader.h" +#include "tensorstore/internal/cache/async_cache.h" +#include "tensorstore/internal/estimate_heap_usage/estimate_heap_usage.h" +#include "tensorstore/internal/estimate_heap_usage/std_vector.h" // IWYU pragma: keep +#include "tensorstore/internal/log/verbose_flag.h" +#include "tensorstore/kvstore/byte_range.h" +#include "tensorstore/kvstore/operations.h" +#include "tensorstore/kvstore/read_result.h" +#include "tensorstore/util/future.h" + +namespace tensorstore { +namespace internal_tiff_kvstore { + +namespace { + +ABSL_CONST_INIT internal_log::VerboseFlag tiff_logging("tiff"); + +struct ReadDirectoryOp + : public internal::AtomicReferenceCount { + TiffDirectoryCache::Entry* entry_; + std::shared_ptr existing_read_data_; + kvstore::ReadOptions options_; + + // partial reads are needed. + bool is_full_read_; + + // The resulting parse data we will build up. + std::shared_ptr parse_result_; + + // Buffer for storing raw file data during reading and parsing operations + absl::Cord buffer; + + // The offset in the file that corresponds to buffer[0]. + uint64_t file_offset_; + + // The next IFD offset we expect to parse. If 0, we have no more IFDs. + uint64_t next_ifd_offset_; + + void StartTiffRead() { + auto& cache = internal::GetOwningCache(*entry_); + ABSL_LOG_IF(INFO, tiff_logging) + << "StartTiffRead " << entry_->key() + << " with byte range: " << options_.byte_range; + + is_full_read_ = false; + file_offset_ = 0; + parse_result_ = std::make_shared(); + + if (!options_.byte_range.IsFull() && + options_.byte_range.size() <= kInitialReadBytes) { + // Caller already requested an explicit (small) range. Keep it. + } else { + // Otherwise issue our standard 0‑kInitialReadBytes probe. + options_.byte_range = + OptionalByteRangeRequest::Range(0, kInitialReadBytes); + } + + auto future = + cache.kvstore_driver_->Read(std::string(entry_->key()), options_); + ABSL_LOG_IF(INFO, tiff_logging) + << "Issued initial read request for key: " << entry_->key() + << " with byte range: " << options_.byte_range; + future.Force(); + future.ExecuteWhenReady( + [self = internal::IntrusivePtr(this)]( + ReadyFuture ready) { + ABSL_LOG_IF(INFO, tiff_logging) + << "Initial read completed for key: " << self->entry_->key(); + self->OnHeaderReadComplete(std::move(ready)); + }); + } + + // Called after the initial read completes. + void OnHeaderReadComplete(ReadyFuture ready) { + const auto& r = ready.result(); + ABSL_LOG_IF(INFO, tiff_logging) + << "OnHeaderReadComplete called for key: " << entry_->key(); + + if (!r.ok()) { + ABSL_LOG_IF(WARNING, tiff_logging) + << "Read failed with status: " << r.status(); + // Possibly partial read overshot the file + if (!is_full_read_ && absl::IsOutOfRange(r.status())) { + is_full_read_ = true; + ABSL_LOG_IF(INFO, tiff_logging) + << "Overshot file. 
Issuing a full read for key: " << entry_->key(); + options_.byte_range = {}; + auto& cache = internal::GetOwningCache(*entry_); + auto retry_future = + cache.kvstore_driver_->Read(std::string(entry_->key()), options_); + retry_future.Force(); + retry_future.ExecuteWhenReady( + [self = internal::IntrusivePtr(this)]( + ReadyFuture f) { + self->OnHeaderReadComplete(std::move(f)); + }); + return; + } + entry_->ReadError( + internal::ConvertInvalidArgumentToFailedPrecondition(r.status())); + return; + } + + if (r->not_found()) { + ABSL_LOG_IF(WARNING, tiff_logging) + << "File not found for key: " << entry_->key(); + entry_->ReadError(absl::NotFoundError("File not found")); + return; + } + if (r->aborted()) { + if (existing_read_data_) { + ABSL_LOG_IF(INFO, tiff_logging) + << "Read aborted, returning existing data for key: " + << entry_->key(); + entry_->ReadSuccess(TiffDirectoryCache::ReadState{existing_read_data_, + std::move(r->stamp)}); + } else { + entry_->ReadError(absl::AbortedError("Read aborted.")); + } + return; + } + + // We now have partial data at offsets [0..someSize). + buffer = std::move(r->value); + uint64_t bytes_received = buffer.size(); + + // If we got less data than requested, treat it as a full read. + if (!is_full_read_ && bytes_received < kInitialReadBytes) { + parse_result_->full_read = true; + } else { + parse_result_->full_read = is_full_read_; + } + + riegeli::CordReader cord_reader(&buffer); + Endian endian; + absl::Status header_status = + ParseTiffHeader(cord_reader, endian, next_ifd_offset_); + if (!header_status.ok()) { + ABSL_LOG_IF(WARNING, tiff_logging) + << "Failed to parse TIFF header: " << header_status; + entry_->ReadError(header_status); + return; + } + ABSL_LOG_IF(INFO, tiff_logging) + << "TIFF header parsed successfully." + << ", Next IFD offset: " << next_ifd_offset_; + parse_result_->endian = endian; + + StartParsingIFDs(std::move(r->stamp)); + } + + /// This function begins (or continues) parsing IFDs at next_ifd_offset_ until + /// we reach offset=0 or an error. + void StartParsingIFDs(tensorstore::TimestampedStorageGeneration stamp) { + if (next_ifd_offset_ == 0) { + // No IFDs, so finalize + OnAllIFDsDone(std::move(stamp)); + return; + } + + absl::Status status = ParseOneIFD(); + if (absl::IsOutOfRange(status)) { + // Means we need more data + RequestMoreData(std::move(stamp)); + return; + } + if (!status.ok()) { + // Some other error + entry_->ReadError(status); + return; + } + + // If parse succeeded, check if the IFD we parsed gave us a new offset for + // the next IFD. + if (next_ifd_offset_ == 0) { + OnAllIFDsDone(std::move(stamp)); + return; + } + + // Parse the next IFD in the chain. + StartParsingIFDs(std::move(stamp)); + } + + // This attempts to parse one IFD at next_ifd_offset_ using our current + // buffer. If that offset is beyond the buffer range, returns OutOfRangeError. + // If success, updates parse_result_, next_ifd_offset_. + absl::Status ParseOneIFD() { + ABSL_LOG_IF(INFO, tiff_logging) + << "Parsing IFD at offset: " << next_ifd_offset_ + << " for key: " << entry_->key(); + + if (next_ifd_offset_ < file_offset_) { + return absl::DataLossError( + "IFD offset is behind our current buffer offset, which is " + "unexpected."); + } + + uint64_t relative_pos = next_ifd_offset_ - file_offset_; + uint64_t buffer_size = buffer.size(); + + if (relative_pos > buffer_size) { + ABSL_LOG_IF(WARNING, tiff_logging) + << "Buffer underflow while parsing IFD. 
Needed next_ifd_offset: " + << relative_pos + << ", Max available offset: " << file_offset_ + buffer_size; + // We’re missing data + return absl::OutOfRangeError( + "Next IFD is outside our current buffer range."); + } + + // Slice off everything before relative_pos, because we no longer need it. + buffer = buffer.Subcord(relative_pos, buffer_size - relative_pos); + file_offset_ = next_ifd_offset_; + + // Now parse from the beginning of buffer as offset=0 in the local sense. + riegeli::CordReader reader(&buffer); + TiffDirectory dir; + absl::Status status = + ParseTiffDirectory(reader, parse_result_->endian, + /*local_offset=*/0, buffer.size(), dir); + if (!status.ok()) { + ABSL_LOG_IF(WARNING, tiff_logging) << "Failed to parse IFD: " << status; + return status; + } + + // Store the IFD’s entries in parse_result_->ifd_entries (or directories). + parse_result_->directories.push_back(dir); + + // Update next_ifd_offset_ to the directory’s next offset + next_ifd_offset_ = dir.next_ifd_offset; + ABSL_LOG_IF(INFO, tiff_logging) + << "Parsed IFD successfully. Next IFD offset: " << dir.next_ifd_offset; + return absl::OkStatus(); + } + + /// If we discover we need more data to parse the next IFD, we read newer + /// bytes from the file. Suppose we read from [file_offset_ + buffer.size(), + /// file_offset_ + buffer.size() + chunk). + void RequestMoreData(tensorstore::TimestampedStorageGeneration stamp) { + ABSL_LOG_IF(INFO, tiff_logging) + << "Requesting more data for key: " << entry_->key() + << ". Current buffer size: " << buffer.size() + << ", Full read: " << parse_result_->full_read; + if (parse_result_->full_read) { + // We’re already in full read mode and still are outOfRange => truncated + // file or corrupted offset + entry_->ReadError( + absl::DataLossError("Insufficient data after full read.")); + return; + } + + if (!is_full_read_) { + uint64_t current_data_end = file_offset_ + buffer.size(); + // Start from the next IFD offset if it's beyond what we already have: + uint64_t read_begin = std::max(current_data_end, next_ifd_offset_); + uint64_t read_end = read_begin + kInitialReadBytes; + + // If we need to request more than some large threshold, + // we might want to do a full read. + if (read_end - read_begin > (32 * 1024 * 1024)) { + is_full_read_ = true; + options_.byte_range = OptionalByteRangeRequest(file_offset_); + } else { + options_.byte_range = + OptionalByteRangeRequest::Range(read_begin, read_end); + } + } else { + // We set parse_result_->full_read but apparently we didn’t get enough + // data. That’s an error or truncated file. + entry_->ReadError(absl::DataLossError( + "Need more data after already in full-read mode.")); + return; + } + + auto& cache = internal::GetOwningCache(*entry_); + auto future = + cache.kvstore_driver_->Read(std::string(entry_->key()), options_); + ABSL_LOG_IF(INFO, tiff_logging) + << "Issued additional read request for key: " << entry_->key() + << " with byte range: " << options_.byte_range; + future.Force(); + future.ExecuteWhenReady( + [self = internal::IntrusivePtr(this), + s = std::move(stamp)](ReadyFuture ready) mutable { + ABSL_LOG_IF(INFO, tiff_logging) + << "Additional read completed for key: " << self->entry_->key(); + self->OnAdditionalDataRead(std::move(ready), std::move(s)); + }); + } + + /// Called once more data arrives. We append that data to + /// buffer and attempt parsing the IFD again. 
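  /// Example: if the buffer currently covers bytes [0, 1024) and the next IFD
  /// sits at offset 1'000'000, the follow-up ranged read begins at 1'000'000,
  /// so the non-contiguous branch below replaces the buffer and advances
  /// file_offset_ instead of appending, keeping memory proportional to the
  /// bytes actually needed.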
+ void OnAdditionalDataRead(ReadyFuture ready, + tensorstore::TimestampedStorageGeneration stamp) { + const auto& r = ready.result(); + if (!r.ok()) { + // Possibly partial read overshoot again + if (!is_full_read_ && absl::IsOutOfRange(r.status())) { + is_full_read_ = true; + options_.byte_range = OptionalByteRangeRequest(file_offset_); + auto& cache = internal::GetOwningCache(*entry_); + auto future = + cache.kvstore_driver_->Read(std::string(entry_->key()), options_); + future.Force(); + future.ExecuteWhenReady( + [self = internal::IntrusivePtr(this), + stamp = + std::move(stamp)](ReadyFuture f) mutable { + self->OnAdditionalDataRead(std::move(f), std::move(stamp)); + }); + return; + } + entry_->ReadError( + internal::ConvertInvalidArgumentToFailedPrecondition(r.status())); + return; + } + + auto& rr = *r; + if (rr.not_found()) { + entry_->ReadError( + absl::NotFoundError("Not found during incremental read.")); + return; + } + if (rr.aborted()) { + if (existing_read_data_) { + entry_->ReadSuccess(TiffDirectoryCache::ReadState{existing_read_data_, + std::move(rr.stamp)}); + return; + } + entry_->ReadError(absl::AbortedError("Read aborted, no existing data.")); + return; + } + + // If we're reading from next_ifd_offset directly (which is far away from + // our buffer end), we should reset our buffer instead of appending. + if (options_.byte_range.inclusive_min >= file_offset_ + buffer.size()) { + buffer = std::move(rr.value); + file_offset_ = options_.byte_range.inclusive_min; + } else { + // Append new data to buffer (contiguous read) + size_t old_size = buffer.size(); + buffer.Append(rr.value); + size_t new_size = buffer.size(); + + // If we got less data than requested, treat it as a full read + if (!is_full_read_ && + (new_size - old_size) < (options_.byte_range.size() - old_size)) { + parse_result_->full_read = true; + } + } + + parse_result_->full_read = parse_result_->full_read || is_full_read_; + + // We can now try parsing the same IFD offset again + StartParsingIFDs(std::move(stamp)); + } + + /// Called when we exhaust next_ifd_offset_ (i.e., reached offset=0 in the + /// chain). We parse the final directory or load external arrays, etc. + void OnAllIFDsDone(tensorstore::TimestampedStorageGeneration stamp) { + ABSL_LOG_IF(INFO, tiff_logging) + << "All IFDs parsed successfully for key: " << entry_->key() + << ". Total directories: " << parse_result_->directories.size(); + // We now have parse_result_->directories for all IFDs. + // Reserve space for a matching list of ImageDirectory objects. + parse_result_->image_directories.clear(); + parse_result_->image_directories.resize(parse_result_->directories.size()); + + bool has_external_arrays = false; + + // Parse each TiffDirectory into a corresponding ImageDirectory. + // Also check entries for external arrays. 
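    // (The external arrays handled later by LoadExternalArrays() are the
    // tile/strip offset and byte-count tables plus multi-valued
    // BitsPerSample/SampleFormat; external data for any other tag is still
    // read but its contents are ignored.)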
+ for (size_t i = 0; i < parse_result_->directories.size(); ++i) { + ABSL_LOG_IF(INFO, tiff_logging) << "Parsing image metadata from IFD #" + << i << " for key: " << entry_->key(); + absl::Status status = + ParseImageDirectory(parse_result_->directories[i].entries, + parse_result_->image_directories[i]); + if (!status.ok()) { + entry_->ReadError(status); + return; + } + + // Check for external arrays in this directory’s entries + for (const auto& e : parse_result_->directories[i].entries) { + if (e.is_external_array) { + has_external_arrays = true; + } + } + } + + if (!has_external_arrays) { + ABSL_LOG_IF(INFO, tiff_logging) + << "No external arrays found for key: " << entry_->key(); + // We’re done + entry_->ReadSuccess(TiffDirectoryCache::ReadState{ + std::move(parse_result_), std::move(stamp)}); + return; + } + + // Otherwise, load external arrays + auto future = entry_->LoadExternalArrays(parse_result_, stamp); + future.Force(); + future.ExecuteWhenReady( + [self = internal::IntrusivePtr(this), + stamp](ReadyFuture load_done) { + if (!load_done.result().ok()) { + self->entry_->ReadError(load_done.result().status()); + return; + } + // Done + self->entry_->ReadSuccess(TiffDirectoryCache::ReadState{ + std::move(self->parse_result_), std::move(stamp)}); + }); + } +}; + +} // namespace + +Future TiffDirectoryCache::Entry::LoadExternalArrays( + std::shared_ptr parse_result, + tensorstore::TimestampedStorageGeneration stamp) { + ABSL_LOG_IF(INFO, tiff_logging) + << "Loading external arrays for key: " << this->key(); + // Collect all external arrays that need to be loaded + struct ExternalArrayInfo { + Tag tag; + TiffDataType type; + uint64_t offset; + uint64_t count; + size_t image_index; + }; + + std::vector external_arrays; + + // Collect external arrays from each directory (and store them by index). + for (size_t i = 0; i < parse_result->directories.size(); ++i) { + const auto& tiff_dir = parse_result->directories[i]; + + for (const auto& entry : tiff_dir.entries) { + if (!entry.is_external_array) continue; + + ExternalArrayInfo info; + info.tag = entry.tag; + info.type = entry.type; + info.offset = entry.value_or_offset; + info.count = entry.count; + info.image_index = i; + external_arrays.push_back(info); + } + } + + // If no external arrays, we can return immediately. + if (external_arrays.empty()) { + return MakeReadyFuture(); + } + + auto [promise, future] = PromiseFuturePair::Make(); + auto& cache = internal::GetOwningCache(*this); + + // Track how many arrays remain. We build a small shared struct to handle + // completion. + struct LoadState : public internal::AtomicReferenceCount { + size_t remaining_count; + absl::Status first_error; + Promise done_promise; + + LoadState(size_t count, Promise pr) + : remaining_count(count), done_promise(std::move(pr)) {} + + void CompleteOne(absl::Status s) { + if (!s.ok() && first_error.ok()) { + first_error = s; // Record the first error + } + if (--remaining_count == 0) { + // If we encountered any error, set that; otherwise OK. + if (first_error.ok()) { + done_promise.SetResult(absl::OkStatus()); + } else { + done_promise.SetResult(first_error); + } + } + } + }; + + auto load_state = internal::MakeIntrusivePtr( + external_arrays.size(), std::move(promise)); + + // Issue read operations for each external array in parallel. 
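  // Example: a LONG (4-byte) TileOffsets entry with count == 4 stored at file
  // offset 1000 produces a conditional ranged read of bytes [1000, 1016); each
  // read completes independently, and LoadState::CompleteOne() resolves the
  // promise once the last one finishes, surfacing the first error encountered.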
+ for (const auto& array_info : external_arrays) { + ABSL_LOG_IF(INFO, tiff_logging) + << "Reading external array for tag: " + << static_cast(array_info.tag) << ", Offset: " << array_info.offset + << ", Count: " << array_info.count; + // Compute the byte range. + size_t element_size = GetTiffDataTypeSize(array_info.type); + uint64_t byte_count = array_info.count * element_size; + + kvstore::ReadOptions read_opts; + read_opts.generation_conditions.if_equal = stamp.generation; + read_opts.byte_range = OptionalByteRangeRequest::Range( + array_info.offset, array_info.offset + byte_count); + + ABSL_LOG_IF(INFO, tiff_logging) + << "Reading external array for tag " << static_cast(array_info.tag) + << " at offset " << array_info.offset << " size " << byte_count; + + auto read_future = + cache.kvstore_driver_->Read(std::string(this->key()), read_opts); + read_future.Force(); + + read_future.ExecuteWhenReady( + [ls = load_state, parse_result, array_info, + stamp](ReadyFuture ready) mutable { + auto& rr = ready.result(); + if (!rr.ok()) { + ls->CompleteOne( + internal::ConvertInvalidArgumentToFailedPrecondition( + rr.status())); + return; + } + + if (rr->not_found() || rr->aborted()) { + ls->CompleteOne( + absl::DataLossError("Missing or aborted external array read.")); + return; + } + + // We'll parse the data into the image directory's appropriate field. + auto& img_dir = + parse_result->image_directories[array_info.image_index]; + + riegeli::CordReader cord_reader(&rr->value); + absl::Status parse_status; + if (array_info.type == TiffDataType::kShort && + (array_info.tag == Tag::kBitsPerSample || + array_info.tag == Tag::kSampleFormat)) { + // Parse uint16_t arrays + std::vector* uint16_array = nullptr; + + switch (array_info.tag) { + case Tag::kBitsPerSample: + uint16_array = &img_dir.bits_per_sample; + break; + case Tag::kSampleFormat: + uint16_array = &img_dir.sample_format; + break; + default: + break; + } + + if (uint16_array) { + parse_status = ParseUint16Array(cord_reader, parse_result->endian, + /*offset=*/0, array_info.count, + *uint16_array); + } else { + parse_status = absl::OkStatus(); // Skip unhandled uint16_t array + } + } else { + // Parse uint64_t arrays + std::vector* output_array = nullptr; + switch (array_info.tag) { + case Tag::kStripOffsets: + output_array = &img_dir.chunk_offsets; + break; + case Tag::kStripByteCounts: + output_array = &img_dir.chunk_bytecounts; + break; + case Tag::kTileOffsets: + output_array = &img_dir.chunk_offsets; + break; + case Tag::kTileByteCounts: + output_array = &img_dir.chunk_bytecounts; + break; + default: + break; // Skip unhandled uint64_t array + } + + if (output_array) { + parse_status = + ParseExternalArray(cord_reader, parse_result->endian, + /*offset=*/0, array_info.count, + array_info.type, *output_array); + } else { + parse_status = absl::OkStatus(); // Skip unhandled tag + } + } + + ls->CompleteOne(parse_status); + }); + } + + return future; +} + +size_t TiffDirectoryCache::Entry::ComputeReadDataSizeInBytes( + const void* read_data) { + return internal::EstimateHeapUsage(*static_cast(read_data)); +} + +void TiffDirectoryCache::Entry::DoRead(AsyncCacheReadRequest request) { + auto state = internal::MakeIntrusivePtr(); + state->entry_ = this; + state->options_.staleness_bound = request.staleness_bound; + { + ReadLock lock(*this); + state->existing_read_data_ = lock.shared_data(); + state->options_.generation_conditions.if_not_equal = + lock.read_state().stamp.generation; + } + + state->StartTiffRead(); +} + +TiffDirectoryCache::Entry* 
TiffDirectoryCache::DoAllocateEntry() { + return new Entry; +} + +size_t TiffDirectoryCache::DoGetSizeofEntry() { return sizeof(Entry); } + +} // namespace internal_tiff_kvstore +} // namespace tensorstore \ No newline at end of file diff --git a/tensorstore/kvstore/tiff/tiff_dir_cache.h b/tensorstore/kvstore/tiff/tiff_dir_cache.h new file mode 100644 index 000000000..35ff21e3a --- /dev/null +++ b/tensorstore/kvstore/tiff/tiff_dir_cache.h @@ -0,0 +1,98 @@ +// Copyright 2025 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORSTORE_KVSTORE_TIFF_TIFF_DIR_CACHE_H_ +#define TENSORSTORE_KVSTORE_TIFF_TIFF_DIR_CACHE_H_ + +#include + +#include "absl/strings/cord.h" +#include "tensorstore/internal/cache/async_cache.h" +#include "tensorstore/internal/cache/async_initialized_cache_mixin.h" +#include "tensorstore/kvstore/driver.h" +#include "tensorstore/kvstore/generation.h" +#include "tensorstore/kvstore/tiff/tiff_details.h" +#include "tensorstore/util/executor.h" + +namespace tensorstore { +namespace internal_tiff_kvstore { + +// First attempt reads this many bytes. +inline constexpr std::size_t kInitialReadBytes = 1024; + +struct TiffParseResult { + bool full_read = false; // Indicates if the entire file was read + + // Store the endian order for the TIFF file + Endian endian; + + // Store all IFD directories in the TIFF file + std::vector directories; + + // Store all parsed image directories + std::vector image_directories; + + constexpr static auto ApplyMembers = [](auto&& x, auto f) { + return f(x.full_read, x.endian, x.directories, x.image_directories); + }; +}; + +class TiffDirectoryCache : public internal::AsyncCache, + public internal::AsyncInitializedCacheMixin { + using Base = internal::AsyncCache; + + public: + using ReadData = TiffParseResult; + + explicit TiffDirectoryCache(kvstore::DriverPtr kv, Executor exec) + : kvstore_driver_(std::move(kv)), executor_(std::move(exec)) {} + + class Entry : public Base::Entry { + public: + using OwningCache = TiffDirectoryCache; + size_t ComputeReadDataSizeInBytes(const void* read_data) final; + void DoRead(AsyncCacheReadRequest request) final; + + // Load external arrays identified during IFD parsing + Future LoadExternalArrays( + std::shared_ptr parse_result, + tensorstore::TimestampedStorageGeneration stamp); + + absl::Status AnnotateError(const absl::Status& error, bool reading) { + return GetOwningCache(*this).kvstore_driver_->AnnotateError( + this->key(), reading ? "reading" : "writing", error); + } + }; + + Entry* DoAllocateEntry() final; + size_t DoGetSizeofEntry() final; + + TransactionNode* DoAllocateTransactionNode(AsyncCache::Entry& entry) final { + ABSL_UNREACHABLE(); // Not implemented. 
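+    // The directory cache is currently read-only, so transaction nodes are
+    // never requested for it.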
+ return nullptr; + } + + kvstore::DriverPtr kvstore_driver_; + Executor executor_; + + const Executor& executor() { return executor_; } +}; + +} // namespace internal_tiff_kvstore +} // namespace tensorstore + +TENSORSTORE_DECLARE_GARBAGE_COLLECTION_NOT_REQUIRED( + tensorstore::internal_tiff_kvstore::TiffDirectoryCache::Entry) + +#endif // TENSORSTORE_KVSTORE_TIFF_TIFF_DIR_CACHE_H_ \ No newline at end of file diff --git a/tensorstore/kvstore/tiff/tiff_dir_cache_test.cc b/tensorstore/kvstore/tiff/tiff_dir_cache_test.cc new file mode 100644 index 000000000..c7189ccc3 --- /dev/null +++ b/tensorstore/kvstore/tiff/tiff_dir_cache_test.cc @@ -0,0 +1,713 @@ +// Copyright 2025 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/kvstore/tiff/tiff_dir_cache.h" + +#include +#include + +#include +#include + +#include "absl/strings/cord.h" +#include "absl/time/time.h" +#include "tensorstore/context.h" +#include "tensorstore/internal/cache/cache.h" +#include "tensorstore/internal/intrusive_ptr.h" +#include "tensorstore/kvstore/kvstore.h" +#include "tensorstore/kvstore/operations.h" +#include "tensorstore/kvstore/tiff/tiff_test_util.h" +#include "tensorstore/util/executor.h" +#include "tensorstore/util/status.h" +#include "tensorstore/util/status_testutil.h" + +namespace { + +using ::tensorstore::Context; +using ::tensorstore::InlineExecutor; +using ::tensorstore::internal::CachePool; +using ::tensorstore::internal::GetCache; +using ::tensorstore::internal_tiff_kvstore::TiffDirectoryCache; +using ::tensorstore::internal_tiff_kvstore::testing::TiffBuilder; + +TEST(TiffDirectoryCacheTest, ReadSlice) { + auto context = Context::Default(); + auto pool = CachePool::Make(CachePool::Limits{}); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + tensorstore::KvStore memory, + tensorstore::kvstore::Open({{"driver", "memory"}}, context).result()); + + TiffBuilder builder; + auto tiff_data = builder.StartIfd(6) + .AddEntry(256, 3, 1, 256) + .AddEntry(257, 3, 1, 256) + .AddEntry(322, 3, 1, 256) + .AddEntry(323, 3, 1, 256) + .AddEntry(324, 4, 1, 128) + .AddEntry(325, 4, 1, 256) + .EndIfd() + .PadTo(2048) + .Build(); + + ASSERT_THAT( + tensorstore::kvstore::Write(memory, "test.tiff", absl::Cord(tiff_data)) + .result(), + ::tensorstore::IsOk()); + + auto cache = GetCache(pool.get(), "", [&] { + return std::make_unique(memory.driver, + InlineExecutor{}); + }); + + auto entry = GetCacheEntry(cache, "test.tiff"); + + // Request with specified range - should read first 1024 bytes + { + tensorstore::internal::AsyncCache::AsyncCacheReadRequest request; + request.staleness_bound = absl::InfinitePast(); + + ASSERT_THAT(entry->Read(request).result(), ::tensorstore::IsOk()); + + TiffDirectoryCache::ReadLock lock(*entry); + auto* data = lock.data(); + ASSERT_THAT(data, ::testing::NotNull()); + EXPECT_FALSE(data->full_read); + + EXPECT_EQ(data->directories.size(), 1); + EXPECT_EQ(data->directories[0].entries.size(), 6); + EXPECT_EQ(data->image_directories.size(), 1); + + 
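+    // The builder above emitted a single 256x256 tiled IFD, so the parsed
+    // image directory should reflect those dimensions.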
EXPECT_EQ(data->image_directories[0].width, 256); + EXPECT_EQ(data->image_directories[0].height, 256); + EXPECT_EQ(data->image_directories[0].is_tiled, true); + EXPECT_EQ(data->image_directories[0].chunk_width, 256); + EXPECT_EQ(data->image_directories[0].chunk_height, 256); + } +} + +TEST(TiffDirectoryCacheTest, ReadFull) { + auto context = Context::Default(); + auto pool = CachePool::Make(CachePool::Limits{}); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + tensorstore::KvStore memory, + tensorstore::kvstore::Open({{"driver", "memory"}}, context).result()); + + TiffBuilder builder; + auto tiff_data = builder + .StartIfd(5) // 5 entries + // Add strip-based entries + .AddEntry(256, 3, 1, 400) // ImageWidth = 400 + .AddEntry(257, 3, 1, 300) // ImageLength = 300 + .AddEntry(278, 3, 1, 100) // RowsPerStrip = 100 + .AddEntry(273, 4, 1, 128) // StripOffsets = 128 + .AddEntry(279, 4, 1, 200) // StripByteCounts = 200 + .EndIfd() // No more IFDs + .PadTo(512) // Pad to fill data + .Build(); + + ASSERT_THAT( + tensorstore::kvstore::Write(memory, "test.tiff", absl::Cord(tiff_data)) + .result(), + ::tensorstore::IsOk()); + + auto cache = GetCache(pool.get(), "", [&] { + return std::make_unique(memory.driver, + InlineExecutor{}); + }); + + auto entry = GetCacheEntry(cache, "test.tiff"); + + { + tensorstore::internal::AsyncCache::AsyncCacheReadRequest request; + request.staleness_bound = absl::InfinitePast(); + + ASSERT_THAT(entry->Read(request).result(), ::tensorstore::IsOk()); + + TiffDirectoryCache::ReadLock lock(*entry); + auto* data = lock.data(); + ASSERT_THAT(data, ::testing::NotNull()); + EXPECT_TRUE(data->full_read); + + EXPECT_EQ(data->directories.size(), 1); + EXPECT_EQ(data->directories[0].entries.size(), 5); + EXPECT_EQ(data->image_directories.size(), 1); + + EXPECT_EQ(data->image_directories[0].width, 400); + EXPECT_EQ(data->image_directories[0].height, 300); + EXPECT_EQ(data->image_directories[0].is_tiled, false); + EXPECT_EQ(data->image_directories[0].chunk_height, 100); + EXPECT_EQ(data->image_directories[0].chunk_offsets.size(), 1); + EXPECT_EQ(data->image_directories[0].chunk_offsets[0], 128); + EXPECT_EQ(data->image_directories[0].chunk_bytecounts.size(), 1); + EXPECT_EQ(data->image_directories[0].chunk_bytecounts[0], 200); + } +} + +TEST(TiffDirectoryCacheTest, BadIfdFailsParse) { + auto context = Context::Default(); + auto pool = CachePool::Make(CachePool::Limits{}); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + tensorstore::KvStore memory, + tensorstore::kvstore::Open({{"driver", "memory"}}, context).result()); + + TiffBuilder builder; + // Claim 10 entries (too many) + auto corrupt_tiff = builder.StartIfd(10).AddEntry(1, 1, 1, 0).Build(); + + ASSERT_THAT(tensorstore::kvstore::Write(memory, "corrupt.tiff", + absl::Cord(corrupt_tiff)) + .result(), + ::tensorstore::IsOk()); + + auto cache = GetCache(pool.get(), "", [&] { + return std::make_unique(memory.driver, + InlineExecutor{}); + }); + + auto entry = GetCacheEntry(cache, "corrupt.tiff"); + + tensorstore::internal::AsyncCache::AsyncCacheReadRequest request; + request.staleness_bound = absl::InfinitePast(); + + // Reading should fail due to corrupt IFD + auto read_result = entry->Read(request).result(); + EXPECT_THAT(read_result.status(), ::testing::Not(::tensorstore::IsOk())); + EXPECT_TRUE(absl::IsDataLoss(read_result.status()) || + absl::IsInvalidArgument(read_result.status())); +} + +TEST(TiffDirectoryCacheTest, ExternalArrays_EagerLoad) { + auto context = Context::Default(); + auto pool = CachePool::Make(CachePool::Limits{}); + + 
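+  // StripOffsets and StripByteCounts are written as external uint32 arrays
+  // below; this test verifies they are fetched eagerly while the directory
+  // is being parsed.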
TENSORSTORE_ASSERT_OK_AND_ASSIGN( + tensorstore::KvStore memory, + tensorstore::kvstore::Open({{"driver", "memory"}}, context).result()); + + uint32_t strip_offsets_offset = 200; + uint32_t strip_bytecounts_offset = 216; + uint32_t strip_offsets[4] = {1000, 2000, 3000, 4000}; + uint32_t strip_bytecounts[4] = {500, 600, 700, 800}; + + TiffBuilder builder; + auto tiff_data = + builder.StartIfd(5) + .AddEntry(256, 3, 1, 800) + .AddEntry(257, 3, 1, 600) + .AddEntry(278, 3, 1, 100) + .AddEntry(273, 4, 4, strip_offsets_offset) + .AddEntry(279, 4, 4, strip_bytecounts_offset) + .EndIfd() + .PadTo(strip_offsets_offset) + .AddUint32Array({strip_offsets[0], strip_offsets[1], strip_offsets[2], + strip_offsets[3]}) + .AddUint32Array({strip_bytecounts[0], strip_bytecounts[1], + strip_bytecounts[2], strip_bytecounts[3]}) + .PadTo(4096) + .Build(); + + ASSERT_THAT(tensorstore::kvstore::Write(memory, "external_arrays.tiff", + absl::Cord(tiff_data)) + .result(), + ::tensorstore::IsOk()); + + auto cache = GetCache(pool.get(), "", [&] { + return std::make_unique(memory.driver, + InlineExecutor{}); + }); + + auto entry = GetCacheEntry(cache, "external_arrays.tiff"); + + { + tensorstore::internal::AsyncCache::AsyncCacheReadRequest request; + request.staleness_bound = absl::InfinitePast(); + + ASSERT_THAT(entry->Read(request).result(), ::tensorstore::IsOk()); + + TiffDirectoryCache::ReadLock lock(*entry); + auto* data = lock.data(); + ASSERT_THAT(data, ::testing::NotNull()); + + EXPECT_EQ(data->image_directories[0].chunk_offsets.size(), 4); + EXPECT_EQ(data->image_directories[0].chunk_bytecounts.size(), 4); + + for (int i = 0; i < 4; i++) { + EXPECT_EQ(data->image_directories[0].chunk_offsets[i], strip_offsets[i]); + EXPECT_EQ(data->image_directories[0].chunk_bytecounts[i], + strip_bytecounts[i]); + } + } +} + +TEST(TiffDirectoryCacheTest, ExternalArrays_BadPointer) { + auto context = Context::Default(); + auto pool = CachePool::Make(CachePool::Limits{}); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + tensorstore::KvStore memory, + tensorstore::kvstore::Open({{"driver", "memory"}}, context).result()); + + uint32_t invalid_offset = 50000; // Far beyond our file size + + TiffBuilder builder; + auto tiff_data = builder.StartIfd(5) + .AddEntry(256, 3, 1, 800) + .AddEntry(257, 3, 1, 600) + .AddEntry(278, 3, 1, 100) + .AddEntry(273, 4, 4, invalid_offset) + .AddEntry(279, 4, 1, 500) + .EndIfd() + .PadTo(1000) + .Build(); + + ASSERT_THAT(tensorstore::kvstore::Write(memory, "bad_external_array.tiff", + absl::Cord(tiff_data)) + .result(), + ::tensorstore::IsOk()); + + auto cache = GetCache(pool.get(), "", [&] { + return std::make_unique(memory.driver, + InlineExecutor{}); + }); + + auto entry = GetCacheEntry(cache, "bad_external_array.tiff"); + + tensorstore::internal::AsyncCache::AsyncCacheReadRequest request; + request.staleness_bound = absl::InfinitePast(); + + auto read_result = entry->Read(request).result(); + EXPECT_THAT(read_result.status(), ::testing::Not(::tensorstore::IsOk())); + + EXPECT_TRUE(absl::IsOutOfRange(read_result.status()) || + absl::IsDataLoss(read_result.status()) || + absl::IsInvalidArgument(read_result.status()) || + absl::IsFailedPrecondition(read_result.status())); +} + +// Helper to create a test TIFF file with multiple IFDs +std::string MakeMultiPageTiff() { + TiffBuilder builder; + + return builder.StartIfd(5) + .AddEntry(256, 3, 1, 400) + .AddEntry(257, 3, 1, 100) + .AddEntry(278, 3, 1, 100) + .AddEntry(273, 4, 1, 1000) + .AddEntry(279, 4, 1, 200) + .EndIfd(200) + .PadTo(200) + .StartIfd(6) + 
.AddEntry(256, 3, 1, 256) + .AddEntry(257, 3, 1, 256) + .AddEntry(322, 3, 1, 256) + .AddEntry(323, 3, 1, 256) + .AddEntry(324, 4, 1, 2000) + .AddEntry(325, 4, 1, 300) + .EndIfd() + .PadTo(3000) + .Build(); +} + +TEST(TiffDirectoryCacheMultiIfdTest, ReadAndVerifyIFDs) { + auto context = Context::Default(); + auto pool = CachePool::Make(CachePool::Limits{}); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + tensorstore::KvStore memory, + tensorstore::kvstore::Open({{"driver", "memory"}}, context).result()); + + ASSERT_THAT(tensorstore::kvstore::Write(memory, "multi_ifd.tiff", + absl::Cord(MakeMultiPageTiff())) + .result(), + ::tensorstore::IsOk()); + + auto cache = GetCache(pool.get(), "", [&] { + return std::make_unique(memory.driver, + InlineExecutor{}); + }); + + auto entry = GetCacheEntry(cache, "multi_ifd.tiff"); + + tensorstore::internal::AsyncCache::AsyncCacheReadRequest request; + request.staleness_bound = absl::InfinitePast(); + + ASSERT_THAT(entry->Read(request).result(), ::tensorstore::IsOk()); + + TiffDirectoryCache::ReadLock lock(*entry); + auto* data = lock.data(); + ASSERT_THAT(data, ::testing::NotNull()); + + EXPECT_EQ(data->directories.size(), 2); + EXPECT_EQ(data->image_directories.size(), 2); + + // Check first IFD (strip-based) + const auto& ifd1 = data->directories[0]; + const auto& img1 = data->image_directories[0]; + EXPECT_EQ(ifd1.entries.size(), 5); + EXPECT_EQ(img1.width, 400); + EXPECT_EQ(img1.height, 100); + EXPECT_EQ(img1.is_tiled, false); + EXPECT_EQ(img1.chunk_height, 100); + EXPECT_EQ(img1.chunk_offsets.size(), 1); + EXPECT_EQ(img1.chunk_offsets[0], 1000); + EXPECT_EQ(img1.chunk_bytecounts[0], 200); + + // Check second IFD (tile-based) + const auto& ifd2 = data->directories[1]; + const auto& img2 = data->image_directories[1]; + EXPECT_EQ(ifd2.entries.size(), 6); + EXPECT_EQ(img2.width, 256); + EXPECT_EQ(img2.height, 256); + EXPECT_EQ(img2.is_tiled, true); + EXPECT_EQ(img2.chunk_width, 256); + EXPECT_EQ(img2.chunk_height, 256); + EXPECT_EQ(img2.chunk_offsets.size(), 1); + EXPECT_EQ(img2.chunk_offsets[0], 2000); + + EXPECT_FALSE(data->full_read); +} + +TEST(TiffDirectoryCacheMultiIfdTest, ReadLargeMultiPageTiff) { + auto context = Context::Default(); + auto pool = CachePool::Make(CachePool::Limits{}); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + tensorstore::KvStore memory, + tensorstore::kvstore::Open({{"driver", "memory"}}, context).result()); + + // Create a TIFF file larger than kInitialReadBytes + TiffBuilder builder; + auto tiff_data = builder.StartIfd(5) + .AddEntry(256, 3, 1, 400) + .AddEntry(257, 3, 1, 300) + .AddEntry(278, 3, 1, 100) + .AddEntry(273, 4, 1, 1024) + .AddEntry(279, 4, 1, 200) + .EndIfd(2048) + .PadTo(2048) + .StartIfd(6) + .AddEntry(256, 3, 1, 256) + .AddEntry(257, 3, 1, 256) + .AddEntry(322, 3, 1, 256) + .AddEntry(323, 3, 1, 256) + .AddEntry(324, 4, 1, 3000) + .AddEntry(325, 4, 1, 300) + .EndIfd() + .PadTo(4096) + .Build(); + + ASSERT_THAT(tensorstore::kvstore::Write(memory, "large_multi_ifd.tiff", + absl::Cord(tiff_data)) + .result(), + ::tensorstore::IsOk()); + + auto cache = GetCache(pool.get(), "", [&] { + return std::make_unique(memory.driver, + InlineExecutor{}); + }); + + auto entry = GetCacheEntry(cache, "large_multi_ifd.tiff"); + + tensorstore::internal::AsyncCache::AsyncCacheReadRequest request; + request.staleness_bound = absl::InfinitePast(); + + ASSERT_THAT(entry->Read(request).result(), ::tensorstore::IsOk()); + + TiffDirectoryCache::ReadLock lock(*entry); + auto* data = lock.data(); + ASSERT_THAT(data, ::testing::NotNull()); + + 
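+  // Both IFDs should be present even though the second one (at offset 2048)
+  // lies beyond the initial kInitialReadBytes (1024-byte) read window.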
EXPECT_EQ(data->directories.size(), 2); + EXPECT_EQ(data->image_directories.size(), 2); + + EXPECT_EQ(data->image_directories[0].width, 400); + EXPECT_EQ(data->image_directories[1].width, 256); +} + +TEST(TiffDirectoryCacheMultiIfdTest, ExternalArraysMultiIfdTest) { + auto context = Context::Default(); + auto pool = CachePool::Make(CachePool::Limits{}); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + tensorstore::KvStore memory, + tensorstore::kvstore::Open({{"driver", "memory"}}, context).result()); + + std::vector offsets1 = {1000, 2000, 3000, 4000}; + std::vector bytecounts1 = {50, 60, 70, 80}; + std::vector offsets2 = {5000, 5004, 5008, 5012}; + std::vector bytecounts2 = {100, 200, 300, 400}; + + TiffBuilder builder; + auto tiff_data = builder.StartIfd(5) + .AddEntry(256, 3, 1, 400) + .AddEntry(257, 3, 1, 300) + .AddEntry(278, 3, 1, 100) + .AddEntry(273, 4, 4, 512) + .AddEntry(279, 4, 4, 528) + .EndIfd(600) + .PadTo(512) + .AddUint32Array(offsets1) + .AddUint32Array(bytecounts1) + .PadTo(600) + .StartIfd(6) + .AddEntry(256, 3, 1, 512) + .AddEntry(257, 3, 1, 512) + .AddEntry(322, 3, 1, 256) + .AddEntry(323, 3, 1, 256) + .AddEntry(324, 4, 4, 700) + .AddEntry(325, 4, 4, 716) + .EndIfd() + .PadTo(700) + .AddUint32Array(offsets2) + .AddUint32Array(bytecounts2) + .Build(); + + ASSERT_THAT(tensorstore::kvstore::Write(memory, "multi_ifd_external.tiff", + absl::Cord(tiff_data)) + .result(), + ::tensorstore::IsOk()); + + auto cache = GetCache(pool.get(), "", [&] { + return std::make_unique(memory.driver, + InlineExecutor{}); + }); + + auto entry = GetCacheEntry(cache, "multi_ifd_external.tiff"); + + // Read back with TiffDirectoryCache + tensorstore::internal::AsyncCache::AsyncCacheReadRequest request; + request.staleness_bound = absl::InfinitePast(); + + ASSERT_THAT(entry->Read(request).result(), ::tensorstore::IsOk()); + + TiffDirectoryCache::ReadLock lock(*entry); + auto* data = lock.data(); + ASSERT_THAT(data, ::testing::NotNull()); + + EXPECT_EQ(data->directories.size(), 2); + EXPECT_EQ(data->image_directories.size(), 2); + + EXPECT_EQ(data->image_directories[0].chunk_offsets.size(), 4); + EXPECT_EQ(data->image_directories[0].chunk_bytecounts.size(), 4); + + EXPECT_EQ(data->image_directories[1].chunk_offsets.size(), 4); + EXPECT_EQ(data->image_directories[1].chunk_bytecounts.size(), 4); +} + +TEST(TiffDirectoryCacheTest, ExternalArrays_Uint16Arrays) { + auto context = Context::Default(); + auto pool = CachePool::Make(CachePool::Limits{}); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + tensorstore::KvStore memory, + tensorstore::kvstore::Open({{"driver", "memory"}}, context).result()); + + uint32_t bits_per_sample_offset = 200; + uint32_t sample_format_offset = 212; + std::vector bits_values = {8, 8, 8}; + std::vector sample_format_values = {1, 1, 1}; + + TiffBuilder builder; + auto tiff_data = builder.StartIfd(8) + .AddEntry(256, 3, 1, 800) + .AddEntry(257, 3, 1, 600) + .AddEntry(277, 3, 1, 3) + .AddEntry(278, 3, 1, 100) + .AddEntry(258, 3, 3, bits_per_sample_offset) + .AddEntry(339, 3, 3, sample_format_offset) + .AddEntry(273, 4, 1, 1000) + .AddEntry(279, 4, 1, 30000) + .EndIfd() + .PadTo(bits_per_sample_offset) + .AddUint16Array(bits_values) + .PadTo(sample_format_offset) + .AddUint16Array(sample_format_values) + .PadTo(2048) + .Build(); + + ASSERT_THAT(tensorstore::kvstore::Write(memory, "uint16_arrays.tiff", + absl::Cord(tiff_data)) + .result(), + ::tensorstore::IsOk()); + + auto cache = GetCache(pool.get(), "", [&] { + return std::make_unique(memory.driver, + InlineExecutor{}); + }); + + auto entry = 
GetCacheEntry(cache, "uint16_arrays.tiff"); + + tensorstore::internal::AsyncCache::AsyncCacheReadRequest request; + request.staleness_bound = absl::InfinitePast(); + + ASSERT_THAT(entry->Read(request).result(), ::tensorstore::IsOk()); + + TiffDirectoryCache::ReadLock lock(*entry); + auto* data = lock.data(); + ASSERT_THAT(data, ::testing::NotNull()); + + const auto& img_dir = data->image_directories[0]; + + EXPECT_EQ(img_dir.samples_per_pixel, 3); + EXPECT_EQ(img_dir.chunk_height, 100); + ASSERT_EQ(img_dir.bits_per_sample.size(), 3); + + for (int i = 0; i < 3; i++) { + EXPECT_EQ(img_dir.bits_per_sample[i], bits_values[i]); + } + + ASSERT_EQ(img_dir.sample_format.size(), 3); + for (int i = 0; i < 3; i++) { + EXPECT_EQ(img_dir.sample_format[i], sample_format_values[i]); + } +} + +// Comprehensive test that checks all supported TIFF tags +TEST(TiffDirectoryCacheTest, ComprehensiveTiffTagsTest) { + auto context = Context::Default(); + auto pool = CachePool::Make(CachePool::Limits{}); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + tensorstore::KvStore memory, + tensorstore::kvstore::Open({{"driver", "memory"}}, context).result()); + + TiffBuilder builder; + auto tiff_data = builder.StartIfd(11) + .AddEntry(256, 3, 1, 1024) + .AddEntry(257, 3, 1, 768) + .AddEntry(258, 3, 1, 16) + .AddEntry(259, 3, 1, 1) + .AddEntry(262, 3, 1, 2) + .AddEntry(277, 3, 1, 1) + .AddEntry(278, 3, 1, 128) + .AddEntry(273, 4, 1, 1000) + .AddEntry(279, 4, 1, 65536) + .AddEntry(284, 3, 1, 1) + .AddEntry(339, 3, 1, 1) + .EndIfd() + .PadTo(2048) + .Build(); + + ASSERT_THAT(tensorstore::kvstore::Write(memory, "comprehensive_tags.tiff", + absl::Cord(tiff_data)) + .result(), + ::tensorstore::IsOk()); + + auto cache = GetCache(pool.get(), "", [&] { + return std::make_unique(memory.driver, + InlineExecutor{}); + }); + + auto entry = GetCacheEntry(cache, "comprehensive_tags.tiff"); + + tensorstore::internal::AsyncCache::AsyncCacheReadRequest request; + request.staleness_bound = absl::InfinitePast(); + + ASSERT_THAT(entry->Read(request).result(), ::tensorstore::IsOk()); + + TiffDirectoryCache::ReadLock lock(*entry); + auto* data = lock.data(); + ASSERT_THAT(data, ::testing::NotNull()); + + const auto& img_dir = data->image_directories[0]; + EXPECT_EQ(img_dir.width, 1024); + EXPECT_EQ(img_dir.height, 768); + ASSERT_EQ(img_dir.bits_per_sample.size(), 1); + EXPECT_EQ(img_dir.bits_per_sample[0], 16); + EXPECT_EQ(img_dir.compression, 1); + EXPECT_EQ(img_dir.photometric, 2); + EXPECT_EQ(img_dir.samples_per_pixel, 1); + EXPECT_EQ(img_dir.is_tiled, false); + EXPECT_EQ(img_dir.chunk_height, 128); + ASSERT_EQ(img_dir.chunk_offsets.size(), 1); + EXPECT_EQ(img_dir.chunk_offsets[0], 1000); + ASSERT_EQ(img_dir.chunk_bytecounts.size(), 1); + EXPECT_EQ(img_dir.chunk_bytecounts[0], 65536); + EXPECT_EQ(img_dir.planar_config, 1); + ASSERT_EQ(img_dir.sample_format.size(), 1); + EXPECT_EQ(img_dir.sample_format[0], 1); +} + +// Test for a tiled TIFF with all supported tags +TEST(TiffDirectoryCacheTest, TiledTiffWithAllTags) { + auto context = Context::Default(); + auto pool = CachePool::Make(CachePool::Limits{}); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + tensorstore::KvStore memory, + tensorstore::kvstore::Open({{"driver", "memory"}}, context).result()); + + TiffBuilder builder; + auto tiff_data = builder.StartIfd(12) + .AddEntry(256, 3, 1, 256) + .AddEntry(257, 3, 1, 256) + .AddEntry(258, 3, 1, 32) + .AddEntry(259, 3, 1, 8) + .AddEntry(262, 3, 1, 1) + .AddEntry(277, 3, 1, 1) + .AddEntry(284, 3, 1, 1) + .AddEntry(339, 3, 1, 3) + .AddEntry(322, 3, 1, 256) + 
.AddEntry(323, 3, 1, 256) + .AddEntry(324, 4, 1, 1000) + .AddEntry(325, 4, 1, 10000) + .EndIfd() + .PadTo(2048) + .Build(); + + ASSERT_THAT(tensorstore::kvstore::Write(memory, "tiled_tiff_all_tags.tiff", + absl::Cord(tiff_data)) + .result(), + ::tensorstore::IsOk()); + + auto cache = GetCache(pool.get(), "", [&] { + return std::make_unique(memory.driver, + InlineExecutor{}); + }); + + auto entry = GetCacheEntry(cache, "tiled_tiff_all_tags.tiff"); + + tensorstore::internal::AsyncCache::AsyncCacheReadRequest request; + request.staleness_bound = absl::InfinitePast(); + + ASSERT_THAT(entry->Read(request).result(), ::tensorstore::IsOk()); + + TiffDirectoryCache::ReadLock lock(*entry); + auto* data = lock.data(); + ASSERT_THAT(data, ::testing::NotNull()); + + const auto& img_dir = data->image_directories[0]; + + EXPECT_EQ(img_dir.width, 256); + EXPECT_EQ(img_dir.height, 256); + ASSERT_EQ(img_dir.bits_per_sample.size(), 1); + EXPECT_EQ(img_dir.bits_per_sample[0], 32); + EXPECT_EQ(img_dir.compression, 8); + EXPECT_EQ(img_dir.photometric, 1); + EXPECT_EQ(img_dir.samples_per_pixel, 1); + EXPECT_EQ(img_dir.planar_config, 1); + ASSERT_EQ(img_dir.sample_format.size(), 1); + EXPECT_EQ(img_dir.sample_format[0], 3); + + EXPECT_EQ(img_dir.chunk_width, 256); + EXPECT_EQ(img_dir.chunk_height, 256); + ASSERT_EQ(img_dir.chunk_offsets.size(), 1); + EXPECT_EQ(img_dir.chunk_offsets[0], 1000); + ASSERT_EQ(img_dir.chunk_bytecounts.size(), 1); + EXPECT_EQ(img_dir.chunk_bytecounts[0], 10000); +} + +} // namespace \ No newline at end of file diff --git a/tensorstore/kvstore/tiff/tiff_key_value_store.cc b/tensorstore/kvstore/tiff/tiff_key_value_store.cc new file mode 100644 index 000000000..aad3ab6be --- /dev/null +++ b/tensorstore/kvstore/tiff/tiff_key_value_store.cc @@ -0,0 +1,501 @@ +// Copyright 2025 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
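+//
+// Read-only kvstore adapter that exposes the chunks (tiles or strips) of a
+// TIFF file under keys of the form "chunk/<ifd>/<linear_index>".  A sketch of
+// the spec shape exercised by the tests (see tiff_key_value_store_test.cc):
+//
+//   {"driver": "tiff", "base": {"driver": "memory", "path": "data.tiff"}}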
+ +#include "tensorstore/kvstore/tiff/tiff_key_value_store.h" + +#include +#include +#include +#include +#include + +#include "absl/log/absl_log.h" +#include "absl/status/status.h" +#include "absl/strings/cord.h" +#include "absl/strings/str_format.h" +#include "absl/strings/strip.h" +#include "tensorstore/context.h" +#include "tensorstore/internal/cache/async_cache.h" +#include "tensorstore/internal/cache/cache.h" +#include "tensorstore/internal/cache/cache_pool_resource.h" +#include "tensorstore/internal/cache_key/cache_key.h" +#include "tensorstore/internal/data_copy_concurrency_resource.h" +#include "tensorstore/internal/intrusive_ptr.h" +#include "tensorstore/internal/json_binding/json_binding.h" +#include "tensorstore/internal/log/verbose_flag.h" +#include "tensorstore/kvstore/byte_range.h" +#include "tensorstore/kvstore/driver.h" +#include "tensorstore/kvstore/key_range.h" +#include "tensorstore/kvstore/kvstore.h" +#include "tensorstore/kvstore/operations.h" +#include "tensorstore/kvstore/read_result.h" +#include "tensorstore/kvstore/registry.h" +#include "tensorstore/kvstore/spec.h" +#include "tensorstore/kvstore/tiff/tiff_details.h" +#include "tensorstore/kvstore/tiff/tiff_dir_cache.h" +#include "tensorstore/transaction.h" +#include "tensorstore/util/executor.h" +#include "tensorstore/util/future.h" +#include "tensorstore/util/quote_string.h" +#include "tensorstore/util/result.h" +#include "tensorstore/util/str_cat.h" + +namespace tensorstore::kvstore::tiff_kvstore { +namespace jb = ::tensorstore::internal_json_binding; + +using ::tensorstore::internal_tiff_kvstore::ImageDirectory; +using ::tensorstore::internal_tiff_kvstore::TiffDirectoryCache; +using ::tensorstore::internal_tiff_kvstore::TiffParseResult; +using ::tensorstore::kvstore::ListEntry; +using ::tensorstore::kvstore::ListReceiver; + +namespace { + +ABSL_CONST_INIT internal_log::VerboseFlag tiff_logging("tiff"); + +// Expected key: "chunk//" +absl::Status ParseChunkKey(std::string_view key, uint32_t& ifd, + uint32_t& linear_index) { + auto eat_number = [&](std::string_view& s, uint32_t& out) -> bool { + if (s.empty()) return false; + uint32_t v = 0; + size_t i = 0; + while (i < s.size() && s[i] >= '0' && s[i] <= '9') { + v = v * 10 + (s[i] - '0'); + ++i; + } + if (i == 0) return false; // no digits + out = v; + s.remove_prefix(i); + return true; + }; + + if (!absl::ConsumePrefix(&key, "chunk/")) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Invalid chunk key format: expected prefix 'chunk/' in '", key, "'")); + } + + // Parse IFD index + if (!eat_number(key, ifd)) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Invalid chunk key format: expected numeric IFD index in '", key, "'")); + } + + // Consume separator '/' + if (!absl::ConsumePrefix(&key, "/")) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Invalid chunk key format: expected '/' after IFD index in '", key, + "'")); + } + + // Parse linear index + if (!eat_number(key, linear_index)) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Invalid chunk key format: expected numeric linear chunk index in '", + key, "'")); + } + + // Ensure no trailing characters remain + if (!key.empty()) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Invalid chunk key format: unexpected trailing characters '", key, + "'")); + } + + return absl::OkStatus(); +} + +struct TiffKvStoreSpecData { + kvstore::Spec base; + Context::Resource cache_pool; + Context::Resource + data_copy_concurrency; + + constexpr static auto 
ApplyMembers = [](auto& x, auto f) { + return f(x.base, x.cache_pool, x.data_copy_concurrency); + }; + + constexpr static auto default_json_binder = jb::Object( + jb::Member("base", jb::Projection<&TiffKvStoreSpecData::base>()), + jb::Member(internal::CachePoolResource::id, + jb::Projection<&TiffKvStoreSpecData::cache_pool>()), + jb::Member( + internal::DataCopyConcurrencyResource::id, + jb::Projection<&TiffKvStoreSpecData::data_copy_concurrency>())); +}; + +struct Spec + : public internal_kvstore::RegisteredDriverSpec { + static constexpr char id[] = "tiff"; + + Future DoOpen() const override; + + absl::Status ApplyOptions(kvstore::DriverSpecOptions&& o) override { + return data_.base.driver.Set(std::move(o)); + } + Result GetBase(std::string_view) const override { + return data_.base; + } +}; + +class TiffKeyValueStore + : public internal_kvstore::RegisteredDriver { + public: + Future Read(Key key, ReadOptions options) override; + + void ListImpl(ListOptions options, ListReceiver receiver) override; + + std::string DescribeKey(std::string_view key) override { + return StrCat(QuoteString(key), " in ", + base_.driver->DescribeKey(base_.path)); + } + + SupportedFeatures GetSupportedFeatures(const KeyRange& r) const override { + return base_.driver->GetSupportedFeatures( + KeyRange::AddPrefix(base_.path, r)); + } + + Result GetBase(std::string_view, + const Transaction& t) const override { + return KvStore(base_.driver, base_.path, t); + } + + const Executor& executor() const { + return spec_data_.data_copy_concurrency->executor; + } + + absl::Status GetBoundSpecData(TiffKvStoreSpecData& spec) const { + spec = spec_data_; + return absl::OkStatus(); + } + + TiffKvStoreSpecData spec_data_; + kvstore::KvStore base_; + internal::PinnedCacheEntry cache_entry_; +}; + +// Implements TiffKeyValueStore::Read +struct ReadState : public internal::AtomicReferenceCount { + internal::IntrusivePtr owner_; + kvstore::Key key_; + kvstore::ReadOptions options_; + uint32_t ifd_; + uint32_t linear_index_; + + void OnDirectoryReady(Promise promise) { + TimestampedStorageGeneration dir_stamp; + uint64_t chunk_offset; + uint64_t chunk_byte_count; + + { + TiffDirectoryCache::ReadLock lock( + *(owner_->cache_entry_)); + + if (!lock.data()) { + promise.SetResult(owner_->cache_entry_->AnnotateError( + absl::FailedPreconditionError( + "TIFF directory cache data is null after read attempt"), + true)); + return; + } + dir_stamp = lock.stamp(); + const auto& parse_result = *lock.data(); + + if (ifd_ >= parse_result.image_directories.size()) { + promise.SetResult(absl::NotFoundError( + absl::StrFormat("IFD %d not found, only %d IFDs available", ifd_, + lock.data()->image_directories.size()))); + return; + } + + const auto& dir = parse_result.image_directories[ifd_]; + + if (linear_index_ >= dir.chunk_offsets.size() || + linear_index_ >= dir.chunk_bytecounts.size()) { + promise.SetResult(absl::OutOfRangeError( + absl::StrFormat("Linear chunk index %d out of range for IFD %d " + "(valid range [0, %d))", + linear_index_, ifd_, dir.chunk_offsets.size()))); + return; + } + + chunk_offset = dir.chunk_offsets[linear_index_]; + chunk_byte_count = dir.chunk_bytecounts[linear_index_]; + + if (!options_.generation_conditions.Matches(dir_stamp.generation)) { + promise.SetResult( + kvstore::ReadResult::Unspecified(std::move(dir_stamp))); + return; + } + } + + kvstore::ReadOptions chunk_read_options; + chunk_read_options.staleness_bound = options_.staleness_bound; + chunk_read_options.byte_range = options_.byte_range; + 
chunk_read_options.generation_conditions = options_.generation_conditions; + + // Calculate the absolute byte range needed from the base store + Result absolute_byte_range_result = + chunk_read_options.byte_range.Validate(chunk_byte_count); + if (!absolute_byte_range_result.ok()) { + promise.SetResult(std::move(absolute_byte_range_result).status()); + return; + } + ByteRange absolute_byte_range = absolute_byte_range_result.value(); + absolute_byte_range.inclusive_min += chunk_offset; + absolute_byte_range.exclusive_max += chunk_offset; + chunk_read_options.byte_range = absolute_byte_range; + + auto future = owner_->base_.driver->Read(owner_->base_.path, + std::move(chunk_read_options)); + future.Force(); + future.ExecuteWhenReady( + [self = internal::IntrusivePtr(this), + promise = std::move(promise)]( + ReadyFuture ready) mutable { + if (!ready.result().ok()) { + promise.SetResult(std::move(ready.result())); + return; + } + + auto read_result = std::move(ready.result().value()); + if (!read_result.has_value()) { + promise.SetResult(std::move(read_result)); + return; + } + + promise.SetResult(std::move(read_result)); + }); + } +}; + +// Implements TiffKeyValueStore::List +struct ListState : public internal::AtomicReferenceCount { + internal::IntrusivePtr owner_; + kvstore::ListOptions options_; + ListReceiver receiver_; + Promise promise_; + Future future_; + + ListState(internal::IntrusivePtr&& owner, + kvstore::ListOptions&& options, ListReceiver&& receiver) + : owner_(std::move(owner)), + options_(std::move(options)), + receiver_(std::move(receiver)) { + auto [promise, future] = PromiseFuturePair::Make(MakeResult()); + this->promise_ = std::move(promise); + this->future_ = std::move(future); + future_.Force(); + execution::set_starting(receiver_, [promise = promise_] { + promise.SetResult(absl::CancelledError("")); + }); + } + + ~ListState() { + auto& r = promise_.raw_result(); + if (r.ok()) { + execution::set_done(receiver_); + } else { + execution::set_error(receiver_, r.status()); + } + execution::set_stopping(receiver_); + } + + void OnDirectoryReady() { + TiffDirectoryCache::ReadLock lock( + *(owner_->cache_entry_)); + + if (!lock.data()) { + promise_.SetResult(owner_->cache_entry_->AnnotateError( + absl::FailedPreconditionError( + "TIFF directory cache data is null after read attempt"), + true)); + return; + } + + const auto& parse_result = *lock.data(); + for (size_t ifd_index = 0; + ifd_index < parse_result.image_directories.size(); ++ifd_index) { + const auto& dir = parse_result.image_directories[ifd_index]; + + const size_t num_chunks = dir.chunk_offsets.size(); + if (num_chunks != dir.chunk_bytecounts.size()) { + promise_.SetResult(absl::InternalError(absl::StrFormat( + "Inconsistent chunk offset/bytecount array sizes for IFD %d", + ifd_index))); + return; + } + + for (uint64_t linear_index = 0; linear_index < num_chunks; + ++linear_index) { + std::string key = + absl::StrFormat("chunk/%d/%d", ifd_index, linear_index); + + if (tensorstore::Contains(options_.range, key)) { + size_t chunk_size = dir.chunk_bytecounts[linear_index]; + + // Apply prefix stripping if requested + std::string_view adjusted_key = key; + if (options_.strip_prefix_length > 0 && + options_.strip_prefix_length <= key.size()) { + adjusted_key = + std::string_view(key).substr(options_.strip_prefix_length); + } else if (options_.strip_prefix_length > key.size()) { + adjusted_key = ""; // Strip entire key + } + + // Send the entry to the receiver + execution::set_value(receiver_, + 
ListEntry{std::string(adjusted_key), + ListEntry::checked_size(chunk_size)}); + + if (!promise_.result_needed()) { + return; + } + } else if (key >= options_.range.exclusive_max && + !options_.range.exclusive_max.empty()) { + // If current key is already past the requested range's end, + // we can potentially optimize by stopping early for this IFD, + // assuming keys are generated in lexicographical order. + break; + } + } + + // Check again for cancellation after processing an IFD + if (!promise_.result_needed()) { + return; + } + } + + promise_.SetResult(absl::OkStatus()); + } +}; + +Future Spec::DoOpen() const { + return MapFutureValue( + InlineExecutor{}, + [spec = internal::IntrusivePtr(this)]( + kvstore::KvStore& base_kvstore) mutable + -> Result { + std::string cache_key; + internal::EncodeCacheKey(&cache_key, base_kvstore.driver, + base_kvstore.path, + spec->data_.data_copy_concurrency); + + // Get or create the directory cache + auto& cache_pool = *spec->data_.cache_pool; + auto directory_cache = internal::GetCache( + cache_pool.get(), cache_key, [&] { + return std::make_unique( + base_kvstore.driver, + spec->data_.data_copy_concurrency->executor); + }); + + // Create the driver and set its fields + auto driver = internal::MakeIntrusivePtr(); + driver->base_ = std::move(base_kvstore); + driver->spec_data_ = std::move(spec->data_); + driver->cache_entry_ = + GetCacheEntry(directory_cache, driver->base_.path); + + return driver; + }, + kvstore::Open(data_.base)); +} + +Future TiffKeyValueStore::Read(Key key, ReadOptions options) { + uint32_t ifd, linear_index; + if (auto st = ParseChunkKey(key, ifd, linear_index); !st.ok()) { + // Instead of returning the error, return a "missing" result + return MakeReadyFuture( + kvstore::ReadResult::Missing(TimestampedStorageGeneration{ + StorageGeneration::NoValue(), absl::Now()})); + } + + auto state = internal::MakeIntrusivePtr(); + state->owner_ = internal::IntrusivePtr(this); + state->key_ = std::move(key); + state->options_ = options; + state->ifd_ = ifd; + state->linear_index_ = linear_index; + + return PromiseFuturePair::LinkValue( + WithExecutor( + executor(), + [state = std::move(state)](Promise promise, + ReadyFuture) { + if (!promise.result_needed()) return; + state->OnDirectoryReady(std::move(promise)); + }), + cache_entry_->Read({options.staleness_bound})) + .future; +} + +void TiffKeyValueStore::ListImpl(ListOptions options, ListReceiver receiver) { + auto state = internal::MakeIntrusivePtr( + internal::IntrusivePtr(this), std::move(options), + std::move(receiver)); + auto* state_ptr = state.get(); + + LinkValue(WithExecutor(executor(), + [state = std::move(state)](Promise promise, + ReadyFuture) { + state->OnDirectoryReady(); + }), + state_ptr->promise_, + cache_entry_->Read({state_ptr->options_.staleness_bound})); +} + +} // namespace + +Result GetTiffKeyValueStoreDriver( + DriverPtr base_kvstore, std::string path, + const Context::Resource& cache_pool_res, + const Context::Resource& + data_copy_res, + const internal::PinnedCacheEntry& + dir_cache_entry) { + // Check if resources are valid before dereferencing + if (!cache_pool_res.has_resource()) { + return absl::InvalidArgumentError("Cache pool resource is not available"); + } + if (!data_copy_res.has_resource()) { + return absl::InvalidArgumentError( + "Data copy concurrency resource is not available"); + } + if (!dir_cache_entry) { + return absl::InvalidArgumentError( + "TIFF directory cache entry is not valid"); + } + + auto driver = internal::MakeIntrusivePtr(); + 
driver->base_ = KvStore(base_kvstore, std::move(path)); + + driver->spec_data_.cache_pool = cache_pool_res; + driver->spec_data_.data_copy_concurrency = data_copy_res; + driver->cache_entry_ = dir_cache_entry; + + return DriverPtr(std::move(driver)); +} + +} // namespace tensorstore::kvstore::tiff_kvstore + +TENSORSTORE_DECLARE_GARBAGE_COLLECTION_NOT_REQUIRED( + tensorstore::kvstore::tiff_kvstore::TiffKeyValueStore) + +namespace { +const tensorstore::internal_kvstore::DriverRegistration< + tensorstore::kvstore::tiff_kvstore::Spec> + registration; +} // namespace diff --git a/tensorstore/kvstore/tiff/tiff_key_value_store.h b/tensorstore/kvstore/tiff/tiff_key_value_store.h new file mode 100644 index 000000000..fd09c803f --- /dev/null +++ b/tensorstore/kvstore/tiff/tiff_key_value_store.h @@ -0,0 +1,50 @@ +// Copyright 2025 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORSTORE_KVSTORE_TIFF_TIFF_KEY_VALUE_STORE_H_ +#define TENSORSTORE_KVSTORE_TIFF_TIFF_KEY_VALUE_STORE_H_ + +#include + +#include "tensorstore/context.h" +#include "tensorstore/internal/cache/cache.h" +#include "tensorstore/internal/cache/cache_pool_resource.h" +#include "tensorstore/internal/data_copy_concurrency_resource.h" +#include "tensorstore/kvstore/driver.h" +#include "tensorstore/kvstore/kvstore.h" +#include "tensorstore/kvstore/tiff/tiff_dir_cache.h" +#include "tensorstore/util/future.h" + +namespace tensorstore { +namespace kvstore { +namespace tiff_kvstore { + +/// Creates a TiffKeyValueStore driver instance using resolved resources. +/// +/// This function assumes the provided resources (cache_pool_res, data_copy_res) +/// have already been resolved/bound using a Context. +Result GetTiffKeyValueStoreDriver( + DriverPtr base_kvstore, // Base driver (e.g., file, memory) + std::string path, // Path within the base driver + const Context::Resource& cache_pool_res, + const Context::Resource& + data_copy_res, + const internal::PinnedCacheEntry& + dir_cache_entry); + +} // namespace tiff_kvstore +} // namespace kvstore +} // namespace tensorstore + +#endif // TENSORSTORE_KVSTORE_TIFF_TIFF_KEY_VALUE_STORE_H_ diff --git a/tensorstore/kvstore/tiff/tiff_key_value_store_test.cc b/tensorstore/kvstore/tiff/tiff_key_value_store_test.cc new file mode 100644 index 000000000..6b0ccb096 --- /dev/null +++ b/tensorstore/kvstore/tiff/tiff_key_value_store_test.cc @@ -0,0 +1,421 @@ +// Copyright 2025 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
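+//
+// Tests for the "tiff" kvstore adapter: reading tiled and striped chunks,
+// listing chunk keys, byte-range reads, multiple IFDs, and error handling for
+// malformed or incomplete TIFF files.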
+ +#include "tensorstore/kvstore/tiff/tiff_key_value_store.h" + +#include + +#include "absl/strings/cord.h" +#include "absl/synchronization/notification.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "tensorstore/context.h" +#include "tensorstore/kvstore/byte_range.h" +#include "tensorstore/kvstore/key_range.h" +#include "tensorstore/kvstore/kvstore.h" +#include "tensorstore/kvstore/operations.h" +#include "tensorstore/kvstore/spec.h" +#include "tensorstore/kvstore/test_matchers.h" +#include "tensorstore/kvstore/test_util.h" +#include "tensorstore/kvstore/tiff/tiff_test_util.h" +#include "tensorstore/util/execution/sender_testutil.h" +#include "tensorstore/util/status_testutil.h" + +namespace { + +namespace kvstore = tensorstore::kvstore; +using ::tensorstore::CompletionNotifyingReceiver; +using ::tensorstore::Context; +using ::tensorstore::KeyRange; +using ::tensorstore::MatchesStatus; +using ::tensorstore::internal::MatchesKvsReadResultNotFound; +using ::tensorstore::internal_tiff_kvstore::testing::MakeMalformedTiff; +using ::tensorstore::internal_tiff_kvstore::testing::MakeMultiIfdTiff; +using ::tensorstore::internal_tiff_kvstore::testing::MakeReadOpTiff; +using ::tensorstore::internal_tiff_kvstore::testing::MakeTiffMissingHeight; +using ::tensorstore::internal_tiff_kvstore::testing::MakeTinyStripedTiff; +using ::tensorstore::internal_tiff_kvstore::testing::MakeTinyTiledTiff; +using ::tensorstore::internal_tiff_kvstore::testing::MakeTwoStripedTiff; +using ::tensorstore::internal_tiff_kvstore::testing::TiffBuilder; + +class TiffKeyValueStoreTest : public ::testing::Test { + public: + TiffKeyValueStoreTest() : context_(Context::Default()) {} + + void PrepareMemoryKvstore(absl::Cord value) { + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + tensorstore::KvStore memory, + kvstore::Open({{"driver", "memory"}}, context_).result()); + + TENSORSTORE_CHECK_OK(kvstore::Write(memory, "data.tiff", value).result()); + } + + tensorstore::Context context_; +}; + +TEST_F(TiffKeyValueStoreTest, Tiled_ReadSuccess) { + PrepareMemoryKvstore(absl::Cord(MakeTinyTiledTiff())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto tiff_store, + kvstore::Open({{"driver", "tiff"}, + {"base", {{"driver", "memory"}, {"path", "data.tiff"}}}}, + context_) + .result()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto rr, kvstore::Read(tiff_store, "chunk/0/0").result()); + EXPECT_EQ(std::string(rr.value), "DATA"); +} + +TEST_F(TiffKeyValueStoreTest, Tiled_OutOfRange) { + PrepareMemoryKvstore(absl::Cord(MakeTinyTiledTiff())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto tiff_store, + kvstore::Open({{"driver", "tiff"}, + {"base", {{"driver", "memory"}, {"path", "data.tiff"}}}}, + context_) + .result()); + + auto status = kvstore::Read(tiff_store, "chunk/0/81").result().status(); + EXPECT_THAT(status, MatchesStatus(absl::StatusCode::kOutOfRange)); +} + +TEST_F(TiffKeyValueStoreTest, Striped_ReadOneStrip) { + PrepareMemoryKvstore(absl::Cord(MakeTinyStripedTiff())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto tiff_store, + kvstore::Open({{"driver", "tiff"}, + {"base", {{"driver", "memory"}, {"path", "data.tiff"}}}}, + context_) + .result()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto rr, kvstore::Read(tiff_store, "chunk/0/0").result()); + EXPECT_EQ(std::string(rr.value), "DATASTR!"); +} + +TEST_F(TiffKeyValueStoreTest, Striped_ReadSecondStrip) { + PrepareMemoryKvstore(absl::Cord(MakeTwoStripedTiff())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto tiff_store, + kvstore::Open({{"driver", "tiff"}, + {"base", {{"driver", 
"memory"}, {"path", "data.tiff"}}}}, + context_) + .result()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto rr, kvstore::Read(tiff_store, "chunk/0/1").result()); + EXPECT_EQ(std::string(rr.value), "BBBB"); +} + +TEST_F(TiffKeyValueStoreTest, Striped_OutOfRangeRow) { + PrepareMemoryKvstore(absl::Cord(MakeTinyStripedTiff())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto tiff_store, + kvstore::Open({{"driver", "tiff"}, + {"base", {{"driver", "memory"}, {"path", "data.tiff"}}}}, + context_) + .result()); + + auto status = kvstore::Read(tiff_store, "chunk/0/2").result().status(); + EXPECT_THAT(status, MatchesStatus(absl::StatusCode::kOutOfRange)); +} + +TEST_F(TiffKeyValueStoreTest, List) { + PrepareMemoryKvstore(absl::Cord(MakeTinyTiledTiff())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto tiff_store, + kvstore::Open({{"driver", "tiff"}, + {"base", {{"driver", "memory"}, {"path", "data.tiff"}}}}, + context_) + .result()); + + // Listing the entire stream works. + for (int i = 0; i < 2; ++i) { + absl::Notification notification; + std::vector log; + tensorstore::execution::submit( + kvstore::List(tiff_store, {}), + tensorstore::CompletionNotifyingReceiver{ + ¬ification, tensorstore::LoggingReceiver{&log}}); + notification.WaitForNotification(); + + // Only one tile in our tiny tiled TIFF + EXPECT_THAT(log, ::testing::UnorderedElementsAre( + "set_starting", "set_value: chunk/0/0", "set_done", + "set_stopping")) + << i; + } +} + +TEST_F(TiffKeyValueStoreTest, ListWithPrefix) { + PrepareMemoryKvstore(absl::Cord(MakeTwoStripedTiff())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto tiff_store, + kvstore::Open({{"driver", "tiff"}, + {"base", {{"driver", "memory"}, {"path", "data.tiff"}}}}, + context_) + .result()); + + { + kvstore::ListOptions options; + options.range = options.range.Prefix("chunk/0/1"); + options.strip_prefix_length = 6; + absl::Notification notification; + std::vector log; + tensorstore::execution::submit( + kvstore::List(tiff_store, options), + tensorstore::CompletionNotifyingReceiver{ + ¬ification, tensorstore::LoggingReceiver{&log}}); + notification.WaitForNotification(); + + EXPECT_THAT( + log, ::testing::UnorderedElementsAre("set_starting", "set_value: 0/1", + "set_done", "set_stopping")); + } +} + +TEST_F(TiffKeyValueStoreTest, ListMultipleStrips) { + PrepareMemoryKvstore(absl::Cord(MakeTwoStripedTiff())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto tiff_store, + kvstore::Open({{"driver", "tiff"}, + {"base", {{"driver", "memory"}, {"path", "data.tiff"}}}}, + context_) + .result()); + + absl::Notification notification; + std::vector log; + tensorstore::execution::submit( + kvstore::List(tiff_store, {}), + tensorstore::CompletionNotifyingReceiver{ + ¬ification, tensorstore::LoggingReceiver{&log}}); + notification.WaitForNotification(); + + EXPECT_THAT(log, ::testing::UnorderedElementsAre( + "set_starting", "set_value: chunk/0/0", + "set_value: chunk/0/1", "set_done", "set_stopping")); +} + +TEST_F(TiffKeyValueStoreTest, ReadOps) { + PrepareMemoryKvstore(absl::Cord(MakeReadOpTiff())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + kvstore::Open({{"driver", "tiff"}, + {"base", {{"driver", "memory"}, {"path", "data.tiff"}}}}, + context_) + .result()); + + ::tensorstore::internal::TestKeyValueStoreReadOps( + store, "chunk/0/0", absl::Cord("abcdefghijklmnop"), "missing_key"); +} + +TEST_F(TiffKeyValueStoreTest, InvalidSpec) { + auto context = tensorstore::Context::Default(); + + EXPECT_THAT( + kvstore::Open({{"driver", "tiff"}, {"extra", "key"}}, context).result(), + 
MatchesStatus(absl::StatusCode::kInvalidArgument)); +} + +TEST_F(TiffKeyValueStoreTest, SpecRoundtrip) { + tensorstore::internal::KeyValueStoreSpecRoundtripOptions options; + options.check_data_persists = false; + options.check_write_read = false; + options.check_data_after_serialization = false; + options.check_store_serialization = true; + options.full_spec = {{"driver", "tiff"}, + {"base", {{"driver", "memory"}, {"path", "abc.tif"}}}}; + options.full_base_spec = {{"driver", "memory"}, {"path", "abc.tif"}}; + tensorstore::internal::TestKeyValueStoreSpecRoundtrip(options); +} + +TEST_F(TiffKeyValueStoreTest, MalformedTiff) { + PrepareMemoryKvstore(absl::Cord(MakeMalformedTiff())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto tiff_store, + kvstore::Open({{"driver", "tiff"}, + {"base", {{"driver", "memory"}, {"path", "data.tiff"}}}}, + context_) + .result()); + + auto status = kvstore::Read(tiff_store, "chunk/0/0").result().status(); + EXPECT_FALSE(status.ok()); +} + +TEST_F(TiffKeyValueStoreTest, InvalidKeyFormats) { + PrepareMemoryKvstore(absl::Cord(MakeTinyTiledTiff())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto tiff_store, + kvstore::Open({{"driver", "tiff"}, + {"base", {{"driver", "memory"}, {"path", "data.tiff"}}}}, + context_) + .result()); + + auto test_key = [&](std::string key) { + return kvstore::Read(tiff_store, key).result(); + }; + + // Wrong prefix + EXPECT_THAT(test_key("wrong/0/0/0"), MatchesKvsReadResultNotFound()); + + // Missing components + EXPECT_THAT(test_key("chunk/"), MatchesKvsReadResultNotFound()); + EXPECT_THAT(test_key("chunk/0"), MatchesKvsReadResultNotFound()); + + // Non-numeric components + EXPECT_THAT(test_key("chunk/a/0"), MatchesKvsReadResultNotFound()); + + // Extra components + EXPECT_THAT(test_key("chunk/0/0/0/extra"), MatchesKvsReadResultNotFound()); +} + +TEST_F(TiffKeyValueStoreTest, MultipleIFDs) { + PrepareMemoryKvstore(absl::Cord(MakeMultiIfdTiff())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto tiff_store, + kvstore::Open({{"driver", "tiff"}, + {"base", {{"driver", "memory"}, {"path", "data.tiff"}}}}, + context_) + .result()); + + // Read from the first IFD + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto rr1, kvstore::Read(tiff_store, "chunk/0/0").result()); + EXPECT_EQ(std::string(rr1.value), "DATA1"); + + // Read from the second IFD + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto rr2, kvstore::Read(tiff_store, "chunk/1/0").result()); + EXPECT_EQ(std::string(rr2.value), "DATA2"); + + // Test invalid IFD index + auto status = kvstore::Read(tiff_store, "chunk/2/0").result().status(); + EXPECT_THAT(status, MatchesStatus(absl::StatusCode::kNotFound)); +} + +TEST_F(TiffKeyValueStoreTest, ByteRangeReads) { + PrepareMemoryKvstore(absl::Cord(MakeReadOpTiff())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto tiff_store, + kvstore::Open({{"driver", "tiff"}, + {"base", {{"driver", "memory"}, {"path", "data.tiff"}}}}, + context_) + .result()); + + // Full read for reference + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto full_read, kvstore::Read(tiff_store, "chunk/0/0").result()); + EXPECT_EQ(std::string(full_read.value), "abcdefghijklmnop"); + + // Partial read - first half + kvstore::ReadOptions options1; + options1.byte_range = tensorstore::OptionalByteRangeRequest::Range(0, 8); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto partial1, kvstore::Read(tiff_store, "chunk/0/0", options1).result()); + EXPECT_EQ(std::string(partial1.value), "abcdefgh"); + + // Partial read - second half + kvstore::ReadOptions options2; + options2.byte_range = 
+  TENSORSTORE_ASSERT_OK_AND_ASSIGN(
+      auto partial2, kvstore::Read(tiff_store, "chunk/0/0", options2).result());
+  EXPECT_EQ(std::string(partial2.value), "ijklmnop");
+
+  // Out-of-range byte range
+  kvstore::ReadOptions options3;
+  options3.byte_range = tensorstore::OptionalByteRangeRequest::Range(0, 20);
+  auto status =
+      kvstore::Read(tiff_store, "chunk/0/0", options3).result().status();
+  EXPECT_FALSE(status.ok());
+}
+
+TEST_F(TiffKeyValueStoreTest, MissingRequiredTags) {
+  PrepareMemoryKvstore(absl::Cord(MakeTiffMissingHeight()));
+
+  TENSORSTORE_ASSERT_OK_AND_ASSIGN(
+      auto tiff_store,
+      kvstore::Open({{"driver", "tiff"},
+                     {"base", {{"driver", "memory"}, {"path", "data.tiff"}}}},
+                    context_)
+          .result());
+
+  auto status = kvstore::Read(tiff_store, "chunk/0/0").result().status();
+  EXPECT_FALSE(status.ok());
+}
+
+TEST_F(TiffKeyValueStoreTest, StalenessBound) {
+  PrepareMemoryKvstore(absl::Cord(MakeTinyTiledTiff()));
+
+  TENSORSTORE_ASSERT_OK_AND_ASSIGN(
+      auto tiff_store,
+      kvstore::Open({{"driver", "tiff"},
+                     {"base", {{"driver", "memory"}, {"path", "data.tiff"}}}},
+                    context_)
+          .result());
+
+  // Read with infinite past staleness bound (should work)
+  kvstore::ReadOptions options_past;
+  options_past.staleness_bound = absl::InfinitePast();
+  EXPECT_THAT(kvstore::Read(tiff_store, "chunk/0/0", options_past).result(),
+              ::tensorstore::IsOk());
+
+  // Read with infinite future staleness bound (should work)
+  kvstore::ReadOptions options_future;
+  options_future.staleness_bound = absl::InfiniteFuture();
+  EXPECT_THAT(kvstore::Read(tiff_store, "chunk/0/0", options_future).result(),
+              ::tensorstore::IsOk());
+}
+
+TEST_F(TiffKeyValueStoreTest, ListWithComplexRange) {
+  PrepareMemoryKvstore(absl::Cord(MakeTwoStripedTiff()));
+
+  TENSORSTORE_ASSERT_OK_AND_ASSIGN(
+      auto tiff_store,
+      kvstore::Open({{"driver", "tiff"},
+                     {"base", {{"driver", "memory"}, {"path", "data.tiff"}}}},
+                    context_)
+          .result());
+
+  // Test listing with exclusive range
+  kvstore::ListOptions options;
+  options.range = KeyRange(KeyRange::Successor("chunk/0/0"), "chunk/0/2");
+
+  absl::Notification notification;
+  std::vector<std::string> log;
+  tensorstore::execution::submit(
+      kvstore::List(tiff_store, options),
+      tensorstore::CompletionNotifyingReceiver{
+          &notification, tensorstore::LoggingReceiver{&log}});
+  notification.WaitForNotification();
+
+  EXPECT_THAT(log, ::testing::UnorderedElementsAre("set_starting",
+                                                   "set_value: chunk/0/1",
+                                                   "set_done", "set_stopping"));
+}
+
+}  // namespace
diff --git a/tensorstore/kvstore/tiff/tiff_test_util.cc b/tensorstore/kvstore/tiff/tiff_test_util.cc
new file mode 100644
index 000000000..bac086394
--- /dev/null
+++ b/tensorstore/kvstore/tiff/tiff_test_util.cc
@@ -0,0 +1,236 @@
+// Copyright 2025 The TensorStore Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "tensorstore/kvstore/tiff/tiff_test_util.h" + +namespace tensorstore { +namespace internal_tiff_kvstore { +namespace testing { + +TiffBuilder::TiffBuilder() { + // Standard TIFF header + data_ += "II"; // Little endian + data_.push_back(42); + data_.push_back(0); // Magic number + data_.push_back(8); + data_.push_back(0); // IFD offset (8) + data_.push_back(0); + data_.push_back(0); +} + +TiffBuilder& TiffBuilder::StartIfd(uint16_t num_entries) { + data_.push_back(num_entries & 0xFF); + data_.push_back((num_entries >> 8) & 0xFF); + return *this; +} + +TiffBuilder& TiffBuilder::AddEntry(uint16_t tag, uint16_t type, uint32_t count, + uint32_t value) { + data_.push_back(tag & 0xFF); + data_.push_back((tag >> 8) & 0xFF); + data_.push_back(type & 0xFF); + data_.push_back((type >> 8) & 0xFF); + data_.push_back(count & 0xFF); + data_.push_back((count >> 8) & 0xFF); + data_.push_back((count >> 16) & 0xFF); + data_.push_back((count >> 24) & 0xFF); + data_.push_back(value & 0xFF); + data_.push_back((value >> 8) & 0xFF); + data_.push_back((value >> 16) & 0xFF); + data_.push_back((value >> 24) & 0xFF); + return *this; +} + +TiffBuilder& TiffBuilder::EndIfd(uint32_t next_ifd_offset) { + data_.push_back(next_ifd_offset & 0xFF); + data_.push_back((next_ifd_offset >> 8) & 0xFF); + data_.push_back((next_ifd_offset >> 16) & 0xFF); + data_.push_back((next_ifd_offset >> 24) & 0xFF); + return *this; +} + +TiffBuilder& TiffBuilder::AddUint32Array(const std::vector& values) { + for (uint32_t val : values) { + data_.push_back(val & 0xFF); + data_.push_back((val >> 8) & 0xFF); + data_.push_back((val >> 16) & 0xFF); + data_.push_back((val >> 24) & 0xFF); + } + return *this; +} + +TiffBuilder& TiffBuilder::AddUint16Array(const std::vector& values) { + for (uint16_t val : values) { + data_.push_back(val & 0xFF); + data_.push_back((val >> 8) & 0xFF); + } + return *this; +} + +TiffBuilder& TiffBuilder::PadTo(size_t offset) { + while (data_.size() < offset) { + data_.push_back('X'); + } + return *this; +} + +std::string TiffBuilder::Build() const { return data_; } + +void PutLE16(std::string& dst, uint16_t v) { + dst.push_back(static_cast(v & 0xff)); + dst.push_back(static_cast(v >> 8)); +} + +void PutLE32(std::string& dst, uint32_t v) { + dst.push_back(static_cast(v & 0xff)); + dst.push_back(static_cast(v >> 8)); + dst.push_back(static_cast(v >> 16)); + dst.push_back(static_cast(v >> 24)); +} + +std::string MakeTinyTiledTiff() { + TiffBuilder builder; + return builder + .StartIfd(6) // 6 entries + .AddEntry(256, 3, 1, 256) + .AddEntry(257, 3, 1, 256) // width, length (256×256) + .AddEntry(322, 3, 1, 256) + .AddEntry(323, 3, 1, 256) // tile width/length + .AddEntry(324, 4, 1, 128) + .AddEntry(325, 4, 1, 4) // offset/bytecount + .EndIfd() // next IFD + .PadTo(128) + .Build() + + "DATA"; +} + +std::string MakeTinyStripedTiff() { + TiffBuilder builder; + return builder + .StartIfd(5) // 5 entries + .AddEntry(256, 3, 1, 4) // ImageWidth = 4 + .AddEntry(257, 3, 1, 8) // ImageLength = 8 + .AddEntry(278, 3, 1, 8) // RowsPerStrip = 8 + .AddEntry(273, 4, 1, 128) // StripOffsets = 128 + .AddEntry(279, 4, 1, 8) // StripByteCounts = 8 + .EndIfd() // No more IFDs + .PadTo(128) + .Build() + + "DATASTR!"; +} + +std::string MakeTwoStripedTiff() { + TiffBuilder builder; + return builder + .StartIfd(6) // 6 entries + .AddEntry(256, 3, 1, 4) // ImageWidth = 4 + .AddEntry(257, 3, 1, 8) // ImageLength = 8 + .AddEntry(278, 3, 1, 4) // RowsPerStrip = 4 + .AddEntry(273, 4, 2, 128) // StripOffsets array at offset 128 + 
+             .AddEntry(279, 4, 2, 136)  // StripByteCounts array at offset 136
+             .AddEntry(259, 3, 1, 1)    // Compression = none
+             .EndIfd()  // No more IFDs
+             .PadTo(128)
+             .AddUint32Array({200, 208})  // Strip offsets
+             .PadTo(136)
+             .AddUint32Array({4, 4})  // Strip byte counts
+             .PadTo(200)
+             .Build() +
+         "AAAA" + std::string(4, '\0') + "BBBB";
+}
+
+std::string MakeReadOpTiff() {
+  TiffBuilder builder;
+  return builder
+             .StartIfd(6)  // 6 entries
+             .AddEntry(256, 3, 1, 16)
+             .AddEntry(257, 3, 1, 16)  // width, length
+             .AddEntry(322, 3, 1, 16)
+             .AddEntry(323, 3, 1, 16)  // tile width/length
+             .AddEntry(324, 4, 1, 128)
+             .AddEntry(325, 4, 1, 16)  // offset/bytecount
+             .EndIfd()                 // next IFD
+             .PadTo(128)
+             .Build() +
+         "abcdefghijklmnop";
+}
+
+std::string MakeMalformedTiff() {
+  std::string t;
+  t += "MM";  // Bad endianness (motorola instead of intel)
+  PutLE16(t, 42);
+  PutLE32(t, 8);  // header
+  PutLE16(t, 1);  // 1 IFD entry
+
+  // Helper lambda for creating an entry
+  auto E = [&](uint16_t tag, uint16_t type, uint32_t cnt, uint32_t val) {
+    PutLE16(t, tag);
+    PutLE16(t, type);
+    PutLE32(t, cnt);
+    PutLE32(t, val);
+  };
+
+  E(256, 3, 1, 16);  // Only width, missing other required tags
+  PutLE32(t, 0);     // next IFD
+  return t;
+}
+
+std::string MakeMultiIfdTiff() {
+  TiffBuilder builder;
+  return builder
+             .StartIfd(6)  // 6 entries for first IFD
+             .AddEntry(256, 3, 1, 256)
+             .AddEntry(257, 3, 1, 256)  // width, length (256×256)
+             .AddEntry(322, 3, 1, 256)
+             .AddEntry(323, 3, 1, 256)  // tile width/length
+             .AddEntry(324, 4, 1, 200)
+             .AddEntry(325, 4, 1, 5)  // offset/bytecount for IFD 0
+             .EndIfd(86)   // next IFD at offset 86
+             .PadTo(86)    // pad to second IFD
+             .StartIfd(6)  // 6 entries for second IFD
+             .AddEntry(256, 3, 1, 128)
+             .AddEntry(257, 3, 1, 128)  // width, length (128×128)
+             .AddEntry(322, 3, 1, 128)
+             .AddEntry(323, 3, 1, 128)  // tile width/length
+             .AddEntry(324, 4, 1, 208)
+             .AddEntry(325, 4, 1, 5)  // offset/bytecount for IFD 1
+             .EndIfd()                // No more IFDs
+             .PadTo(200)
+             .Build() +
+         "DATA1" + std::string(3, '\0') + "DATA2";
+}
+
+std::string MakeTiffMissingHeight() {
+  std::string t;
+  t += "II";  // Little endian
+  PutLE16(t, 42);
+  PutLE32(t, 8);  // header
+  PutLE16(t, 1);  // 1 IFD entry
+
+  // Helper lambda for creating an entry
+  auto E = [&](uint16_t tag, uint16_t type, uint32_t cnt, uint32_t val) {
+    PutLE16(t, tag);
+    PutLE16(t, type);
+    PutLE32(t, cnt);
+    PutLE32(t, val);
+  };
+
+  E(256, 3, 1, 16);  // Width but no Height
+  PutLE32(t, 0);     // next IFD
+  return t;
+}
+
+}  // namespace testing
+}  // namespace internal_tiff_kvstore
+}  // namespace tensorstore
diff --git a/tensorstore/kvstore/tiff/tiff_test_util.h b/tensorstore/kvstore/tiff/tiff_test_util.h
new file mode 100644
index 000000000..ab4eee621
--- /dev/null
+++ b/tensorstore/kvstore/tiff/tiff_test_util.h
@@ -0,0 +1,82 @@
+// Copyright 2025 The TensorStore Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TENSORSTORE_KVSTORE_TIFF_TIFF_TEST_UTIL_H_
+#define TENSORSTORE_KVSTORE_TIFF_TIFF_TEST_UTIL_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace tensorstore {
+namespace internal_tiff_kvstore {
+namespace testing {
+
+// Helper class for building test TIFF files
+class TiffBuilder {
+ public:
+  TiffBuilder();
+
+  // Start an IFD with specified number of entries
+  TiffBuilder& StartIfd(uint16_t num_entries);
+
+  // Add an IFD entry
+  TiffBuilder& AddEntry(uint16_t tag, uint16_t type, uint32_t count,
+                        uint32_t value);
+
+  // End the current IFD and point to the next one at specified offset
+  // Use 0 for no next IFD
+  TiffBuilder& EndIfd(uint32_t next_ifd_offset = 0);
+
+  // Add external uint32_t array data
+  TiffBuilder& AddUint32Array(const std::vector<uint32_t>& values);
+
+  // Add external uint16_t array data
+  TiffBuilder& AddUint16Array(const std::vector<uint16_t>& values);
+
+  // Pad to a specific offset
+  TiffBuilder& PadTo(size_t offset);
+
+  // Get the final TIFF data
+  std::string Build() const;
+
+  size_t CurrentOffset() const { return data_.size(); }
+
+  std::string data_;
+};
+
+// Little-endian byte helper functions
+void PutLE16(std::string& dst, uint16_t v);
+void PutLE32(std::string& dst, uint32_t v);
+
+std::string MakeTinyTiledTiff();
+
+std::string MakeTinyStripedTiff();
+
+std::string MakeTwoStripedTiff();
+
+std::string MakeReadOpTiff();
+
+std::string MakeMalformedTiff();
+
+std::string MakeMultiIfdTiff();
+
+std::string MakeTiffMissingHeight();
+
+}  // namespace testing
+}  // namespace internal_tiff_kvstore
+}  // namespace tensorstore
+
+#endif  // TENSORSTORE_KVSTORE_TIFF_TIFF_TEST_UTIL_H_
\ No newline at end of file
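
Illustrative usage note (a minimal sketch, not part of the patch): the TiffBuilder helpers declared above are meant to be combined with the "tiff" kvstore adapter in tests like those in driver_test.cc earlier in this change. The test name below and the 16-byte payload are hypothetical; TiffKeyValueStoreTest, PrepareMemoryKvstore, and context_ refer to the existing test fixture used by the tests in this patch.

// Hypothetical sketch only: builds a single-strip 4x4 TIFF with TiffBuilder
// and reads its one chunk back through the "tiff" kvstore adapter, mirroring
// the fixture conventions used in the tests above.
TEST_F(TiffKeyValueStoreTest, BuilderUsageSketch) {
  TiffBuilder builder;
  std::string tiff = builder
                         .StartIfd(5)
                         .AddEntry(256, 3, 1, 4)    // ImageWidth = 4
                         .AddEntry(257, 3, 1, 4)    // ImageLength = 4
                         .AddEntry(278, 3, 1, 4)    // RowsPerStrip = 4
                         .AddEntry(273, 4, 1, 128)  // StripOffsets = 128
                         .AddEntry(279, 4, 1, 16)   // StripByteCounts = 16
                         .EndIfd()                  // no further IFDs
                         .PadTo(128)
                         .Build() +
                     std::string(16, 'x');  // strip payload at offset 128
  PrepareMemoryKvstore(absl::Cord(tiff));

  TENSORSTORE_ASSERT_OK_AND_ASSIGN(
      auto tiff_store,
      kvstore::Open({{"driver", "tiff"},
                     {"base", {{"driver", "memory"}, {"path", "data.tiff"}}}},
                    context_)
          .result());

  // The single strip is exposed as key "chunk/0/0".
  TENSORSTORE_ASSERT_OK_AND_ASSIGN(
      auto rr, kvstore::Read(tiff_store, "chunk/0/0").result());
  EXPECT_EQ(std::string(rr.value), std::string(16, 'x'));
}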