diff --git a/src/spatial/modules/geos/geos_geometry.hpp b/src/spatial/modules/geos/geos_geometry.hpp index ea871a76..a274e119 100644 --- a/src/spatial/modules/geos/geos_geometry.hpp +++ b/src/spatial/modules/geos/geos_geometry.hpp @@ -51,6 +51,10 @@ class GeosGeometry { GeosGeometry get_voronoi_diagram() const; GeosGeometry get_built_area() const; GeosGeometry get_noded() const; + GeosGeometry get_clipped(double xmin, double ymin, double xmax, double ymax) const; + GeosGeometry get_transformed(double xt, double yt, double xs, double ys) const; + + void orient_polygons(bool ext_cw); bool contains(const GeosGeometry &other) const; bool covers(const GeosGeometry &other) const; @@ -332,6 +336,28 @@ inline GeosGeometry GeosGeometry::get_noded() const { return GeosGeometry(handle, GEOSNode_r(handle, geom)); } +inline GeosGeometry GeosGeometry::get_clipped(double xmin, double ymin, double xmax, double ymax) const { + return GeosGeometry(handle, GEOSClipByRect_r(handle, geom, xmin, ymin, xmax, ymax)); +} + +inline GeosGeometry GeosGeometry::get_transformed(double xt, double yt, double xs, double ys) const { + double matrix[4] = {xt, yt, xs, ys}; + + return GeosGeometry(handle, GEOSGeom_transformXY_r( + handle, geom, + [](double *x, double *y, void *data) { + const auto mat = static_cast(data); + *x = (*x - mat[0]) * mat[2]; + *y = (*y - mat[1]) * mat[3]; + return 1; + }, + matrix)); +} + +inline void GeosGeometry::orient_polygons(bool ext_cw) { + GEOSOrientPolygons_r(handle, geom, ext_cw ? 1 : 0); +} + inline GeosGeometry GeosGeometry::get_maximum_inscribed_circle() const { double xmin = 0; double ymin = 0; diff --git a/src/spatial/modules/geos/geos_module.cpp b/src/spatial/modules/geos/geos_module.cpp index 31e037f5..80029429 100644 --- a/src/spatial/modules/geos/geos_module.cpp +++ b/src/spatial/modules/geos/geos_module.cpp @@ -6,7 +6,9 @@ #include "duckdb/common/vector_operations/senary_executor.hpp" #include "duckdb/common/vector_operations/generic_executor.hpp" +#include "duckdb/execution/expression_executor.hpp" #include "duckdb/planner/expression/bound_constant_expression.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" namespace duckdb { @@ -192,6 +194,149 @@ class AsymmetricPreparedBinaryFunction { namespace { +struct ST_AsMVTGeom { + + struct BindData final : FunctionData { + int32_t extent = 4096; + int32_t buffer = 256; + bool clip = true; + + unique_ptr Copy() const override { + auto copy = make_uniq(); + copy->extent = extent; + copy->buffer = buffer; + copy->clip = clip; + return std::move(copy); + } + + bool Equals(const FunctionData &other) const override { + auto &other_data = other.Cast(); + return (extent == other_data.extent) && (buffer == other_data.buffer) && (clip == other_data.clip); + } + }; + + static unique_ptr Bind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments) { + auto result = make_uniq(); + + if (arguments.size() > 2) { + auto &extent_expr = arguments[2]; + if (extent_expr->return_type.id() != LogicalTypeId::INTEGER) { + throw InvalidInputException("ST_AsMVTGeom: extent must be an integer"); + } + auto value = ExpressionExecutor::EvaluateScalar(context, *extent_expr); + if (value.IsNull()) { + throw InvalidInputException("ST_AsMVTGeom: extent cannot be NULL"); + } + result->extent = value.GetValue(); + } + + if (arguments.size() > 3) { + auto &buffer_expr = arguments[3]; + if (buffer_expr->return_type.id() != LogicalTypeId::INTEGER) { + throw InvalidInputException("ST_AsMVTGeom: buffer must be an integer"); + } + auto value = ExpressionExecutor::EvaluateScalar(context, *buffer_expr); + if (value.IsNull()) { + throw InvalidInputException("ST_AsMVTGeom: buffer cannot be NULL"); + } + result->buffer = value.GetValue(); + } + + if (arguments.size() > 4) { + auto &clip_expr = arguments[4]; + if (clip_expr->return_type.id() != LogicalTypeId::BOOLEAN) { + throw InvalidInputException("ST_AsMVTGeom: clip must be a boolean"); + } + auto value = ExpressionExecutor::EvaluateScalar(context, *clip_expr); + if (value.IsNull()) { + throw InvalidInputException("ST_AsMVTGeom: clip cannot be NULL"); + } + result->clip = value.GetValue(); + } + + return std::move(result); + } + + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + using BOX_TYPE = StructTypeQuaternary; + using GEOM_TYPE = PrimitiveType; + + auto &lstate = LocalState::ResetAndGet(state); + + auto &func_expr = state.expr.Cast(); + const auto &bdata = func_expr.bind_info->Cast(); + + GenericExecutor::ExecuteBinary( + args.data[0], args.data[1], result, args.size(), [&](const GEOM_TYPE &geom_val, const BOX_TYPE &box_val) { + auto &blob = geom_val.val; + + auto geom = lstate.Deserialize(blob); + + // Make sure polygons are oriented correctly + geom.orient_polygons(true); + + const auto xmin = box_val.a_val; + const auto ymin = box_val.b_val; + const auto xmax = box_val.c_val; + const auto ymax = box_val.d_val; + + const auto tile_w = xmax - xmin; + const auto tile_h = ymax - ymin; + + // Clip geometry, if requested + if (bdata.clip) { + + const auto ext = bdata.extent; + const auto buf = bdata.buffer; + + const auto buf_w = buf * tile_w / ext; + const auto buf_h = buf * tile_h / ext; + + const auto buf_xmin = xmin - buf_w; + const auto buf_ymin = ymin - buf_h; + const auto buf_xmax = xmax + buf_w; + const auto buf_ymax = ymax + buf_h; + + auto clipped = geom.get_clipped(buf_xmin, buf_ymin, buf_xmax, buf_ymax); + + geom = std::move(clipped); + } + + // TODO: Make valid/check vertices + + // Scale to fit the tile + const auto xs = tile_w / (xmax - xmin); + const auto ys = tile_h / (ymax - ymin); + const auto scaled = geom.get_transformed(xmin, ymin, xs, ys); + + // Serialize + return lstate.Serialize(result, scaled); + }); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_AsMVTGeom", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("box", GeoTypes::BOX_2D()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetBind(Bind); + variant.SetFunction(Execute); + }); + + func.SetDescription(R"( + Returns a geometry that is suitable for use in an MVT tile. + The geometry will be clipped to the bounding box and scaled to fit the tile. + )"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "conversion"); + }); + } +}; + struct ST_Boundary { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { const auto &lstate = LocalState::ResetAndGet(state); @@ -2717,6 +2862,7 @@ struct ST_CoverageInvalidEdges_Agg : GEOSCoverageAggFunction { void RegisterGEOSModule(DatabaseInstance &db) { // Scalar Functions + ST_AsMVTGeom::Register(db); ST_Boundary::Register(db); ST_Buffer::Register(db); ST_BuildArea::Register(db); diff --git a/src/spatial/modules/mvt/mvt_module.cpp b/src/spatial/modules/mvt/mvt_module.cpp index 69714dd9..fbaedc6f 100644 --- a/src/spatial/modules/mvt/mvt_module.cpp +++ b/src/spatial/modules/mvt/mvt_module.cpp @@ -2,11 +2,17 @@ #include "spatial/modules/mvt/mvt_module.hpp" +#include "duckdb/common/types/list_segment.hpp" #include "duckdb/common/vector_operations/generic_executor.hpp" +#include "duckdb/execution/expression_executor.hpp" + #include "spatial/geometry/geometry_serialization.hpp" #include "spatial/geometry/sgl.hpp" #include "spatial/spatial_types.hpp" -#include +#include "spatial/util/binary_reader.hpp" +#include "spatial/util/function_builder.hpp" + +#include "protozero/pbf_writer.hpp" namespace duckdb { @@ -168,11 +174,770 @@ struct ST_TileEnvelope { }; } // namespace -//------------------------------------------------------------------------------ + +//====================================================================================================================== +// ST_AsMVT +//====================================================================================================================== +namespace { + +struct ST_AsMVT { + + //------------------------------------------------------------------------------------------------------------------ + // Bind + //------------------------------------------------------------------------------------------------------------------ + struct BindData final : FunctionData { + string layer_name = "layer"; + int32_t extent = 4096; + idx_t geom_idx = 0; + string feature_id_name = "feature_id"; + vector property_names; + vector property_types; + + unique_ptr Copy() const override { + auto copy = make_uniq(); + copy->layer_name = layer_name; + copy->extent = extent; + copy->geom_idx = geom_idx; + copy->feature_id_name = feature_id_name; + copy->property_names = property_names; + copy->property_types = property_types; + return std::move(copy); + } + + bool Equals(const FunctionData &other) const override { + auto &other_data = other.Cast(); + return layer_name == other_data.layer_name && extent == other_data.extent && + geom_idx == other_data.geom_idx && feature_id_name == other_data.feature_id_name && + property_names == other_data.property_names && property_types == other_data.property_types; + } + }; + + static unique_ptr Bind(ClientContext &context, AggregateFunction &function, + vector> &arguments) { + auto result = make_uniq(); + + // Validate and set optional arguments + if (arguments.size() > 1) { + const auto &name_arg = arguments[1]; + if (name_arg->return_type.id() != LogicalTypeId::VARCHAR) { + throw InvalidInputException("ST_AsMVT: second argument must be a VARCHAR"); + } + if (!name_arg->IsFoldable()) { + throw InvalidInputException("ST_AsMVT: second argument must be a constant string"); + } + result->layer_name = ExpressionExecutor::EvaluateScalar(context, *name_arg).GetValue(); + Function::EraseArgument(function, arguments, 1); + } + + if (arguments.size() > 2) { + const auto &extent_arg = arguments[2]; + if (extent_arg->return_type.id() != LogicalTypeId::INTEGER) { + throw InvalidInputException("ST_AsMVT: third argument must be an INTEGER"); + } + if (!extent_arg->IsFoldable()) { + throw InvalidInputException("ST_AsMVT: third argument must be a constant integer"); + } + result->extent = ExpressionExecutor::EvaluateScalar(context, *extent_arg).GetValue(); + Function::EraseArgument(function, arguments, 2); + } + + string geom_name; + if (arguments.size() > 3) { + const auto &geom_name_arg = arguments[3]; + if (geom_name_arg->return_type.id() != LogicalTypeId::VARCHAR) { + throw InvalidInputException("ST_AsMVT: fourth argument must be a VARCHAR"); + } + if (!geom_name_arg->IsFoldable()) { + throw InvalidInputException("ST_AsMVT: fourth argument must be a constant string"); + } + geom_name = ExpressionExecutor::EvaluateScalar(context, *geom_name_arg).GetValue(); + Function::EraseArgument(function, arguments, 3); + } + + if (arguments.size() > 4) { + const auto &feature_id_arg = arguments[4]; + if (feature_id_arg->return_type.id() != LogicalTypeId::VARCHAR) { + throw InvalidInputException("ST_AsMVT: fifth argument must be a VARCHAR"); + } + if (!feature_id_arg->IsFoldable()) { + throw InvalidInputException("ST_AsMVT: fifth argument must be a constant string"); + } + result->feature_id_name = ExpressionExecutor::EvaluateScalar(context, *feature_id_arg).GetValue(); + Function::EraseArgument(function, arguments, 4); + } + + // Find the geometry column index in the row + const auto &row = arguments[0]; + const auto &row_type = row->return_type; + if (row_type.id() != LogicalTypeId::STRUCT) { + throw InvalidInputException("ST_AsMVT: first argument must be a STRUCT"); + } + + auto geom_idx = optional_idx::Invalid(); + + if (geom_name.empty()) { + // Look for the first geometry column + for (idx_t i = 0; i < StructType::GetChildCount(row_type); i++) { + auto &child = StructType::GetChildType(row_type, i); + if (child == GeoTypes::GEOMETRY()) { + if (geom_idx != optional_idx::Invalid()) { + throw InvalidInputException("ST_AsMVT: only one geometry column is allowed in the input row"); + } + geom_idx = i; + } + } + } else { + // Look for the geometry column by name + for (idx_t i = 0; i < StructType::GetChildCount(row_type); i++) { + auto &child = StructType::GetChildType(row_type, i); + auto &child_name = StructType::GetChildName(row_type, i); + if (child == GeoTypes::GEOMETRY() && child_name == geom_name) { + if (geom_idx != optional_idx::Invalid()) { + throw InvalidInputException("ST_AsMVT: only one geometry column is allowed in the input row"); + } + geom_idx = i; + } + } + } + if (!geom_idx.IsValid()) { + throw InvalidInputException("ST_AsMVT: input row must contain a geometry column"); + } + + result->geom_idx = geom_idx.GetIndex(); + + // TODO: Cast properties to supported types + for (idx_t i = 0; i < StructType::GetChildCount(row_type); i++) { + if (i == result->geom_idx) { + continue; // Skip the geometry column + } + auto &child_name = StructType::GetChildName(row_type, i); + auto &child_type = StructType::GetChildType(row_type, i); + result->property_names.push_back(child_name); + result->property_types.push_back(child_type); + } + + return std::move(result); + } + + //------------------------------------------------------------------------------------------------------------------ + // State + //------------------------------------------------------------------------------------------------------------------ + struct Feature { + vector geometry; + vector> tags; + int32_t type = 0; + }; + + struct Layer { + vector features; + + vector vals; + unordered_map val_map; + + void Combine(ArenaAllocator &arena, const Layer &other) { + // Copy the features over + for (auto feature : other.features) { + // Check if we need to add new keys + for (auto &tag : feature.tags) { + + // Map the old to the new value + auto &old_val = other.vals[tag.second]; + auto val_it = val_map.find(old_val); + if (val_it == val_map.end()) { + // This is a new value, add it to the value map + const auto new_val_idx = vals.size(); + + if (old_val.IsInlined()) { + vals.push_back(old_val); + val_map[old_val] = new_val_idx; + } else { + const auto mem = arena.Allocate(old_val.GetSize()); + memcpy(mem, old_val.GetData(), old_val.GetSize()); + auto new_val = string_t(const_char_ptr_cast(mem), old_val.GetSize()); + + vals.push_back(new_val); + val_map[new_val] = new_val_idx; + } + tag.second = new_val_idx; + } else { + // This value already exists, replace it in the feature + tag.second = val_it->second; + } + } + features.push_back(std::move(feature)); + } + } + + uint32_t AddValue(ArenaAllocator &arena, int32_t value) { + + // Encode as protobuf int64_t + string encoded_value; + { + protozero::pbf_writer writer(encoded_value); + writer.add_int64(4, value); + } + if (encoded_value.size() < string_t::INLINE_BYTES) { + return Intern(string_t(encoded_value)); + } + // If the encoded value is too large, we need to store it as a blob + const auto mem = arena.Allocate(encoded_value.size()); + memcpy(mem, encoded_value.data(), encoded_value.size()); + return Intern(string_t(const_char_ptr_cast(mem), encoded_value.size())); + } + + uint32_t AddValue(ArenaAllocator &arena, double value) { + string encoded_value; + { + protozero::pbf_writer writer(encoded_value); + writer.add_double(3, value); + } + if (encoded_value.size() < string_t::INLINE_BYTES) { + return Intern(string_t(encoded_value)); + } + // If the encoded value is too large, we need to store it as a blob + const auto mem = arena.Allocate(encoded_value.size()); + memcpy(mem, encoded_value.data(), encoded_value.size()); + return Intern(string_t(const_char_ptr_cast(mem), encoded_value.size())); + } + + uint32_t AddValue(ArenaAllocator &arena, const string_t &value) { + string encoded_value; + { + protozero::pbf_writer writer(encoded_value); + writer.add_string(1, value.GetData(), value.GetSize()); + } + if (encoded_value.size() < string_t::INLINE_BYTES) { + return Intern(string_t(encoded_value)); + } + // If the value is too large, we need to store it as a blob + const auto mem = arena.Allocate(encoded_value.size()); + memcpy(mem, encoded_value.data(), encoded_value.size()); + return Intern(string_t(const_char_ptr_cast(mem), encoded_value.size())); + } + + private: + uint32_t Intern(const string_t &value) { + const auto it = val_map.find(value); + if (it != val_map.end()) { + return it->second; + } + // This is a new value, add it to the value map + const auto new_val_idx = vals.size(); + vals.push_back(value); + val_map[value] = new_val_idx; + return new_val_idx; + } + }; + + struct State { + Layer layer; + }; + + static idx_t StateSize(const AggregateFunction &) { + return sizeof(State); + } + + static void Initialize(const AggregateFunction &, data_ptr_t state_mem) { + new (state_mem) State(); + } + + //------------------------------------------------------------------------------------------------------------------ + // Update + //------------------------------------------------------------------------------------------------------------------ + static int32_t CastDouble(double d) { + if (d < static_cast(std::numeric_limits::min()) || + d > static_cast(std::numeric_limits::max())) { + throw InvalidInputException("ST_AsMVT: coordinate out of range for int32_t"); + } + return static_cast(d); + } + + static void Update(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count, Vector &state_vec, + idx_t count) { + + const auto &bdata = aggr_input_data.bind_data->Cast(); + const auto &row_cols = StructVector::GetEntries(inputs[0]); + + UnifiedVectorFormat state_format; + UnifiedVectorFormat geom_format; + vector property_formats; + + state_vec.ToUnifiedFormat(count, state_format); + + for (idx_t col_idx = 0; col_idx < row_cols.size(); col_idx++) { + if (col_idx == bdata.geom_idx) { + row_cols[col_idx]->ToUnifiedFormat(count, geom_format); + } else { + property_formats.emplace_back(); + row_cols[col_idx]->ToUnifiedFormat(count, property_formats.back()); + } + } + + for (idx_t row_idx = 0; row_idx < count; row_idx++) { + auto &state = *UnifiedVectorFormat::GetData(state_format)[state_format.sel->get_index(row_idx)]; + + const auto geom_row_idx = geom_format.sel->get_index(row_idx); + if (!geom_format.validity.RowIsValid(geom_row_idx)) { + // Skip if geometry is NULL + continue; + } + + Feature feature; + + auto &geom_blob = UnifiedVectorFormat::GetData(geom_format)[geom_row_idx]; + // Deserialize the geometry + + BinaryReader cursor(geom_blob.GetData(), geom_blob.GetSize()); + const auto type = static_cast(cursor.Read() + 1); + const auto flags = cursor.Read(); + cursor.Skip(sizeof(uint16_t)); + cursor.Skip(sizeof(uint32_t)); // padding + + // Parse flags + const auto has_z = (flags & 0x01) != 0; + const auto has_m = (flags & 0x02) != 0; + const auto has_bbox = (flags & 0x04) != 0; + + const auto format_v1 = (flags & 0x40) != 0; + const auto format_v0 = (flags & 0x80) != 0; + + if (format_v1 || format_v0) { + // Unsupported version, throw an error + throw NotImplementedException( + "This geometry seems to be written with a newer version of the DuckDB spatial library that is not " + "compatible with this version. Please upgrade your DuckDB installation."); + } + + if (has_bbox) { + // Skip past bbox if present + cursor.Skip(sizeof(float) * 2 * (2 + has_z + has_m)); + } + + // Read the first type + cursor.Skip(sizeof(uint32_t)); + + const auto vertex_width = (2 + (has_z ? 1 : 0) + (has_m ? 1 : 0)) * sizeof(double); + const auto vertex_space = vertex_width - (2 * sizeof(double)); // Space for x and y + + switch (type) { + case sgl::geometry_type::POINT: { + feature.type = 1; // MVT_POINT + + // Read the point geometry + const auto vertex_count = cursor.Read(); + if (vertex_count == 0) { + // No vertices, skip + throw InvalidInputException("ST_AsMVT: POINT geometry cant be empty"); + } + const auto x = CastDouble(cursor.Read()); + const auto y = CastDouble(cursor.Read()); + cursor.Skip(vertex_space); // Skip z and m if present + + feature.geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part + feature.geometry.push_back(protozero::encode_zigzag32(x)); + feature.geometry.push_back(protozero::encode_zigzag32(y)); + + } break; + case sgl::geometry_type::LINESTRING: { + feature.type = 2; // MVT_LINESTRING + + const auto vertex_count = cursor.Read(); + if (vertex_count < 2) { + // Invalid linestring, skip + throw InvalidInputException("ST_AsMVT: LINESTRING geometry cant contain less than 2 vertices"); + } + // Read the vertices + int32_t cursor_x = 0; + int32_t cursor_y = 0; + + for (uint32_t vertex_idx = 0; vertex_idx < vertex_count; vertex_idx++) { + + const auto x = CastDouble(cursor.Read()); + const auto y = CastDouble(cursor.Read()); + cursor.Skip(vertex_space); // Skip z and m if present + + if (vertex_idx == 0) { + feature.geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part + feature.geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + feature.geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + feature.geometry.push_back((2 & 0x7) | ((vertex_count - 1) << 3)); // LineTo, part count + } else { + feature.geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + feature.geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + } + + cursor_x = x; + cursor_y = y; + } + } break; + case sgl::geometry_type::POLYGON: { + feature.type = 3; // MVT_POLYGON + + const auto part_count = cursor.Read(); + if (part_count == 0) { + // No parts, invalid + throw InvalidInputException("ST_AsMVT: POLYGON geometry cant be empty"); + } + + int32_t cursor_x = 0; + int32_t cursor_y = 0; + + auto ring_cursor = cursor; + cursor.Skip((part_count * 4) + (part_count % 2 == 1 ? 4 : 0)); // Skip part types and padding + for (uint32_t part_idx = 0; part_idx < part_count; part_idx++) { + const auto vertex_count = ring_cursor.Read(); + if (vertex_count < 3) { + // Invalid polygon, skip + throw InvalidInputException("ST_AsMVT: POLYGON ring cant contain less than 3 vertices"); + } + + for (uint32_t vertex_idx = 0; vertex_idx < vertex_count; vertex_idx++) { + const auto x = CastDouble(cursor.Read()); + const auto y = CastDouble(cursor.Read()); + cursor.Skip(vertex_space); // Skip z and m if present + + if (vertex_idx == 0) { + feature.geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part + feature.geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + feature.geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + feature.geometry.push_back((2 & 0x7) | ((vertex_count - 2) << 3)); + + cursor_x = x; + cursor_y = y; + + } else if (vertex_idx == vertex_count - 1) { + // Close the ring + feature.geometry.push_back((7 & 0x7) | (1 << 3)); // ClosePath + } else { + // Add the vertex + feature.geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + feature.geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + + cursor_x = x; + cursor_y = y; + } + } + } + } break; + case sgl::geometry_type::MULTI_POINT: { + feature.type = 1; // MVT_POINT + + const auto part_count = cursor.Read(); + if (part_count == 0) { + throw InvalidInputException("ST_AsMVT: MULTI_POINT geometry cant be empty"); + } + + int32_t cursor_x = 0; + int32_t cursor_y = 0; + + feature.geometry.push_back((1 & 0x7) | (part_count << 3)); // MoveTo, part count + + // Read the parts + for (uint32_t part_idx = 0; part_idx < part_count; part_idx++) { + cursor.Skip(sizeof(uint32_t)); // Skip part type + const auto vertex_count = cursor.Read(); + if (vertex_count == 0) { + // No vertices, skip + throw InvalidInputException("ST_AsMVT: POINT geometry cant be empty"); + } + + const auto x = CastDouble(cursor.Read()); + const auto y = CastDouble(cursor.Read()); + cursor.Skip(vertex_space); // Skip z and m if present + + feature.geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + feature.geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + + cursor_x = x; + cursor_y = y; + } + } break; + case sgl::geometry_type::MULTI_LINESTRING: { + feature.type = 2; // MVT_LINESTRING + + // Read the multi-linestring geometry + const auto part_count = cursor.Read(); + if (part_count == 0) { + // No parts, invalid + throw InvalidInputException("ST_AsMVT: MULTI_LINESTRING geometry cant be empty"); + } + int32_t cursor_x = 0; + int32_t cursor_y = 0; + + for (uint32_t part_idx = 0; part_idx < part_count; part_idx++) { + cursor.Skip(sizeof(uint32_t)); // Skip part type + const auto vertex_count = cursor.Read(); + + if (vertex_count < 2) { + // Invalid linestring, skip + throw InvalidInputException("ST_AsMVT: LINESTRING geometry cant contain less than 2 vertices"); + } + + for (uint32_t vertex_idx = 0; vertex_idx < vertex_count; vertex_idx++) { + + const auto x = CastDouble(cursor.Read()); + const auto y = CastDouble(cursor.Read()); + cursor.Skip(vertex_space); // Skip z and m if present + + if (vertex_idx == 0) { + feature.geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part + feature.geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + feature.geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + feature.geometry.push_back((2 & 0x7) | ((vertex_count - 2) << 3)); // LineTo, part count + } else { + feature.geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + feature.geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + } + + cursor_x = x; + cursor_y = y; + } + } + + } break; + case sgl::geometry_type::MULTI_POLYGON: { + feature.type = 3; // MVT_POLYGON + + // Read the multi-linestring geometry + const auto poly_count = cursor.Read(); + if (poly_count == 0) { + // No parts, invalid + throw InvalidInputException("ST_AsMVT: MULTI_POLYGON geometry cant be empty"); + } + + int32_t cursor_x = 0; + int32_t cursor_y = 0; + + for (uint32_t poly_idx = 0; poly_idx < poly_count; poly_idx++) { + cursor.Skip(sizeof(uint32_t)); // Skip part type + const auto part_count = cursor.Read(); + if (part_count == 0) { + // No parts, invalid + throw InvalidInputException("ST_AsMVT: POLYGON geometry cant be empty"); + } + + auto ring_cursor = cursor; + cursor.Skip((part_count * 4) + (part_count % 2 == 1 ? 4 : 0)); // Skip part types and padding + + for (uint32_t part_idx = 0; part_idx < part_count; part_idx++) { + const auto vertex_count = ring_cursor.Read(); + if (vertex_count < 3) { + // Invalid polygon, skip + throw InvalidInputException("ST_AsMVT: POLYGON ring cant contain less than 3 vertices"); + } + + for (uint32_t vertex_idx = 0; vertex_idx < vertex_count; vertex_idx++) { + const auto x = CastDouble(cursor.Read()); + const auto y = CastDouble(cursor.Read()); + cursor.Skip(vertex_space); // Skip z and m if present + + if (vertex_idx == 0) { + feature.geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part + feature.geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + feature.geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + feature.geometry.push_back((2 & 0x7) | ((vertex_count - 2) << 3)); + + cursor_x = x; + cursor_y = y; + + } else if (vertex_idx == vertex_count - 1) { + // Close the ring + feature.geometry.push_back((7 & 0x7) | (1 << 3)); // ClosePath + } else { + // Add the vertex + feature.geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + feature.geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + + cursor_x = x; + cursor_y = y; + } + } + } + } + } break; + default: + throw InvalidInputException("ST_AsMVT: unsupported geometry type %d", static_cast(type)); + } + + // Write out the properties + for (idx_t prop_idx = 0; prop_idx < property_formats.size(); prop_idx++) { + auto &property_format = property_formats[prop_idx]; + const auto prop_row_idx = property_format.sel->get_index(row_idx); + + if (!property_format.validity.RowIsValid(prop_row_idx)) { + continue; + } + + uint32_t tag_idx = 0; + + auto &property_type = bdata.property_types[prop_idx]; + + if (property_type.id() == LogicalTypeId::VARCHAR) { + auto &property_value = UnifiedVectorFormat::GetData(property_format)[prop_row_idx]; + tag_idx = state.layer.AddValue(aggr_input_data.allocator, property_value); + } else if (property_type.id() == LogicalTypeId::INTEGER) { + auto property_value = UnifiedVectorFormat::GetData(property_format)[prop_row_idx]; + tag_idx = state.layer.AddValue(aggr_input_data.allocator, property_value); + } else if (property_type.id() == LogicalTypeId::DOUBLE) { + auto property_value = UnifiedVectorFormat::GetData(property_format)[prop_row_idx]; + tag_idx = state.layer.AddValue(aggr_input_data.allocator, property_value); + } else { + throw InvalidInputException("ST_AsMVT: unsupported property type: " + property_type.ToString()); + } + + // Add the tag to the feature + feature.tags.emplace_back(prop_idx, tag_idx); + } + + if (!feature.geometry.empty()) { + state.layer.features.push_back(std::move(feature)); + } + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Combine + //------------------------------------------------------------------------------------------------------------------ + static void Combine(Vector &source_vec, Vector &target_vec, AggregateInputData &aggr_input_data, idx_t count) { + // There is no point in doing destructive combining here. In the future if we have a linked list it might + + UnifiedVectorFormat source_format; + source_vec.ToUnifiedFormat(count, source_format); + + const auto source_ptr = UnifiedVectorFormat::GetData(source_format); + const auto target_ptr = FlatVector::GetData(target_vec); + + for (idx_t row_idx = 0; row_idx < count; row_idx++) { + auto &source = *source_ptr[source_format.sel->get_index(row_idx)]; + auto &target = *target_ptr[row_idx]; + + // Append the feature data from source to target + target.layer.Combine(aggr_input_data.allocator, source.layer); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Finalize + //------------------------------------------------------------------------------------------------------------------ + static void Finalize(Vector &state_vec, AggregateInputData &aggr_input_data, Vector &result, idx_t count, + idx_t offset) { + + const auto &bdata = aggr_input_data.bind_data->Cast(); + + UnifiedVectorFormat state_format; + state_vec.ToUnifiedFormat(count, state_format); + const auto state_ptr = UnifiedVectorFormat::GetData(state_format); + + // Layer and feature buffers + string l_buffer; + + for (idx_t raw_idx = 0; raw_idx < count; raw_idx++) { + auto &state = *state_ptr[state_format.sel->get_index(raw_idx)]; + const auto out_idx = raw_idx + offset; + + l_buffer.clear(); + protozero::pbf_writer tile_writer(l_buffer); + protozero::pbf_writer layer_writer(tile_writer, 3); + + // Add version + layer_writer.add_uint32(15, 2); + + // Add layer name + layer_writer.add_string(1, bdata.layer_name); + + // Add features + uint64_t fid = 0; + for (auto &feature : state.layer.features) { + + protozero::pbf_writer feature_writer(layer_writer, 2); + + // Id = 1 + feature_writer.add_uint64(1, fid++); + + // Tags = 2 + protozero::packed_field_uint32 tags_writer(feature_writer, 2); + for (const auto &tag : feature.tags) { + tags_writer.add_element(tag.first); + tags_writer.add_element(tag.second); + } + tags_writer.commit(); + + // Type = 3 + feature_writer.add_enum(3, feature.type); + + // Geometry = 4 + feature_writer.add_packed_uint32(4, feature.geometry.begin(), feature.geometry.end()); + + // Add to the layer + feature_writer.commit(); + } + + // Now add keys and values + for (auto &key : bdata.property_names) { + layer_writer.add_string(3, key); + } + + for (auto &val : state.layer.vals) { + // Add val + layer_writer.add_message(4, val.GetData(), val.GetSize()); + } + + // Add extent + layer_writer.add_uint32(5, bdata.extent); + + // Commit the layer + layer_writer.commit(); + + + // Now we have the layer buffer, we can write it to the result vector + const auto result_data = FlatVector::GetData(result); + result_data[out_idx] = StringVector::AddStringOrBlob(result, l_buffer.data(), l_buffer.size()); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Destroy + //------------------------------------------------------------------------------------------------------------------ + static void Destroy(Vector &state_vec, AggregateInputData &, idx_t count) { + UnifiedVectorFormat state_format; + state_vec.ToUnifiedFormat(count, state_format); + + const auto state_ptr = UnifiedVectorFormat::GetData(state_format); + for (idx_t raw_idx = 0; raw_idx < count; raw_idx++) { + const auto row_idx = state_format.sel->get_index(raw_idx); + if (state_format.validity.RowIsValid(row_idx)) { + auto &state = *state_ptr[row_idx]; + + // Call destructor + state.~State(); + } + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + AggregateFunction agg({LogicalTypeId::ANY}, LogicalType::BLOB, StateSize, Initialize, Update, Combine, Finalize, + nullptr, Bind, Destroy); + + FunctionBuilder::RegisterAggregate(db, "ST_AsMVT", [&](AggregateFunctionBuilder &func) { + func.SetFunction(agg); + func.SetDescription("Makes a vector tile from a set of geometries"); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +} // namespace +//====================================================================================================================== // Register -//------------------------------------------------------------------------------ +//====================================================================================================================== void RegisterMapboxVectorTileModule(DatabaseInstance &db) { ST_TileEnvelope::Register(db); + ST_AsMVT::Register(db); }; } // namespace duckdb \ No newline at end of file diff --git a/src/spatial/modules/proj/proj_module.cpp b/src/spatial/modules/proj/proj_module.cpp index fe8b43e7..c77f2a67 100644 --- a/src/spatial/modules/proj/proj_module.cpp +++ b/src/spatial/modules/proj/proj_module.cpp @@ -55,8 +55,8 @@ PJ_CONTEXT *ProjModule::GetThreadProjContext() { // We set the default context proj.db path to the one in the binary here // Otherwise GDAL will try to load the proj.db from the system // Any PJ_CONTEXT we create after this will inherit these settings - auto path = StringUtil::Format("file:/proj.db?immutable=1&ptr=%llu&sz=%lu&max=%lu", - static_cast(proj_db), proj_db_len, proj_db_len); + auto path = StringUtil::Format("file:/proj.db?immutable=1&ptr=%llu&sz=%lu&max=%lu", static_cast(proj_db), + proj_db_len, proj_db_len); proj_context_set_sqlite3_vfs_name(ctx, "memvfs"); const auto ok = proj_context_set_database_path(ctx, path.c_str(), nullptr, nullptr); @@ -97,13 +97,13 @@ void ProjModule::RegisterVFS(DatabaseInstance &db) { // We set the default context proj.db path to the one in the binary here // Otherwise GDAL will try to load the proj.db from the system // Any PJ_CONTEXT we create after this will inherit these settings (on this thread?) - auto path = StringUtil::Format("file:/proj.db?immutable=1&ptr=%llu&sz=%lu&max=%lu", - static_cast(proj_db), proj_db_len, proj_db_len); + auto path = StringUtil::Format("file:/proj.db?immutable=1&ptr=%llu&sz=%lu&max=%lu", static_cast(proj_db), + proj_db_len, proj_db_len); proj_context_set_sqlite3_vfs_name(nullptr, "memvfs"); // Try to open the database - sqlite3* sdb = nullptr; + sqlite3 *sdb = nullptr; const auto sok = sqlite3_open_v2(path.c_str(), &sdb, SQLITE_OPEN_READONLY, "memvfs"); if (sok != SQLITE_OK) { throw InternalException("Could not open sqlite3 memvfs database");