From abede55371c34217f388fc92839974e33511fb0a Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 18 Mar 2026 09:35:02 +0100 Subject: [PATCH 1/4] Write schema to a pointer instead of empty record batch --- crates/modelardb_embedded/src/capi.rs | 32 +++++++++------------------ 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/crates/modelardb_embedded/src/capi.rs b/crates/modelardb_embedded/src/capi.rs index b9e840b0..0d542e94 100644 --- a/crates/modelardb_embedded/src/capi.rs +++ b/crates/modelardb_embedded/src/capi.rs @@ -445,27 +445,22 @@ unsafe fn tables( } /// Writes the [`Schema`] of the table with the name in `table_name_ptr` in the [`DataFolder`] or -/// [`Client`] in `maybe_operations_ptr` to `schema_struct_array_ptr` and -/// `schema_struct_array_schema_ptr`. Assumes `maybe_operations_ptr` points to a [`DataFolder`] or -/// [`Client`]; table_name_ptr` points to a valid C string; schema_struct_array_ptr` is a valid -/// pointer to enough memory for an Apache Arrow C Data Interface Array; and -/// `schema_struct_array_schema_ptr` is a valid pointer to enough memory for an Apache Arrow C Data -/// Interface Schema. +/// [`Client`] in `maybe_operations_ptr` to `schema_ptr`. Assumes `maybe_operations_ptr` points to +/// a [`DataFolder`] or [`Client`]; `table_name_ptr` points to a valid C string; and `schema_ptr` +/// is a valid pointer to enough memory for an Apache Arrow C Data Interface Schema. #[unsafe(no_mangle)] pub unsafe extern "C" fn modelardb_embedded_schema( maybe_operations_ptr: *mut c_void, is_data_folder: bool, table_name_ptr: *const c_char, - schema_struct_array_ptr: *mut FFI_ArrowArray, - schema_struct_array_schema_ptr: *mut FFI_ArrowSchema, + schema_ptr: *mut FFI_ArrowSchema, ) -> c_int { let maybe_unit = unsafe { schema( maybe_operations_ptr, is_data_folder, table_name_ptr, - schema_struct_array_ptr, - schema_struct_array_schema_ptr, + schema_ptr, ) }; set_error_and_return_code(maybe_unit) @@ -476,24 +471,17 @@ unsafe fn schema( maybe_operations_ptr: *mut c_void, is_data_folder: bool, table_name_ptr: *const c_char, - schema_struct_array_ptr: *mut FFI_ArrowArray, - schema_struct_array_schema_ptr: *mut FFI_ArrowSchema, + schema_ptr: *mut FFI_ArrowSchema, ) -> Result<()> { let modelardb = unsafe { c_void_to_operations(maybe_operations_ptr, is_data_folder)? }; let table_name = unsafe { c_char_ptr_to_str(table_name_ptr)? }; let schema = TOKIO_RUNTIME.block_on(modelardb.schema(table_name))?; - let schema_batch = RecordBatch::new_empty(Arc::new(schema)); - // The schema is returned using an empty record batch since using a pointer to the schema - // causes an ArrowInvalid error. - unsafe { - record_batch_to_pointers( - schema_batch, - schema_struct_array_ptr, - schema_struct_array_schema_ptr, - ) - } + let ffi_schema = FFI_ArrowSchema::try_from(&schema)?; + unsafe { schema_ptr.write(ffi_schema) }; + + Ok(()) } /// Writes the data in `struct_array_ptr` and `struct_array_schema_ptr` to the table with the table From 2b4c9f275f927ce082f530c8954e0ba72fc4e49f Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 18 Mar 2026 09:36:51 +0100 Subject: [PATCH 2/4] Update Python interface to only use schema pointer --- .../bindings/python/modelardb/operations.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/crates/modelardb_embedded/bindings/python/modelardb/operations.py b/crates/modelardb_embedded/bindings/python/modelardb/operations.py index 424d0f22..22cd614f 100644 --- a/crates/modelardb_embedded/bindings/python/modelardb/operations.py +++ b/crates/modelardb_embedded/bindings/python/modelardb/operations.py @@ -154,8 +154,7 @@ def __find_library(build: str) -> str: int modelardb_embedded_schema(void* maybe_operations_ptr, bool is_data_folder, char* table_name_ptr, - struct ArrowArray* schema_struct_array_ptr, - struct ArrowSchema* schema_struct_array_schema_ptr); + struct ArrowSchema* schema_ptr); int modelardb_embedded_write(void* maybe_operations_ptr, bool is_data_folder, @@ -462,21 +461,18 @@ def schema(self, table_name: str) -> Schema: """ table_name_ptr = ffi.new("char[]", bytes(table_name, "UTF-8")) - # The schema is retrieved using an empty record batch since using a pointer to the schema causes an - # ArrowInvalid error. - schema_batch_ffi = FFIArray.from_type(RecordBatch) + schema_ptr = ffi.new("struct ArrowSchema*") + schema_ptr_int = int(ffi.cast("uintptr_t", schema_ptr)) return_code = self.__library.modelardb_embedded_schema( self.__operations_ptr, self.__is_data_folder, table_name_ptr, - schema_batch_ffi.array_ptr, - schema_batch_ffi.schema_ptr, + schema_ptr, ) self.__check_return_code_and_raise_error(return_code) - schema_batch: RecordBatch = schema_batch_ffi.array() - return schema_batch.schema + return Schema._import_from_c(schema_ptr_int) def write(self, table_name: str, uncompressed_batch: RecordBatch): """Writes the data in `uncompressed_batch` to the table with From d4e7b0ee36e50a5625196263a4a1cb608ba03c00 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 18 Mar 2026 10:06:37 +0100 Subject: [PATCH 3/4] Fix modelardb_embedded_schema header after merge --- crates/modelardb_embedded/bindings/c/modelardb_embedded.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/modelardb_embedded/bindings/c/modelardb_embedded.h b/crates/modelardb_embedded/bindings/c/modelardb_embedded.h index 2ba5c2b4..e4c40b4d 100644 --- a/crates/modelardb_embedded/bindings/c/modelardb_embedded.h +++ b/crates/modelardb_embedded/bindings/c/modelardb_embedded.h @@ -127,8 +127,7 @@ int modelardb_embedded_tables(void* maybe_operations_ptr, int modelardb_embedded_schema(void* maybe_operations_ptr, bool is_data_folder, const char* table_name_ptr, - struct ArrowArray* schema_struct_array_ptr, - struct ArrowSchema* schema_struct_array_schema_ptr); + struct ArrowSchema* schema_ptr); // Write data to the table with the given name. int modelardb_embedded_write(void* maybe_operations_ptr, From e974c14b133379086b8941f3b0e0f8f5c1598040 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 18 Mar 2026 10:09:05 +0100 Subject: [PATCH 4/4] Fix formatting error --- .../bindings/python/modelardb/operations.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/modelardb_embedded/bindings/python/modelardb/operations.py b/crates/modelardb_embedded/bindings/python/modelardb/operations.py index 5ebbbd0e..f62c3a7a 100644 --- a/crates/modelardb_embedded/bindings/python/modelardb/operations.py +++ b/crates/modelardb_embedded/bindings/python/modelardb/operations.py @@ -95,9 +95,7 @@ def __find_library(build: str) -> str: case "Windows": library_path = library_folder / "modelardb_embedded.dll" case _: - raise RuntimeError( - "Only Linux, FreeBSD, macOS, and Windows are supported." - ) + raise RuntimeError("Only Linux, FreeBSD, macOS, and Windows are supported.") if library_path.exists(): return library_path