Draft

34 commits
810045a  check for array of arrays and convert to ndarray (cjboyle, Jul 10, 2025)
8714ab1  Merge remote-tracking branch 'upstream/main' (cjboyle, Jul 28, 2025)
875af0b  Merge remote-tracking branch 'upstream/main' (cjboyle, Aug 5, 2025)
7b48ee9  Add ragged dependency (cjboyle, Aug 5, 2025)
76e13ab  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Aug 5, 2025)
dc7c042  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Aug 5, 2025)
016c03e  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Aug 8, 2025)
95cc27f  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Aug 14, 2025)
4543b73  From SQLAdapter, test Array-, Ragged-, then AwkwardAdapter (cjboyle, Aug 15, 2025)
81ea973  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Aug 18, 2025)
62ff00e  Test returned adapters, without nullable data types (cjboyle, Aug 19, 2025)
4235a68  remove normalize_chunks from ragged adapter (cjboyle, Aug 19, 2025)
fb4d71f  Add schema tests for irregular arrays (cjboyle, Aug 20, 2025)
fc242e1  No need to test every datatype, already done elsewhere (cjboyle, Aug 20, 2025)
548b0f3  write + read full ragged arrays (cjboyle, Aug 22, 2025)
23cba73  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Aug 22, 2025)
18cfd67  fix lack of `read()` (cjboyle, Aug 25, 2025)
bebdc68  add more complexity to tests (cjboyle, Aug 25, 2025)
3684801  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Aug 27, 2025)
f3ba2ea  test simple to complex arrays (cjboyle, Aug 27, 2025)
ec7b92f  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Sep 19, 2025)
07fbe8e  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Nov 12, 2025)
596a104  Update structure to store offsets (cjboyle, Nov 12, 2025)
1c9a9ec  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Nov 20, 2025)
47ce353  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Nov 24, 2025)
b477814  fix exit clause logic (cjboyle, Dec 17, 2025)
b9b4fd7  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Dec 22, 2025)
7ddbe04  fix parameter order (cjboyle, Dec 22, 2025)
94aed41  test ragged structure and utilities (cjboyle, Dec 22, 2025)
bfc9b58  wip: writing/reading full arrays from flattened .npy files working (cjboyle, Dec 22, 2025)
a0e04d9  update tmp location for JSON export (cjboyle, Dec 22, 2025)
410428b  Add size field to structure, to make RaggedClient closer to ArrayClient. (cjboyle, Dec 22, 2025)
ff877ec  wip: reading sliced data (tests commented) (cjboyle, Dec 23, 2025)
6f72f66  Fix "Self" for python<=3.10 (cjboyle, Dec 23, 2025)
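
Taken together, these commits add support for ragged (variable-length) arrays, backed by the scikit-hep `ragged` library that the pyproject.toml diff below adds as a dependency. A minimal sketch of the kind of data involved, assuming only the `ragged` package's documented constructor (illustrative, not code from this PR):

import ragged

# Each row may have a different length; the trailing dimension is variable.
arr = ragged.array([[1.0, 2.0, 3.0], [4.0], [5.0, 6.0]])
print(arr.shape)  # e.g. (3, None): the ragged dimension has no fixed size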
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -95,6 +95,7 @@ all = [
     "python-dateutil",
     "python-jose[cryptography]",
     "python-multipart",
+    "ragged",
     "redis",
     "rich",
     "sparse >=0.15.5",
@@ -126,6 +127,7 @@ client = [
     "numpy",
     "pandas",
     "pyarrow >=14.0.1",  # includes fix to CVE 2023-47248
+    "ragged",
     "rich",
     "sparse >=0.15.5",
     "stamina",
@@ -260,6 +262,7 @@ server = [
     "python-dateutil",
     "python-jose[cryptography]",
     "python-multipart",
+    "ragged",
     "sparse >=0.15.5",
     "stamina",
     "redis",
51 changes: 46 additions & 5 deletions tiled/_tests/adapters/test_sql.py
@@ -5,6 +5,7 @@
 import pyarrow as pa
 import pytest
 
+from tiled.adapters.array import ArrayAdapter
 from tiled.adapters.sql import (
     COLUMN_NAME_PATTERN,
     TABLE_NAME_PATTERN,
@@ -21,20 +22,25 @@
 data0 = [
     pa.array([1, 2, 3, 4, 5]),
     pa.array([1.0, 2.0, 3.0, 4.0, 5.0]),
-    pa.array(["foo0", "bar0", "baz0", None, "goo0"]),
-    pa.array([True, None, False, True, None]),
+    # pa.array(["foo0", "bar0", "baz0", None, "goo0"]),
+    # pa.array([True, None, False, True, None]),
+    pa.array(["foo0", "bar0", "baz0", "None", "goo0"]),
+    pa.array([True, bool(None), False, True, bool(None)]),
 ]
 data1 = [
     pa.array([6, 7, 8, 9, 10, 11, 12]),
     pa.array([6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]),
-    pa.array(["foo1", "bar1", None, "baz1", "biz", None, "goo"]),
-    pa.array([None, True, True, False, False, None, True]),
+    # pa.array(["foo1", "bar1", None, "baz1", "biz", None, "goo"]),
+    # pa.array([None, True, True, False, False, None, True]),
+    pa.array(["foo1", "bar1", "None", "baz1", "biz", "None", "goo"]),
+    pa.array([bool(None), True, True, False, False, bool(None), True]),
 ]
 data2 = [
     pa.array([13, 14]),
     pa.array([13.0, 14.0]),
     pa.array(["foo2", "baz2"]),
-    pa.array([False, None]),
+    # pa.array([False, None]),
+    pa.array([False, bool(None)]),
 ]
 
 batch0 = pa.record_batch(data0, names=names)
@@ -797,3 +803,38 @@ def deep_array_equal(a1: Any, a2: Any) -> bool:
     assert deep_array_equal(result_part, result_full)
 
     storage.dispose()  # Close all connections
+
+
+@pytest.mark.parametrize(
+    "sql_adapter_name",
+    [
+        "adapter_duckdb_many_partitions",
+        "adapter_psql_many_partitions",
+        "adapter_sqlite_many_partitions",
+    ],
+)
+@pytest.mark.parametrize("field", names)
+def test_compare_field_data_from_array_adapter(
+    sql_adapter_name: str,
+    field: str,
+    request: pytest.FixtureRequest,
+) -> None:
+    # get adapter from fixture
+    sql_adapter: SQLAdapter = request.getfixturevalue(sql_adapter_name)
+
+    table = pa.Table.from_batches([batch0, batch1, batch2])
+    sql_adapter.append_partition(0, table)
+
+    array_adapter = sql_adapter[field]
+    assert isinstance(array_adapter, ArrayAdapter)
+
+    result_read = array_adapter.read()
+    field_index = names.index(field)
+    assert np.array_equal(
+        [
+            *data0[field_index].tolist(),
+            *data1[field_index].tolist(),
+            *data2[field_index].tolist(),
+        ],
+        result_read.tolist(),
+    )
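
One note on the fixture change above: `bool(None)` evaluates to `False`, so the rewritten arrays contain no nulls at all, which is what lets the returned adapters be tested "without nullable data types" (commit 62ff00e). A quick illustration, assuming only stock pyarrow behavior:

import pyarrow as pa

assert bool(None) is False                       # the fixture stores a plain False
assert pa.array([True, bool(None)]).null_count == 0
assert pa.array([True, None]).null_count == 1    # a literal None would introduce a null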
108 changes: 60 additions & 48 deletions tiled/_tests/adapters/test_sql_arrays.py
@@ -1,5 +1,6 @@
-from typing import Callable, cast
+from typing import Callable, Dict, Type, Union, cast
 
+import awkward as ak
 import numpy as np
 import pyarrow as pa
 import pytest
@@ -9,6 +10,9 @@
 from tiled._tests.adapters.test_sql import adapter_psql_many_partitions  # noqa: F401
 from tiled._tests.adapters.test_sql import adapter_psql_one_partition  # noqa: F401
 from tiled._tests.adapters.test_sql import assert_same_rows
+from tiled.adapters.array import ArrayAdapter
+from tiled.adapters.awkward import AwkwardAdapter
+from tiled.adapters.ragged import RaggedAdapter
 from tiled.adapters.sql import SQLAdapter
 from tiled.storage import SQLStorage, parse_storage, register_storage
 from tiled.structures.core import StructureFamily
@@ -17,57 +21,29 @@
 
 rng = np.random.default_rng(42)
 
-names = ["i0", "i1", "i2", "i3", "f4", "f5"]
+names_adapters: Dict[str, Type[Union[ArrayAdapter, AwkwardAdapter, RaggedAdapter]]] = {
+    "integers": ArrayAdapter,
+    "floats": ArrayAdapter,
+    "ragged_floats": RaggedAdapter,
+}
+names = list(names_adapters.keys())
 batch_size = 5
 data0 = [
-    pa.array(
-        [rng.integers(-100, 100, size=10, dtype=np.int8) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=11, dtype=np.int16) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=12, dtype=np.int32) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=13, dtype=np.int64) for _ in range(batch_size)]
-    ),
-    pa.array([rng.random(size=14, dtype=np.float32) for _ in range(batch_size)]),
-    pa.array([rng.random(size=15, dtype=np.float64) for _ in range(batch_size)]),
+    pa.array([rng.integers(-100, 100, size=10) for _ in range(batch_size)]),
+    pa.array([rng.random(size=15) for _ in range(batch_size)]),
+    pa.array([rng.random(size=rng.integers(1, 10)) for _ in range(batch_size)]),
 ]
 batch_size = 8
 data1 = [
-    pa.array(
-        [rng.integers(-100, 100, size=10, dtype=np.int8) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=11, dtype=np.int16) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=12, dtype=np.int32) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=13, dtype=np.int64) for _ in range(batch_size)]
-    ),
-    pa.array([rng.random(size=14, dtype=np.float32) for _ in range(batch_size)]),
-    pa.array([rng.random(size=15, dtype=np.float64) for _ in range(batch_size)]),
+    pa.array([rng.integers(-100, 100, size=10) for _ in range(batch_size)]),
+    pa.array([rng.random(size=15) for _ in range(batch_size)]),
+    pa.array([rng.random(size=rng.integers(1, 10)) for _ in range(batch_size)]),
 ]
 batch_size = 3
 data2 = [
-    pa.array(
-        [rng.integers(-100, 100, size=10, dtype=np.int8) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=11, dtype=np.int16) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=12, dtype=np.int32) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=13, dtype=np.int64) for _ in range(batch_size)]
-    ),
-    pa.array([rng.random(size=14, dtype=np.float32) for _ in range(batch_size)]),
-    pa.array([rng.random(size=15, dtype=np.float64) for _ in range(batch_size)]),
+    pa.array([rng.integers(-100, 100, size=10) for _ in range(batch_size)]),
+    pa.array([rng.random(size=15) for _ in range(batch_size)]),
+    pa.array([rng.random(size=rng.integers(1, 10)) for _ in range(batch_size)]),
 ]
 
 batch0 = pa.record_batch(data0, names=names)
Expand All @@ -90,7 +66,7 @@ def _data_source_from_init_storage(
assets=[],
)

storage = cast(SQLStorage, parse_storage(data_uri))
storage = cast("SQLStorage", parse_storage(data_uri))
register_storage(storage)
return SQLAdapter.init_storage(data_source=data_source, storage=storage)

@@ -240,17 +216,53 @@ def test_write_read_one_batch_many_part(
     # read a specific field
     result_read = adapter.read_partition(0, fields=[field])
     field_index = names.index(field)
-    assert np.array_equal(
+    assert ak.array_equal(
         [*data0[field_index].tolist(), *data2[field_index].tolist()],
         result_read[field].tolist(),
     )
     result_read = adapter.read_partition(1, fields=[field])
-    assert np.array_equal(
+    assert ak.array_equal(
         [*data1[field_index].tolist(), *data0[field_index].tolist()],
         result_read[field].tolist(),
     )
     result_read = adapter.read_partition(2, fields=[field])
-    assert np.array_equal(
+    assert ak.array_equal(
         [*data2[field_index].tolist(), *data1[field_index].tolist()],
         result_read[field].tolist(),
     )
+
+
+@pytest.mark.parametrize(
+    "sql_adapter_name",
+    [("adapter_duckdb_many_partitions"), ("adapter_psql_many_partitions")],
+)
+@pytest.mark.parametrize(("field", "array_adapter_type"), [*names_adapters.items()])
+def test_compare_field_data_from_array_adapter(
+    sql_adapter_name: str,
+    field: str,
+    array_adapter_type: type,
+    request: pytest.FixtureRequest,
+) -> None:
+    # get adapter from fixture
+    sql_adapter: SQLAdapter = request.getfixturevalue(sql_adapter_name)
+
+    table = pa.Table.from_batches([batch0, batch1, batch2])
+    sql_adapter.append_partition(0, table)
+
+    array_adapter = sql_adapter[field]
+    assert isinstance(array_adapter, array_adapter_type)
+
+    field_index = names.index(field)
+    if isinstance(array_adapter, AwkwardAdapter):
+        result_read = array_adapter.read()  # smoke test
+        raise NotImplementedError
+    else:
+        result_read = array_adapter.read()
+        assert ak.array_equal(
+            [
+                *data0[field_index].tolist(),
+                *data1[field_index].tolist(),
+                *data2[field_index].tolist(),
+            ],
+            result_read.tolist(),  # type: ignore[attr-defined]
+        )
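
The round-trips asserted above depend on ragged rows being stored flat. A hypothetical sketch of the values-plus-offsets layout that commit 596a104 ("Update structure to store offsets") alludes to; the names here are illustrative, not the PR's actual storage code:

import numpy as np

rows = [np.array([1.0, 2.0, 3.0]), np.array([4.0]), np.array([5.0, 6.0])]
values = np.concatenate(rows)                      # flat buffer: [1. 2. 3. 4. 5. 6.]
offsets = np.cumsum([0] + [len(r) for r in rows])  # [0 3 4 6]
# Row i is recovered by slicing between consecutive offsets:
assert np.array_equal(values[offsets[1]:offsets[2]], rows[1])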
19 changes: 19 additions & 0 deletions tiled/_tests/adapters/test_sql_types.py
@@ -206,6 +206,25 @@ def duckdb_uri(tmp_path: Path) -> Generator[str, None, None]:
         "duckdb": (["DECIMAL(5, 2) NULL"], pa.schema([("x", pa.decimal128(5, 2))])),
         },
     ),
+    "ragged_lists": (
+        pa.Table.from_arrays(
+            [
+                pa.array([[1], [2, 3], [4, 5, 6]], pa.list_(pa.int32())),
+                pa.array([[1.1, 2.2, 3.3], [4.4, 5.5], [6.6]], pa.list_(pa.float32())),
+            ],
+            names=["x", "y"],
+        ),
+        {
+            "duckdb": (
+                ["INTEGER[] NULL", "REAL[] NULL"],
+                pa.schema([("x", pa.list_(pa.int32())), ("y", pa.list_(pa.float32()))]),
+            ),
+            "postgresql": (
+                ["INTEGER ARRAY NULL", "REAL ARRAY NULL"],
+                pa.schema([("x", pa.list_(pa.int32())), ("y", pa.list_(pa.float32()))]),
+            ),
+        },
+    ),
 }
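
For context, the expected column types in this new test case come from Arrow list types mapping onto SQL array columns (INTEGER[] in DuckDB, INTEGER ARRAY in PostgreSQL). A small standalone check using only stock pyarrow:

import pyarrow as pa

table = pa.Table.from_arrays(
    [pa.array([[1], [2, 3]], pa.list_(pa.int32()))], names=["x"]
)
print(table.schema)  # x: list<item: int32>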

