Draft

34 commits
810045a  check for array of arrays and convert to ndarray (cjboyle, Jul 10, 2025)
8714ab1  Merge remote-tracking branch 'upstream/main' (cjboyle, Jul 28, 2025)
875af0b  Merge remote-tracking branch 'upstream/main' (cjboyle, Aug 5, 2025)
7b48ee9  Add ragged dependency (cjboyle, Aug 5, 2025)
76e13ab  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Aug 5, 2025)
dc7c042  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Aug 5, 2025)
016c03e  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Aug 8, 2025)
95cc27f  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Aug 14, 2025)
4543b73  From SQLAdapter, test Array-, Ragged-, then AwkwardAdapter (cjboyle, Aug 15, 2025)
81ea973  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Aug 18, 2025)
62ff00e  Test returned adapters, without nullable data types (cjboyle, Aug 19, 2025)
4235a68  remove normalize_chunks from ragged adapter (cjboyle, Aug 19, 2025)
fb4d71f  Add schema tests for irregular arrays (cjboyle, Aug 20, 2025)
fc242e1  No need to test every datatype, already done elsewhere (cjboyle, Aug 20, 2025)
548b0f3  write + read full ragged arrays (cjboyle, Aug 22, 2025)
23cba73  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Aug 22, 2025)
18cfd67  fix lack of `read()` (cjboyle, Aug 25, 2025)
bebdc68  add more complexity to tests (cjboyle, Aug 25, 2025)
3684801  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Aug 27, 2025)
f3ba2ea  test simple to complex arrays (cjboyle, Aug 27, 2025)
ec7b92f  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Sep 19, 2025)
07fbe8e  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Nov 12, 2025)
596a104  Update structure to store offsets (cjboyle, Nov 12, 2025)
1c9a9ec  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Nov 20, 2025)
47ce353  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Nov 24, 2025)
b477814  fix exit clause logic (cjboyle, Dec 17, 2025)
b9b4fd7  Merge remote-tracking branch 'upstream/main' into support-ragged-arrays (cjboyle, Dec 22, 2025)
7ddbe04  fix parameter order (cjboyle, Dec 22, 2025)
94aed41  test ragged structure and utilities (cjboyle, Dec 22, 2025)
bfc9b58  wip: writing/reading full arrays from flattened .npy files working (cjboyle, Dec 22, 2025)
a0e04d9  update tmp location for JSON export (cjboyle, Dec 22, 2025)
410428b  Add size field to structure, to make RaggedClient closer to ArrayClient. (cjboyle, Dec 22, 2025)
ff877ec  wip: reading sliced data (tests commented) (cjboyle, Dec 23, 2025)
6f72f66  Fix "Self" for python<=3.10 (cjboyle, Dec 23, 2025)
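
Taken together, these commits add support for ragged (variable-length) arrays, backed by the scikit-hep `ragged` library that the pyproject.toml diff below adds as a dependency. A minimal sketch of the kind of data involved, assuming only the `ragged` package's documented constructor (illustrative, not code from this PR):

import ragged

# Each row may have a different length; the trailing dimension is variable.
arr = ragged.array([[1.0, 2.0, 3.0], [4.0], [5.0, 6.0]])
print(arr.shape)  # e.g. (3, None): the ragged dimension has no fixed size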
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -95,6 +95,7 @@ all = [
     "python-dateutil",
     "python-jose[cryptography]",
     "python-multipart",
+    "ragged",
     "redis",
     "rich",
     "sparse >=0.15.5",
@@ -126,6 +127,7 @@ client = [
     "numpy",
     "pandas",
     "pyarrow >=14.0.1",  # includes fix to CVE 2023-47248
+    "ragged",
     "rich",
     "sparse >=0.15.5",
     "stamina",
@@ -260,6 +262,7 @@ server = [
     "python-dateutil",
     "python-jose[cryptography]",
     "python-multipart",
+    "ragged",
     "sparse >=0.15.5",
     "stamina",
     "redis",
51 changes: 46 additions & 5 deletions tiled/_tests/adapters/test_sql.py
@@ -5,6 +5,7 @@
 import pyarrow as pa
 import pytest
 
+from tiled.adapters.array import ArrayAdapter
 from tiled.adapters.sql import (
     COLUMN_NAME_PATTERN,
     TABLE_NAME_PATTERN,
@@ -21,20 +22,25 @@
 data0 = [
     pa.array([1, 2, 3, 4, 5]),
     pa.array([1.0, 2.0, 3.0, 4.0, 5.0]),
-    pa.array(["foo0", "bar0", "baz0", None, "goo0"]),
-    pa.array([True, None, False, True, None]),
+    # pa.array(["foo0", "bar0", "baz0", None, "goo0"]),
+    # pa.array([True, None, False, True, None]),
+    pa.array(["foo0", "bar0", "baz0", "None", "goo0"]),
+    pa.array([True, bool(None), False, True, bool(None)]),
 ]
 data1 = [
     pa.array([6, 7, 8, 9, 10, 11, 12]),
     pa.array([6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]),
-    pa.array(["foo1", "bar1", None, "baz1", "biz", None, "goo"]),
-    pa.array([None, True, True, False, False, None, True]),
+    # pa.array(["foo1", "bar1", None, "baz1", "biz", None, "goo"]),
+    # pa.array([None, True, True, False, False, None, True]),
+    pa.array(["foo1", "bar1", "None", "baz1", "biz", "None", "goo"]),
+    pa.array([bool(None), True, True, False, False, bool(None), True]),
 ]
 data2 = [
     pa.array([13, 14]),
     pa.array([13.0, 14.0]),
     pa.array(["foo2", "baz2"]),
-    pa.array([False, None]),
+    # pa.array([False, None]),
+    pa.array([False, bool(None)]),
 ]
 
 batch0 = pa.record_batch(data0, names=names)
@@ -797,3 +803,38 @@ def deep_array_equal(a1: Any, a2: Any) -> bool:
     assert deep_array_equal(result_part, result_full)
 
     storage.dispose()  # Close all connections
+
+
+@pytest.mark.parametrize(
+    "sql_adapter_name",
+    [
+        "adapter_duckdb_many_partitions",
+        "adapter_psql_many_partitions",
+        "adapter_sqlite_many_partitions",
+    ],
+)
+@pytest.mark.parametrize("field", names)
+def test_compare_field_data_from_array_adapter(
+    sql_adapter_name: str,
+    field: str,
+    request: pytest.FixtureRequest,
+) -> None:
+    # get adapter from fixture
+    sql_adapter: SQLAdapter = request.getfixturevalue(sql_adapter_name)
+
+    table = pa.Table.from_batches([batch0, batch1, batch2])
+    sql_adapter.append_partition(0, table)
+
+    array_adapter = sql_adapter[field]
+    assert isinstance(array_adapter, ArrayAdapter)
+
+    result_read = array_adapter.read()
+    field_index = names.index(field)
+    assert np.array_equal(
+        [
+            *data0[field_index].tolist(),
+            *data1[field_index].tolist(),
+            *data2[field_index].tolist(),
+        ],
+        result_read.tolist(),
+    )
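
One note on the fixture change above: `bool(None)` evaluates to `False`, so the rewritten arrays contain no nulls at all, which is what lets the returned adapters be tested "without nullable data types" (commit 62ff00e). A quick illustration, assuming only stock pyarrow behavior:

import pyarrow as pa

assert bool(None) is False                       # the fixture stores a plain False
assert pa.array([True, bool(None)]).null_count == 0
assert pa.array([True, None]).null_count == 1    # a literal None would introduce a null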
108 changes: 60 additions & 48 deletions tiled/_tests/adapters/test_sql_arrays.py
@@ -1,5 +1,6 @@
-from typing import Callable, cast
+from typing import Callable, Dict, Type, Union, cast
 
+import awkward as ak
 import numpy as np
 import pyarrow as pa
 import pytest
@@ -9,6 +10,9 @@
 from tiled._tests.adapters.test_sql import adapter_psql_many_partitions  # noqa: F401
 from tiled._tests.adapters.test_sql import adapter_psql_one_partition  # noqa: F401
 from tiled._tests.adapters.test_sql import assert_same_rows
+from tiled.adapters.array import ArrayAdapter
+from tiled.adapters.awkward import AwkwardAdapter
+from tiled.adapters.ragged import RaggedAdapter
 from tiled.adapters.sql import SQLAdapter
 from tiled.storage import SQLStorage, parse_storage, register_storage
 from tiled.structures.core import StructureFamily
@@ -17,57 +21,29 @@
 
 rng = np.random.default_rng(42)
 
-names = ["i0", "i1", "i2", "i3", "f4", "f5"]
+names_adapters: Dict[str, Type[Union[ArrayAdapter, AwkwardAdapter, RaggedAdapter]]] = {
+    "integers": ArrayAdapter,
+    "floats": ArrayAdapter,
+    "ragged_floats": RaggedAdapter,
+}
+names = list(names_adapters.keys())
 batch_size = 5
 data0 = [
-    pa.array(
-        [rng.integers(-100, 100, size=10, dtype=np.int8) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=11, dtype=np.int16) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=12, dtype=np.int32) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=13, dtype=np.int64) for _ in range(batch_size)]
-    ),
-    pa.array([rng.random(size=14, dtype=np.float32) for _ in range(batch_size)]),
-    pa.array([rng.random(size=15, dtype=np.float64) for _ in range(batch_size)]),
+    pa.array([rng.integers(-100, 100, size=10) for _ in range(batch_size)]),
+    pa.array([rng.random(size=15) for _ in range(batch_size)]),
+    pa.array([rng.random(size=rng.integers(1, 10)) for _ in range(batch_size)]),
 ]
 batch_size = 8
 data1 = [
-    pa.array(
-        [rng.integers(-100, 100, size=10, dtype=np.int8) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=11, dtype=np.int16) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=12, dtype=np.int32) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=13, dtype=np.int64) for _ in range(batch_size)]
-    ),
-    pa.array([rng.random(size=14, dtype=np.float32) for _ in range(batch_size)]),
-    pa.array([rng.random(size=15, dtype=np.float64) for _ in range(batch_size)]),
+    pa.array([rng.integers(-100, 100, size=10) for _ in range(batch_size)]),
+    pa.array([rng.random(size=15) for _ in range(batch_size)]),
+    pa.array([rng.random(size=rng.integers(1, 10)) for _ in range(batch_size)]),
 ]
 batch_size = 3
 data2 = [
-    pa.array(
-        [rng.integers(-100, 100, size=10, dtype=np.int8) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=11, dtype=np.int16) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=12, dtype=np.int32) for _ in range(batch_size)]
-    ),
-    pa.array(
-        [rng.integers(-100, 100, size=13, dtype=np.int64) for _ in range(batch_size)]
-    ),
-    pa.array([rng.random(size=14, dtype=np.float32) for _ in range(batch_size)]),
-    pa.array([rng.random(size=15, dtype=np.float64) for _ in range(batch_size)]),
+    pa.array([rng.integers(-100, 100, size=10) for _ in range(batch_size)]),
+    pa.array([rng.random(size=15) for _ in range(batch_size)]),
+    pa.array([rng.random(size=rng.integers(1, 10)) for _ in range(batch_size)]),
 ]
 
 batch0 = pa.record_batch(data0, names=names)
Expand All @@ -90,7 +66,7 @@ def _data_source_from_init_storage(
assets=[],
)

storage = cast(SQLStorage, parse_storage(data_uri))
storage = cast("SQLStorage", parse_storage(data_uri))
register_storage(storage)
return SQLAdapter.init_storage(data_source=data_source, storage=storage)

@@ -240,17 +216,53 @@ def test_write_read_one_batch_many_part(
     # read a specific field
     result_read = adapter.read_partition(0, fields=[field])
     field_index = names.index(field)
-    assert np.array_equal(
+    assert ak.array_equal(
         [*data0[field_index].tolist(), *data2[field_index].tolist()],
         result_read[field].tolist(),
     )
     result_read = adapter.read_partition(1, fields=[field])
-    assert np.array_equal(
+    assert ak.array_equal(
         [*data1[field_index].tolist(), *data0[field_index].tolist()],
         result_read[field].tolist(),
     )
     result_read = adapter.read_partition(2, fields=[field])
-    assert np.array_equal(
+    assert ak.array_equal(
         [*data2[field_index].tolist(), *data1[field_index].tolist()],
         result_read[field].tolist(),
     )
+
+
+@pytest.mark.parametrize(
+    "sql_adapter_name",
+    [("adapter_duckdb_many_partitions"), ("adapter_psql_many_partitions")],
+)
+@pytest.mark.parametrize(("field", "array_adapter_type"), [*names_adapters.items()])
+def test_compare_field_data_from_array_adapter(
+    sql_adapter_name: str,
+    field: str,
+    array_adapter_type: type,
+    request: pytest.FixtureRequest,
+) -> None:
+    # get adapter from fixture
+    sql_adapter: SQLAdapter = request.getfixturevalue(sql_adapter_name)
+
+    table = pa.Table.from_batches([batch0, batch1, batch2])
+    sql_adapter.append_partition(0, table)
+
+    array_adapter = sql_adapter[field]
+    assert isinstance(array_adapter, array_adapter_type)
+
+    field_index = names.index(field)
+    if isinstance(array_adapter, AwkwardAdapter):
+        result_read = array_adapter.read()  # smoke test
+        raise NotImplementedError
+    else:
+        result_read = array_adapter.read()
+        assert ak.array_equal(
+            [
+                *data0[field_index].tolist(),
+                *data1[field_index].tolist(),
+                *data2[field_index].tolist(),
+            ],
+            result_read.tolist(),  # type: ignore[attr-defined]
+        )
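
The round-trips asserted above depend on ragged rows being stored flat. A hypothetical sketch of the values-plus-offsets layout that commit 596a104 ("Update structure to store offsets") alludes to; the names here are illustrative, not the PR's actual storage code:

import numpy as np

rows = [np.array([1.0, 2.0, 3.0]), np.array([4.0]), np.array([5.0, 6.0])]
values = np.concatenate(rows)                      # flat buffer: [1. 2. 3. 4. 5. 6.]
offsets = np.cumsum([0] + [len(r) for r in rows])  # [0 3 4 6]
# Row i is recovered by slicing between consecutive offsets:
assert np.array_equal(values[offsets[1]:offsets[2]], rows[1])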
19 changes: 19 additions & 0 deletions tiled/_tests/adapters/test_sql_types.py
@@ -206,6 +206,25 @@ def duckdb_uri(tmp_path: Path) -> Generator[str, None, None]:
         "duckdb": (["DECIMAL(5, 2) NULL"], pa.schema([("x", pa.decimal128(5, 2))])),
         },
     ),
+    "ragged_lists": (
+        pa.Table.from_arrays(
+            [
+                pa.array([[1], [2, 3], [4, 5, 6]], pa.list_(pa.int32())),
+                pa.array([[1.1, 2.2, 3.3], [4.4, 5.5], [6.6]], pa.list_(pa.float32())),
+            ],
+            names=["x", "y"],
+        ),
+        {
+            "duckdb": (
+                ["INTEGER[] NULL", "REAL[] NULL"],
+                pa.schema([("x", pa.list_(pa.int32())), ("y", pa.list_(pa.float32()))]),
+            ),
+            "postgresql": (
+                ["INTEGER ARRAY NULL", "REAL ARRAY NULL"],
+                pa.schema([("x", pa.list_(pa.int32())), ("y", pa.list_(pa.float32()))]),
+            ),
+        },
+    ),
 }
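
For context, the expected column types in this new test case come from Arrow list types mapping onto SQL array columns (INTEGER[] in DuckDB, INTEGER ARRAY in PostgreSQL). A small standalone check using only stock pyarrow:

import pyarrow as pa

table = pa.Table.from_arrays(
    [pa.array([[1], [2, 3]], pa.list_(pa.int32()))], names=["x"]
)
print(table.schema)  # x: list<item: int32>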

