Skip to content

Commit a605b61

Browse files
authored
Update python minimum version to 3.10 (#1296)
* Set minimum python version to 3.10 since 3.9 is end of life
* Ruff updates after 3.10
* Update pyo3 to use 3.10
* Update dependencies for 3.14
1 parent 694a5d8 commit a605b61

File tree

8 files changed

+879
-966
lines changed

8 files changed

+879
-966
lines changed

.github/workflows/test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ jobs:
3333
fail-fast: false
3434
matrix:
3535
python-version:
36-
- "3.9"
3736
- "3.10"
3837
- "3.11"
3938
- "3.12"
4039
- "3.13"
40+
- "3.14"
4141
toolchain:
4242
- "stable"
4343

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ substrait = ["dep:datafusion-substrait"]
3535

3636
[dependencies]
3737
tokio = { version = "1.47", features = ["macros", "rt", "rt-multi-thread", "sync"] }
38-
pyo3 = { version = "0.25", features = ["extension-module", "abi3", "abi3-py39"] }
38+
pyo3 = { version = "0.25", features = ["extension-module", "abi3", "abi3-py310"] }
3939
pyo3-async-runtimes = { version = "0.25", features = ["tokio-runtime"]}
4040
pyo3-log = "0.12.4"
4141
arrow = { version = "56", features = ["pyarrow"] }

pyproject.toml

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ name = "datafusion"
2424
description = "Build and run queries against data"
2525
readme = "README.md"
2626
license = { file = "LICENSE.txt" }
27-
requires-python = ">=3.9"
27+
requires-python = ">=3.10"
2828
keywords = ["datafusion", "dataframe", "rust", "query-engine"]
2929
classifiers = [
3030
"Development Status :: 2 - Pre-Alpha",
@@ -35,15 +35,19 @@ classifiers = [
3535
"Operating System :: Microsoft :: Windows",
3636
"Operating System :: POSIX :: Linux",
3737
"Programming Language :: Python :: 3",
38-
"Programming Language :: Python :: 3.9",
3938
"Programming Language :: Python :: 3.10",
4039
"Programming Language :: Python :: 3.11",
4140
"Programming Language :: Python :: 3.12",
4241
"Programming Language :: Python :: 3.13",
42+
"Programming Language :: Python :: 3.14",
4343
"Programming Language :: Python",
4444
"Programming Language :: Rust",
4545
]
46-
dependencies = ["pyarrow>=11.0.0", "typing-extensions;python_version<'3.13'"]
46+
dependencies = [
47+
"pyarrow>=11.0.0;python_version<'3.14'",
48+
"pyarrow>=22.0.0;python_version>='3.14'",
49+
"typing-extensions;python_version<'3.13'"
50+
]
4751
dynamic = ["version"]
4852

4953
[project.urls]
@@ -147,8 +151,10 @@ ignore-words-list = [
147151
[dependency-groups]
148152
dev = [
149153
"maturin>=1.8.1",
150-
"numpy>1.25.0",
151-
"pre-commit>=4.0.0",
154+
"numpy>1.25.0;python_version<'3.14'",
155+
"numpy>=2.3.2;python_version>='3.14'",
156+
"pre-commit>=4.3.0",
157+
"pyyaml>=6.0.3",
152158
"pytest>=7.4.4",
153159
"pytest-asyncio>=0.23.3",
154160
"ruff>=0.9.1",

python/datafusion/dataframe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -586,7 +586,7 @@ def with_columns(
586586
if isinstance(expr, str):
587587
expressions.append(self.parse_sql_expr(expr).expr)
588588
elif isinstance(expr, Iterable) and not isinstance(
589-
expr, (Expr, str, bytes, bytearray)
589+
expr, Expr | str | bytes | bytearray
590590
):
591591
expressions.extend(
592592
[
@@ -639,7 +639,7 @@ def aggregate(
639639
"""
640640
group_by_list = (
641641
list(group_by)
642-
if isinstance(group_by, Sequence) and not isinstance(group_by, (Expr, str))
642+
if isinstance(group_by, Sequence) and not isinstance(group_by, Expr | str)
643643
else [group_by]
644644
)
645645
aggs_list = (

python/datafusion/expr.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ def _iter(
271271
) -> Iterable[expr_internal.Expr]:
272272
for expr in items:
273273
if isinstance(expr, Iterable) and not isinstance(
274-
expr, (Expr, str, bytes, bytearray)
274+
expr, Expr | str | bytes | bytearray
275275
):
276276
# Treat string-like objects as atomic to surface standard errors
277277
yield from _iter(expr)
@@ -308,7 +308,7 @@ def expr_list_to_raw_expr_list(
308308
expr_list: Optional[list[Expr] | Expr],
309309
) -> Optional[list[expr_internal.Expr]]:
310310
"""Convert a sequence of expressions or column names to raw expressions."""
311-
if isinstance(expr_list, (Expr, str)):
311+
if isinstance(expr_list, Expr | str):
312312
expr_list = [expr_list]
313313
if expr_list is None:
314314
return None
@@ -326,7 +326,7 @@ def sort_list_to_raw_sort_list(
326326
sort_list: Optional[_typing.Union[Sequence[SortKey], SortKey]],
327327
) -> Optional[list[expr_internal.SortExpr]]:
328328
"""Helper function to return an optional sort list to raw variant."""
329-
if isinstance(sort_list, (Expr, SortExpr, str)):
329+
if isinstance(sort_list, Expr | SortExpr | str):
330330
sort_list = [sort_list]
331331
if sort_list is None:
332332
return None

python/tests/test_functions.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,7 @@ def test_array_functions(stmt, py_expr):
567567

568568
col = column("arr")
569569
query_result = df.select(stmt(col)).collect()[0].column(0)
570-
for a, b in zip(query_result, py_expr(data)):
570+
for a, b in zip(query_result, py_expr(data), strict=False):
571571
np.testing.assert_array_almost_equal(
572572
np.array(a.as_py(), dtype=float), np.array(b, dtype=float)
573573
)
@@ -582,7 +582,7 @@ def test_array_function_flatten():
582582
stmt = f.flatten(literal(data))
583583
py_expr = [py_flatten(data)]
584584
query_result = df.select(stmt).collect()[0].column(0)
585-
for a, b in zip(query_result, py_expr):
585+
for a, b in zip(query_result, py_expr, strict=False):
586586
np.testing.assert_array_almost_equal(
587587
np.array(a.as_py(), dtype=float), np.array(b, dtype=float)
588588
)
@@ -600,7 +600,7 @@ def test_array_function_cardinality():
600600

601601
query_result = df.select(stmt).collect()[0].column(0)
602602

603-
for a, b in zip(query_result, py_expr):
603+
for a, b in zip(query_result, py_expr, strict=False):
604604
np.testing.assert_array_equal(
605605
np.array([a.as_py()], dtype=int), np.array([b], dtype=int)
606606
)
@@ -631,7 +631,7 @@ def test_make_array_functions(make_func):
631631
]
632632

633633
query_result = df.select(stmt).collect()[0].column(0)
634-
for a, b in zip(query_result, py_expr):
634+
for a, b in zip(query_result, py_expr, strict=False):
635635
np.testing.assert_array_equal(
636636
np.array(a.as_py(), dtype=str), np.array(b, dtype=str)
637637
)
@@ -664,7 +664,7 @@ def test_array_function_obj_tests(stmt, py_expr):
664664
batch = pa.RecordBatch.from_arrays([np.array(data, dtype=object)], names=["arr"])
665665
df = ctx.create_dataframe([[batch]])
666666
query_result = np.array(df.select(stmt).collect()[0].column(0))
667-
for a, b in zip(query_result, py_expr(data)):
667+
for a, b in zip(query_result, py_expr(data), strict=False):
668668
assert a == b
669669

670670

python/tests/test_sql.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ def test_register_parquet_partitioned(ctx, tmp_path, path_to_str, legacy_data_ty
194194
result = pa.Table.from_batches(result)
195195

196196
rd = result.to_pydict()
197-
assert dict(zip(rd["grp"], rd["cnt"])) == {"a": 3, "b": 1}
197+
assert dict(zip(rd["grp"], rd["cnt"], strict=False)) == {"a": 3, "b": 1}
198198

199199

200200
@pytest.mark.parametrize("path_to_str", [True, False])
@@ -340,7 +340,10 @@ def test_execute(ctx, tmp_path):
340340
result_values.extend(pydict["cnt"])
341341

342342
result_keys, result_values = (
343-
list(t) for t in zip(*sorted(zip(result_keys, result_values)))
343+
list(t)
344+
for t in zip(
345+
*sorted(zip(result_keys, result_values, strict=False)), strict=False
346+
)
344347
)
345348

346349
assert result_keys == [1, 2, 3, 11, 12]
@@ -467,7 +470,7 @@ def test_simple_select(ctx, tmp_path, arr):
467470
# In DF 43.0.0 we now default to having BinaryView and StringView
468471
# so the array that is saved to the parquet is slightly different
469472
# than the array read. Convert to values for comparison.
470-
if isinstance(result, (pa.BinaryViewArray, pa.StringViewArray)):
473+
if isinstance(result, pa.BinaryViewArray | pa.StringViewArray):
471474
arr = arr.tolist()
472475
result = result.tolist()
473476

@@ -524,12 +527,12 @@ def test_register_listing_table(
524527
result = pa.Table.from_batches(result)
525528

526529
rd = result.to_pydict()
527-
assert dict(zip(rd["grp"], rd["count"])) == {"a": 5, "b": 2}
530+
assert dict(zip(rd["grp"], rd["count"], strict=False)) == {"a": 5, "b": 2}
528531

529532
result = ctx.sql(
530533
"SELECT grp, COUNT(*) AS count FROM my_table WHERE date='2020-10-05' GROUP BY grp" # noqa: E501
531534
).collect()
532535
result = pa.Table.from_batches(result)
533536

534537
rd = result.to_pydict()
535-
assert dict(zip(rd["grp"], rd["count"])) == {"a": 3, "b": 2}
538+
assert dict(zip(rd["grp"], rd["count"], strict=False)) == {"a": 3, "b": 2}

0 commit comments

Comments
 (0)