From cdaf678b90d1dd104dd2bc823c82bfab499d7275 Mon Sep 17 00:00:00 2001 From: Rob Reeves Date: Tue, 28 Apr 2026 20:26:13 +0000 Subject: [PATCH 1/5] Add deepcopy tests for BooleanExpression subclasses Pydantic v2's BaseModel.__deepcopy__ calls cls.__new__(cls) with no args, but And, Or, and Not define __new__ with required positional arguments. These tests reproduce the TypeError on deepcopy. --- tests/expressions/test_expressions.py | 50 +++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py index 157c1ada..1af977f7 100644 --- a/tests/expressions/test_expressions.py +++ b/tests/expressions/test_expressions.py @@ -16,6 +16,7 @@ # under the License. # pylint:disable=redefined-outer-name,eval-used +import copy import pickle import uuid from decimal import Decimal @@ -1292,6 +1293,55 @@ def test_bind_ambiguous_name() -> None: assert "Invalid schema, multiple fields for name foo.bar: 2 and 3" in str(exc_info) +# --- deepcopy tests --- + + +def test_deepcopy_and() -> None: + expr = And(EqualTo("x", 1), EqualTo("y", 2)) + copied = copy.deepcopy(expr) + assert copied == expr + + +def test_deepcopy_or() -> None: + expr = Or(EqualTo("x", 1), EqualTo("y", 2)) + copied = copy.deepcopy(expr) + assert copied == expr + + +def test_deepcopy_not() -> None: + expr = Not(EqualTo("x", 1)) + copied = copy.deepcopy(expr) + assert copied == expr + + +def test_deepcopy_equal_to() -> None: + expr = EqualTo("x", 1) + copied = copy.deepcopy(expr) + assert copied == expr + + +def test_deepcopy_always_true() -> None: + copied = copy.deepcopy(AlwaysTrue()) + assert copied is AlwaysTrue() + + +def test_deepcopy_always_false() -> None: + copied = copy.deepcopy(AlwaysFalse()) + assert copied is AlwaysFalse() + + +def test_deepcopy_always_true_then_pickle() -> None: + copied = copy.deepcopy(AlwaysTrue()) + restored = pickle.loads(pickle.dumps(copied)) + assert restored is AlwaysTrue() + + +def test_deepcopy_nested_expression() -> None: + expr = And(Or(EqualTo("a", 1), EqualTo("b", 2)), Not(EqualTo("c", 3))) + copied = copy.deepcopy(expr) + assert copied == expr + + # __ __ ___ # | \/ |_ _| _ \_ _ # | |\/| | || | _/ || | From 38004f64b5dd97d2343896da64a598e884701638 Mon Sep 17 00:00:00 2001 From: Rob Reeves Date: Tue, 28 Apr 2026 20:37:41 +0000 Subject: [PATCH 2/5] Fix deepcopy for BooleanExpression subclasses Add __deepcopy__ to BooleanExpression that reconstructs the expression via model_fields, and returns self for Singleton subclasses. Fixes TypeError from Pydantic v2's BaseModel.__deepcopy__ calling cls.__new__(cls) with no args on And, Or, and Not which require positional arguments in __new__. --- pyiceberg/expressions/__init__.py | 7 +++++++ tests/expressions/test_expressions.py | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index 3910a146..4988a56a 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -17,6 +17,7 @@ from __future__ import annotations +import copy from abc import ABC, abstractmethod from collections.abc import Callable, Iterable, Sequence from functools import cached_property @@ -51,6 +52,12 @@ def _to_literal(value: L | Literal[L]) -> Literal[L]: class BooleanExpression(IcebergBaseModel, ABC): """An expression that evaluates to a boolean.""" + def __deepcopy__(self, memo: dict[int, Any]) -> BooleanExpression: + if isinstance(self, Singleton): + return self + fields = {name: copy.deepcopy(getattr(self, name), memo) for name in type(self).model_fields} + return type(self)(**fields) + @abstractmethod def __invert__(self) -> BooleanExpression: """Transform the Expression into its negated version.""" diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py index 1af977f7..547a2171 100644 --- a/tests/expressions/test_expressions.py +++ b/tests/expressions/test_expressions.py @@ -1336,6 +1336,18 @@ def test_deepcopy_always_true_then_pickle() -> None: assert restored is AlwaysTrue() +def test_deepcopy_balanced_and() -> None: + expr = And(EqualTo("a", 1), EqualTo("b", 2), EqualTo("c", 3), EqualTo("d", 4)) + copied = copy.deepcopy(expr) + assert copied == expr + + +def test_deepcopy_balanced_or() -> None: + expr = Or(EqualTo("a", 1), EqualTo("b", 2), EqualTo("c", 3), EqualTo("d", 4)) + copied = copy.deepcopy(expr) + assert copied == expr + + def test_deepcopy_nested_expression() -> None: expr = And(Or(EqualTo("a", 1), EqualTo("b", 2)), Not(EqualTo("c", 3))) copied = copy.deepcopy(expr) From 0fe92c657beee4f6ba9104b71ce8bbc0268c1462 Mon Sep 17 00:00:00 2001 From: Rob Reeves Date: Tue, 28 Apr 2026 20:40:19 +0000 Subject: [PATCH 3/5] Add deepcopy then pickle test for compound expressions --- tests/expressions/test_expressions.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py index 547a2171..ff9545c0 100644 --- a/tests/expressions/test_expressions.py +++ b/tests/expressions/test_expressions.py @@ -1354,6 +1354,13 @@ def test_deepcopy_nested_expression() -> None: assert copied == expr +def test_deepcopy_then_pickle() -> None: + expr = And(EqualTo("x", 1), EqualTo("y", 2)) + copied = copy.deepcopy(expr) + restored = pickle.loads(pickle.dumps(copied)) + assert restored == expr + + # __ __ ___ # | \/ |_ _| _ \_ _ # | |\/| | || | _/ || | From e56fee0977b35aecebf4c728dda3623df6d8bd1b Mon Sep 17 00:00:00 2001 From: Rob Reeves Date: Tue, 28 Apr 2026 21:00:42 +0000 Subject: [PATCH 4/5] Add missing docstring to __deepcopy__ for pydocstyle --- pyiceberg/expressions/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index 4988a56a..5902b815 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -53,6 +53,7 @@ class BooleanExpression(IcebergBaseModel, ABC): """An expression that evaluates to a boolean.""" def __deepcopy__(self, memo: dict[int, Any]) -> BooleanExpression: + """Return a deep copy of this expression.""" if isinstance(self, Singleton): return self fields = {name: copy.deepcopy(getattr(self, name), memo) for name in type(self).model_fields} From 4693b233e73a919bc6b51f808263bd4b026d2319 Mon Sep 17 00:00:00 2001 From: Rob Reeves Date: Tue, 28 Apr 2026 21:33:41 +0000 Subject: [PATCH 5/5] Simplify deepcopy fix to per-class overrides on And, Or, Not Instead of a generic __deepcopy__ on BooleanExpression with Pydantic internals, add simple __deepcopy__ methods to the three classes that need them. Also add identity assertions for non-Singleton copies. --- pyiceberg/expressions/__init__.py | 19 ++++++++++++------- tests/expressions/test_expressions.py | 4 ++++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index 5902b815..ef4cb250 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -52,13 +52,6 @@ def _to_literal(value: L | Literal[L]) -> Literal[L]: class BooleanExpression(IcebergBaseModel, ABC): """An expression that evaluates to a boolean.""" - def __deepcopy__(self, memo: dict[int, Any]) -> BooleanExpression: - """Return a deep copy of this expression.""" - if isinstance(self, Singleton): - return self - fields = {name: copy.deepcopy(getattr(self, name), memo) for name in type(self).model_fields} - return type(self)(**fields) - @abstractmethod def __invert__(self) -> BooleanExpression: """Transform the Expression into its negated version.""" @@ -347,6 +340,10 @@ def __invert__(self) -> BooleanExpression: # De Morgan's law: not (A and B) = (not A) or (not B) return Or(~self.left, ~self.right) + def __deepcopy__(self, memo: dict[int, Any]) -> And: + """Return a deep copy of the And expression.""" + return And(copy.deepcopy(self.left, memo), copy.deepcopy(self.right, memo)) + def __getnewargs__(self) -> tuple[BooleanExpression, BooleanExpression]: """Pickle the And class.""" return (self.left, self.right) @@ -394,6 +391,10 @@ def __invert__(self) -> BooleanExpression: # De Morgan's law: not (A or B) = (not A) and (not B) return And(~self.left, ~self.right) + def __deepcopy__(self, memo: dict[int, Any]) -> Or: + """Return a deep copy of the Or expression.""" + return Or(copy.deepcopy(self.left, memo), copy.deepcopy(self.right, memo)) + def __getnewargs__(self) -> tuple[BooleanExpression, BooleanExpression]: """Pickle the Or class.""" return (self.left, self.right) @@ -436,6 +437,10 @@ def __invert__(self) -> BooleanExpression: """Transform the Expression into its negated version.""" return self.child + def __deepcopy__(self, memo: dict[int, Any]) -> Not: + """Return a deep copy of the Not expression.""" + return Not(copy.deepcopy(self.child, memo)) + def __getnewargs__(self) -> tuple[BooleanExpression]: """Pickle the Not class.""" return (self.child,) diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py index ff9545c0..fde69a46 100644 --- a/tests/expressions/test_expressions.py +++ b/tests/expressions/test_expressions.py @@ -1300,24 +1300,28 @@ def test_deepcopy_and() -> None: expr = And(EqualTo("x", 1), EqualTo("y", 2)) copied = copy.deepcopy(expr) assert copied == expr + assert copied is not expr def test_deepcopy_or() -> None: expr = Or(EqualTo("x", 1), EqualTo("y", 2)) copied = copy.deepcopy(expr) assert copied == expr + assert copied is not expr def test_deepcopy_not() -> None: expr = Not(EqualTo("x", 1)) copied = copy.deepcopy(expr) assert copied == expr + assert copied is not expr def test_deepcopy_equal_to() -> None: expr = EqualTo("x", 1) copied = copy.deepcopy(expr) assert copied == expr + assert copied is not expr def test_deepcopy_always_true() -> None: