From 97985ece93d96b8b3ec33b2db2bc3034ede8c2a3 Mon Sep 17 00:00:00 2001
From: Fabien Antoine <fabien.antoine@m4x.org>
Date: Mon, 19 May 2025 16:14:47 -0400
Subject: [PATCH 1/6] Integrate tests step in existing workflows

---
 .github/workflows/pull.yml |  7 ++++-
 .github/workflows/push.yml |  5 ++++
 Dockerfile                 |  5 +++-
 Makefile                   |  6 ++++
 README.md                  | 11 +++++++
 docker-compose-dev.yml     |  1 +
 requirements.txt           |  3 +-
 tests/test_recipes.py      | 60 ++++++++++++++++++++++++++++++++++++++
 8 files changed, 95 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_recipes.py

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index f3379f4..d664f02 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -14,6 +14,11 @@ jobs:
         id: extract_branch
       - name: build
         if: success()
-        run: make version backend-docker-check GIT_BRANCH=$GIT_BRANCH || ( make backend-build GIT_BRANCH=$GIT_BRANCH && make backend backend-stop GIT_BRANCH=$GIT_BRANCH)
+        run: make version backend-docker-check GIT_BRANCH=$GIT_BRANCH || ( make backend-build GIT_BRANCH=$GIT_BRANCH && make backend GIT_BRANCH=$GIT_BRANCH)
+        env:
+          GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }}
+      - name: tests
+        if: success()
+        run: make tests backend-stop GIT_BRANCH=$GIT_BRANCH
         env:
           GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }}
diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml
index 0fe25e5..bbac64b 100644
--- a/.github/workflows/push.yml
+++ b/.github/workflows/push.yml
@@ -21,6 +21,11 @@ jobs:
         run: make version backend-docker-check GIT_BRANCH=$GIT_BRANCH || ( make backend-build GIT_BRANCH=$GIT_BRANCH && make backend backend-stop GIT_BRANCH=$GIT_BRANCH)
         env:
           GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }}
+      - name: tests
+        if: success()
+        run: make tests
+        env:
+          GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }}
       - name: publish
         if: success()
         run: |
diff --git a/Dockerfile b/Dockerfile
index 334399f..0c04d91 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -23,7 +23,7 @@ RUN mkdir -p code\
              conf/run\
              log\
              referential_data\
-             matchID_test/\
+             tests\
              upload
 
 ################################
@@ -41,11 +41,13 @@ WORKDIR /${APP}
 COPY code/ code/
 COPY conf/ conf/
 COPY referential_data/ referential_data/
+COPY tests/ tests/
 
 VOLUME /${app_path}/projects
 VOLUME /${app_path}/log
 VOLUME /${app_path}/models
 VOLUME /${app_path}/upload
+VOLUME /${app_path}/tests
 
 EXPOSE ${BACKEND_PORT}
 
@@ -68,6 +70,7 @@ VOLUME /${APP}/referential_data
 VOLUME /${APP}/log
 VOLUME /${APP}/models
 VOLUME /${APP}/upload
+VOLUME /${APP}/tests
 
 EXPOSE ${BACKEND_PORT}
 
diff --git a/Makefile b/Makefile
index b2c4a1d..e68d50f 100644
--- a/Makefile
+++ b/Makefile
@@ -36,6 +36,7 @@ export GITHUB_OAUTH_SECRET=203010f81158d3ceab0297a213e80bc0fbfe7f8e
 export BACKEND := $(shell pwd)
 export UPLOAD=${BACKEND}/upload
 export PROJECTS=${BACKEND}/projects
+export TESTS=${BACKEND}/tests
 export EXAMPLES=${BACKEND}/../examples
 export TUTORIAL=${BACKEND}/../tutorial
 export MODELS=${BACKEND}/models
@@ -696,3 +697,8 @@ deploy-remote: config deploy-remote-instance deploy-remote-services deploy-remot
 
 clean-remote:
 	@make -C ${APP_PATH}/${GIT_TOOLS} remote-clean ${MAKEOVERRIDES} > /dev/null 2>&1 || true
+
+tests:
+	@docker exec -i ${USE_TTY} ${DC_PREFIX}-${APP} pytest -q -W "ignore::DeprecationWarning"
+
+.PHONY: tests
diff --git a/README.md b/README.md
index 47dacda..0a9b0e6 100644
--- a/README.md
+++ b/README.md
@@ -128,3 +128,14 @@ If you want to contribute to the developpement, you'll be able to fork the repo
 ```
 make start-dev
 ```
+
+### Running tests
+Run the test suite inside the running backend container (start it first with `make backend`):
+```bash
+make tests
+```
+You can also install the requirements and run pytest directly:
+```bash
+pip install -r requirements.txt
+pytest
+```
diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml
index 96f1144..8699260 100644
--- a/docker-compose-dev.yml
+++ b/docker-compose-dev.yml
@@ -8,3 +8,4 @@ services:
       - ${BACKEND}/code/:/${APP_GROUP}/code/
       - ${BACKEND}/referential_data/:/${APP_GROUP}/referential_data/
       - ${BACKEND}/conf/:/${APP_GROUP}/conf/
+      - ${BACKEND}/tests/:/${APP_GROUP}/tests/
diff --git a/requirements.txt b/requirements.txt
index 773b5e4..997fdb4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -25,4 +25,5 @@ typing
 Werkzeug==3.0.3
 rsa>=4.7 # not directly required, pinned by Snyk to avoid a vulnerability
 zipp>=3.19.1 # not directly required, pinned by Snyk to avoid a vulnerability
-pyarrow>=14.0.1
\ No newline at end of file
+pyarrow>=14.0.1
+pytest
diff --git a/tests/test_recipes.py b/tests/test_recipes.py
new file mode 100644
index 0000000..3265ce6
--- /dev/null
+++ b/tests/test_recipes.py
@@ -0,0 +1,60 @@
+import importlib.util
+from pathlib import Path
+import pytest
+import sys
+
+pd = pytest.importorskip('pandas')
+np = pytest.importorskip('numpy')
+
+MODULE_PATH = Path(__file__).resolve().parents[1] / 'code' / 'recipes.py'
+CODE_DIR = str(MODULE_PATH.parent)
+
+# pytestmark = pytest.mark.skipif(
+#     not MODULE_PATH.exists(),
+#     reason='recipes module not found'
+# )
+
+def load_recipes():
+    original_sys_path = list(sys.path)
+    if CODE_DIR not in sys.path:
+        sys.path.insert(0, CODE_DIR)
+    
+    recipes_module = None
+    try:
+        spec = importlib.util.spec_from_file_location('recipes', MODULE_PATH)
+        if spec is None:
+            raise ImportError(f"Impossible de trouver le spec pour {MODULE_PATH}")
+        recipes_module = importlib.util.module_from_spec(spec)
+        if spec.loader is None:
+            raise ImportError(f"Spec pour {MODULE_PATH} n'a pas de loader")
+        spec.loader.exec_module(recipes_module)
+        return recipes_module
+    finally:
+        sys.path = original_sys_path
+
+recipes = load_recipes()
+
+def test_fwf_format_and_to_fwf(tmp_path):
+    df = pd.DataFrame({'A': ['a', 'bb'], 'B': ['1', '22']})
+    widths = [3, 3]
+    line = recipes.fwf_format(df.iloc[0], widths)
+    assert line == 'a  1  '
+    outfile = tmp_path / 'out.txt'
+    recipes.to_fwf(df, outfile, widths=widths, names=['A', 'B'])
+    content = outfile.read_text().splitlines()
+    assert content[0].strip() == line.strip()
+
+
+def test_internal_fillna_and_keep():
+    df = pd.DataFrame({'A': [1, None], 'B': [None, 'x']})
+    r_fill = recipes.Recipe.__new__(recipes.Recipe)
+    r_fill.args = [{'A': 0, 'B': ''}]
+    filled = recipes.Recipe.internal_fillna(r_fill, df.copy())
+    assert filled['A'].tolist() == [1, 0]
+    assert filled['B'].tolist() == ['', 'x']
+
+    r_keep = recipes.Recipe.__new__(recipes.Recipe)
+    r_keep.args = {'select': ['A']}
+    recipes.Recipe.select_columns(r_keep, filled)
+    kept = recipes.Recipe.internal_keep(r_keep, filled)
+    assert list(kept.columns) == ['A']

From f45af246b2e20228a4f29214399ae9a9bd217ce2 Mon Sep 17 00:00:00 2001
From: antoinefa <fabien.antoine@m4x.org>
Date: Tue, 20 May 2025 23:37:23 -0400
Subject: [PATCH 2/6] add 20 basic unit tests

---
 tests/test_recipes.py | 212 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 194 insertions(+), 18 deletions(-)

diff --git a/tests/test_recipes.py b/tests/test_recipes.py
index 3265ce6..7be4de4 100644
--- a/tests/test_recipes.py
+++ b/tests/test_recipes.py
@@ -1,38 +1,38 @@
+# -*- coding: utf-8 -*-
 import importlib.util
 from pathlib import Path
-import pytest
 import sys
+import os
+import pytest
 
 pd = pytest.importorskip('pandas')
 np = pytest.importorskip('numpy')
 
+# Dynamically load the recipes module
 MODULE_PATH = Path(__file__).resolve().parents[1] / 'code' / 'recipes.py'
 CODE_DIR = str(MODULE_PATH.parent)
 
-# pytestmark = pytest.mark.skipif(
-#     not MODULE_PATH.exists(),
-#     reason='recipes module not found'
-# )
-
-def load_recipes():
+def load_module(name, path):
     original_sys_path = list(sys.path)
     if CODE_DIR not in sys.path:
         sys.path.insert(0, CODE_DIR)
-    
-    recipes_module = None
     try:
-        spec = importlib.util.spec_from_file_location('recipes', MODULE_PATH)
-        if spec is None:
-            raise ImportError(f"Impossible de trouver le spec pour {MODULE_PATH}")
-        recipes_module = importlib.util.module_from_spec(spec)
-        if spec.loader is None:
-            raise ImportError(f"Spec pour {MODULE_PATH} n'a pas de loader")
-        spec.loader.exec_module(recipes_module)
-        return recipes_module
+        spec = importlib.util.spec_from_file_location(name, path)
+        if spec is None or spec.loader is None:
+            raise ImportError(f"Impossible de charger {name} depuis {path}")
+        module = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(module)
+        return module
     finally:
         sys.path = original_sys_path
 
-recipes = load_recipes()
+recipes = load_module('recipes', MODULE_PATH)
+config_path = Path(__file__).resolve().parents[1] / 'code' / 'config.py'
+config = load_module('config', config_path)
+
+#############################
+# Tests                     #
+#############################
 
 def test_fwf_format_and_to_fwf(tmp_path):
     df = pd.DataFrame({'A': ['a', 'bb'], 'B': ['1', '22']})
@@ -58,3 +58,179 @@ def test_internal_fillna_and_keep():
     recipes.Recipe.select_columns(r_keep, filled)
     kept = recipes.Recipe.internal_keep(r_keep, filled)
     assert list(kept.columns) == ['A']
+
+
+def test_internal_rename():
+    df = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y']})
+    r = recipes.Recipe.__new__(recipes.Recipe)
+    r.args = {'new_A': 'A', 'new_B': 'B'}
+    renamed = recipes.Recipe.internal_rename(r, df.copy())
+    assert list(renamed.columns) == ['new_A', 'new_B']
+
+
+def test_internal_map():
+    df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']})
+    r = recipes.Recipe.__new__(recipes.Recipe)
+    r.args = {'C': 'A', 'D': ['A', 'B']}
+    mapped = recipes.Recipe.internal_map(r, df.copy())
+    assert mapped['C'].tolist() == [1, 2, 3]
+    assert mapped['D'].tolist() == [[1, 'a'], [2, 'b'], [3, 'c']]
+
+
+def test_internal_shuffle():
+    df = pd.DataFrame({'A': range(10), 'B': list('abcdefghij')})
+    r = recipes.Recipe.__new__(recipes.Recipe)
+    shuffled = recipes.Recipe.internal_shuffle(r, df.copy())
+    assert set(shuffled['A']) == set(df['A'])
+    assert set(shuffled['B']) == set(df['B'])
+
+
+# Utilitaire pour créer un objet Recipe minimal avec un logger muet
+def _recipe_with_args(args):
+    r = recipes.Recipe.__new__(recipes.Recipe)
+    r.args = args
+    r.log = type('log', (), {'write': lambda self, *a, **k: None})()
+    return r
+
+
+def test_internal_to_integer_single():
+    df = pd.DataFrame({'A': ['1', '2', '', '']})
+    r = _recipe_with_args({'select': ['A']})
+    converted = recipes.Recipe.internal_to_integer(r, df.copy())
+    assert converted['A'].tolist()[:2] == [1, 2]
+    assert pd.isna(converted['A'].iloc[2]) and pd.isna(converted['A'].iloc[3])
+
+
+def test_internal_to_float():
+    df = pd.DataFrame({'A': ['1.1', '', '']})
+    r = _recipe_with_args({'select': ['A']})
+    converted = recipes.Recipe.internal_to_float(r, df.copy())
+    assert converted['A'].iloc[0] == 1.1
+    assert pd.isna(converted['A'].iloc[1]) and pd.isna(converted['A'].iloc[2])
+
+
+def test_internal_parsedate():
+    df = pd.DataFrame({'A': ['2023-01-01', '2023-02-01']})
+    r = _recipe_with_args({'select': ['A'], 'format': '%Y-%m-%d'})
+    parsed = recipes.Recipe.internal_parsedate(r, df.copy())
+    assert pd.api.types.is_datetime64_any_dtype(parsed['A'])
+
+
+def test_internal_normalize():
+    df = pd.DataFrame({'A': ['été', 'naïve']})
+    r = recipes.Recipe.__new__(recipes.Recipe)
+    r.args = {'select': ['A']}
+    normalized = recipes.Recipe.internal_normalize(r, df.copy())
+    assert normalized['A'].tolist() == ['ete', 'naive']
+
+
+def test_internal_pause():
+    df = pd.DataFrame({'A': [1, 2]})
+    r = recipes.Recipe.__new__(recipes.Recipe)
+    paused = recipes.Recipe.internal_pause(r, df.copy())
+    assert paused.equals(df)
+
+
+def test_internal_list_tuple():
+    df = pd.DataFrame({'A': [[1, 2], [3, 4]]})
+    r1 = recipes.Recipe.__new__(recipes.Recipe)
+    r1.args = {'select': ['A']}
+    tuples = recipes.Recipe.internal_list_to_tuple(r1, df.copy())
+    assert all(isinstance(x, tuple) for x in tuples['A'])
+
+    r2 = recipes.Recipe.__new__(recipes.Recipe)
+    r2.args = {'select': ['A']}
+    lists = recipes.Recipe.internal_tuple_to_list(r2, tuples.copy())
+    assert all(isinstance(x, list) for x in lists['A'])
+
+
+def test_internal_sql():
+    df = pd.DataFrame({'A': [1]})
+    r = recipes.Recipe.__new__(recipes.Recipe)
+
+    # Mock minimal pour input.connector.sql
+    class DummySQL:
+        def execute(self, query):
+            return None
+    r.input = type('inp', (), {'connector': type('conn', (), {'sql': DummySQL()})()})
+
+    r.args = "SELECT 1"
+    assert recipes.Recipe.internal_sql(r, df.copy()).equals(df)
+
+
+def test_internal_unnest_and_nest():
+    df = pd.DataFrame({'A': [[1, 2], [3, 4]], 'B': ['x', 'y']})
+    r_un = recipes.Recipe.__new__(recipes.Recipe)
+    r_un.args = {'select': ['A']}
+    unnest = recipes.Recipe.internal_unnest(r_un, df.copy())
+    assert len(unnest) == 2
+    assert 'A' not in unnest.columns or isinstance(unnest.iloc[0]['A'], list)
+
+    df2 = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y']})
+    r_nest = recipes.Recipe.__new__(recipes.Recipe)
+    r_nest.args = {'select': ['A', 'B'], 'target': 'nested'}
+    nested = recipes.Recipe.internal_nest(r_nest, df2.copy())
+    assert 'nested' in nested.columns
+    assert 'A' not in nested.columns and 'B' not in nested.columns
+
+#############################
+#  Nouveaux tests ajoutés   #
+#############################
+
+def test_internal_keep():
+    df = pd.DataFrame({
+        'A': [1, 2, 3, 4],
+        'B': ['x', 'y', 'z', 'w'],
+        'flag': [True, False, True, False]
+    })
+    r = _recipe_with_args({'select': ['A', 'B'], 'where': 'flag == True'})
+    kept = recipes.Recipe.internal_keep(r, df.copy())
+    # Doit garder uniquement lignes avec flag==True et colonnes A,B
+    assert list(kept.columns) == ['A', 'B']
+    assert kept['A'].tolist() == [1, 3]
+
+
+def test_internal_delete():
+    df = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y'], 'C': [10, 20]})
+    r = _recipe_with_args({'select': ['B']})
+    deleted = recipes.Recipe.internal_delete(r, df.copy())
+    assert 'B' not in deleted.columns and list(deleted.columns) == ['A', 'C']
+
+
+def test_internal_replace():
+    df = pd.DataFrame({'A': ['abc123', 'def456']})
+    r = _recipe_with_args({'select': ['A'], 'regex': [{'[0-9]+': 'NUM'}]})
+    replaced = recipes.Recipe.internal_replace(r, df.copy())
+    assert replaced['A'].tolist() == ['abcNUM', 'defNUM']
+
+
+def test_internal_groupby():
+    df = pd.DataFrame({'grp': ['g1', 'g1', 'g2'], 'val': [1, 2, 3]})
+    r = _recipe_with_args({'select': ['grp'], 'groupby': ['grp'], 'agg': {'val': 'sum'}})
+    grouped = recipes.Recipe.internal_groupby(r, df.copy())
+    assert grouped['val'].tolist() == [3, 3]
+
+
+def test_internal_ngram():
+    df = pd.DataFrame({'txt': ['hello world']})
+    # 'n' doit être une liste pour la fonction ngrams ; on utilise [2] pour les bigrammes
+    r = _recipe_with_args({'select': ['txt'], 'n': [2]})
+    result = recipes.Recipe.internal_ngram(r, df.copy())
+    # La fonction remplace simplement la colonne sélectionnée par la liste de n-grammes
+    assert 'txt' in result.columns
+    # Premier bigramme attendu : 'he'
+    assert result['txt'].iloc[0][0] == 'he'
+
+
+def test_internal_exec():
+    df = pd.DataFrame({'A': [1, 2]})
+    r = _recipe_with_args("df['B'] = df['A'] * 10")
+    executed = recipes.Recipe.internal_exec(r, df.copy())
+    assert executed['B'].tolist() == [10, 20]
+
+
+def test_internal_eval():
+    df = pd.DataFrame({'A': [1, 2]})
+    r = _recipe_with_args([{'B': 'A * 5'}])
+    evaluated = recipes.Recipe.internal_eval(r, df.copy())
+    assert evaluated['B'].tolist() == [5, 10]
\ No newline at end of file

From a08ed5c1e63b4f9d2b798a4bd20e2a4c056b1ef8 Mon Sep 17 00:00:00 2001
From: antoinefa <fabien.antoine@m4x.org>
Date: Tue, 20 May 2025 23:46:40 -0400
Subject: [PATCH 3/6] consolidate tests for numpy further migration

---
 tests/test_recipes.py | 64 +++++++++++++++++++++++++++++--------------
 1 file changed, 43 insertions(+), 21 deletions(-)

diff --git a/tests/test_recipes.py b/tests/test_recipes.py
index 7be4de4..4899016 100644
--- a/tests/test_recipes.py
+++ b/tests/test_recipes.py
@@ -77,13 +77,9 @@ def test_internal_map():
     assert mapped['D'].tolist() == [[1, 'a'], [2, 'b'], [3, 'c']]
 
 
-def test_internal_shuffle():
-    df = pd.DataFrame({'A': range(10), 'B': list('abcdefghij')})
-    r = recipes.Recipe.__new__(recipes.Recipe)
-    shuffled = recipes.Recipe.internal_shuffle(r, df.copy())
-    assert set(shuffled['A']) == set(df['A'])
-    assert set(shuffled['B']) == set(df['B'])
-
+# ---------------------------------------------------------------------------
+# Consolidation des tests sensibles à NumPy
+# ---------------------------------------------------------------------------
 
 # Utilitaire pour créer un objet Recipe minimal avec un logger muet
 def _recipe_with_args(args):
@@ -93,20 +89,46 @@ def _recipe_with_args(args):
     return r
 
 
-def test_internal_to_integer_single():
-    df = pd.DataFrame({'A': ['1', '2', '', '']})
-    r = _recipe_with_args({'select': ['A']})
-    converted = recipes.Recipe.internal_to_integer(r, df.copy())
-    assert converted['A'].tolist()[:2] == [1, 2]
-    assert pd.isna(converted['A'].iloc[2]) and pd.isna(converted['A'].iloc[3])
-
-
-def test_internal_to_float():
-    df = pd.DataFrame({'A': ['1.1', '', '']})
-    r = _recipe_with_args({'select': ['A']})
-    converted = recipes.Recipe.internal_to_float(r, df.copy())
-    assert converted['A'].iloc[0] == 1.1
-    assert pd.isna(converted['A'].iloc[1]) and pd.isna(converted['A'].iloc[2])
+@pytest.mark.parametrize(
+    "func, df, args, validator",
+    [
+        (
+            recipes.Recipe.internal_to_integer,
+            pd.DataFrame({'A': ['1', '2', '', '']}),
+            {'select': ['A']},
+            lambda res: (
+                res['A'].tolist()[:2] == [1, 2]
+                and pd.isna(res['A'].iloc[2])
+                and pd.isna(res['A'].iloc[3])
+            ),
+        ),
+        (
+            recipes.Recipe.internal_to_float,
+            pd.DataFrame({'A': ['1.1', '', '']}),
+            {'select': ['A']},
+            lambda res: (
+                res['A'].iloc[0] == 1.1
+                and pd.isna(res['A'].iloc[1])
+                and pd.isna(res['A'].iloc[2])
+            ),
+        ),
+        (
+            recipes.Recipe.internal_shuffle,
+            pd.DataFrame({'A': range(10), 'B': list('abcdefghij')}),
+            {},
+            lambda res, df_orig=pd.DataFrame({'A': range(10), 'B': list('abcdefghij')}): (
+                set(res['A']) == set(df_orig['A'])
+                and set(res['B']) == set(df_orig['B'])
+            ),
+        ),
+    ],
+)
+def test_numpy_sensitive_functions(func, df, args, validator):
+    """Regroupe les tests des fonctions dont le comportement pourrait varier
+    avec NumPy 2 (conversion numériques, permutation aléatoire, etc.)."""
+    r = _recipe_with_args(args)
+    result = func(r, df.copy())
+    assert validator(result)
 
 
 def test_internal_parsedate():

From 207b2a5ec2130faffea093a1f37797518543b012 Mon Sep 17 00:00:00 2001
From: antoinefa <fabien.antoine@m4x.org>
Date: Tue, 20 May 2025 23:50:11 -0400
Subject: [PATCH 4/6] add 6 new tests

---
 tests/test_recipes.py | 125 +++++++++++++++++++++++++++++++-----------
 1 file changed, 94 insertions(+), 31 deletions(-)

diff --git a/tests/test_recipes.py b/tests/test_recipes.py
index 4899016..7636d06 100644
--- a/tests/test_recipes.py
+++ b/tests/test_recipes.py
@@ -89,40 +89,103 @@ def _recipe_with_args(args):
     return r
 
 
-@pytest.mark.parametrize(
-    "func, df, args, validator",
-    [
-        (
-            recipes.Recipe.internal_to_integer,
-            pd.DataFrame({'A': ['1', '2', '', '']}),
-            {'select': ['A']},
-            lambda res: (
-                res['A'].tolist()[:2] == [1, 2]
-                and pd.isna(res['A'].iloc[2])
-                and pd.isna(res['A'].iloc[3])
-            ),
+# Extended list of use cases and edge cases for each function sensitive to NumPy 2
+CASES = [
+    # ------------------------------------------------------------------
+    # internal_to_integer
+    # ------------------------------------------------------------------
+    pytest.param(
+        recipes.Recipe.internal_to_integer,
+        pd.DataFrame({'A': ['1', '2', '', '']}),
+        {'select': ['A']},
+        lambda res: (
+            res['A'].tolist()[:2] == [1, 2]
+            and pd.isna(res['A'].iloc[2])
+            and pd.isna(res['A'].iloc[3])
         ),
-        (
-            recipes.Recipe.internal_to_float,
-            pd.DataFrame({'A': ['1.1', '', '']}),
-            {'select': ['A']},
-            lambda res: (
-                res['A'].iloc[0] == 1.1
-                and pd.isna(res['A'].iloc[1])
-                and pd.isna(res['A'].iloc[2])
-            ),
+        id="to_integer_basic",
+    ),
+    pytest.param(
+        recipes.Recipe.internal_to_integer,
+        pd.DataFrame({'A': ['foo', '3']}),
+        {'select': ['A']},
+        # conversion doit échouer et la colonne rester inchangée
+        lambda res: res['A'].tolist() == ['foo', '3'],
+        id="to_integer_invalid_string",
+    ),
+    pytest.param(
+        recipes.Recipe.internal_to_integer,
+        pd.DataFrame({'A': ['-5', '0', '42']}),
+        {'select': ['A']},
+        lambda res: res['A'].tolist() == [-5, 0, 42],
+        id="to_integer_negative_values",
+    ),
+    # ------------------------------------------------------------------
+    # internal_to_float
+    # ------------------------------------------------------------------
+    pytest.param(
+        recipes.Recipe.internal_to_float,
+        pd.DataFrame({'A': ['1.1', '', '']}),
+        {'select': ['A']},
+        lambda res: (
+            res['A'].iloc[0] == 1.1
+            and pd.isna(res['A'].iloc[1])
+            and pd.isna(res['A'].iloc[2])
         ),
-        (
-            recipes.Recipe.internal_shuffle,
-            pd.DataFrame({'A': range(10), 'B': list('abcdefghij')}),
-            {},
-            lambda res, df_orig=pd.DataFrame({'A': range(10), 'B': list('abcdefghij')}): (
-                set(res['A']) == set(df_orig['A'])
-                and set(res['B']) == set(df_orig['B'])
-            ),
+        id="to_float_basic",
+    ),
+    pytest.param(
+        recipes.Recipe.internal_to_float,
+        pd.DataFrame({'A': ['foo', '2.5']}),
+        {'select': ['A']},
+        # doit être inchangé si erreur de conversion
+        lambda res: res['A'].tolist() == ['foo', '2.5'],
+        id="to_float_invalid_string",
+    ),
+    pytest.param(
+        recipes.Recipe.internal_to_float,
+        pd.DataFrame({'A': ['', '']}),
+        {'select': ['A'], 'na_value': 0},
+        lambda res: res['A'].tolist() == [0, 0],
+        id="to_float_custom_na_value",
+    ),
+    # ------------------------------------------------------------------
+    # internal_shuffle
+    # ------------------------------------------------------------------
+    pytest.param(
+        recipes.Recipe.internal_shuffle,
+        pd.DataFrame({'A': range(10), 'B': list('abcdefghij')}),
+        {},
+        lambda res, df_orig=pd.DataFrame({'A': range(10), 'B': list('abcdefghij')}): (
+            set(res['A']) == set(df_orig['A'])
+            and set(res['B']) == set(df_orig['B'])
         ),
-    ],
-)
+        id="shuffle_basic",
+    ),
+    pytest.param(
+        recipes.Recipe.internal_shuffle,
+        pd.DataFrame({'A': [1, 1, 1, 1], 'B': [10, 20, 30, 40]}),
+        {},
+        lambda res, df_orig=pd.DataFrame({'A': [1, 1, 1, 1], 'B': [10, 20, 30, 40]}): (
+            set(res['A']) == {1}  # colonne constante
+            and set(res['B']) == set(df_orig['B'])
+        ),
+        id="shuffle_with_duplicates",
+    ),
+    pytest.param(
+        recipes.Recipe.internal_shuffle,
+        pd.DataFrame({'A': [np.nan, 1, 2], 'B': ['x', 'y', 'z']}),
+        {},
+        lambda res, df_orig=pd.DataFrame({'A': [np.nan, 1, 2], 'B': ['x', 'y', 'z']}): (
+            set(pd.isna(res['A'])) == set(pd.isna(df_orig['A']))
+            and set(res['B']) == set(df_orig['B'])
+        ),
+        id="shuffle_with_nan",
+    ),
+]
+
+
+@pytest.mark.parametrize("func, df, args, validator", CASES)  # pass the pytest.param objects as-is so their ids are kept
 def test_numpy_sensitive_functions(func, df, args, validator):
     """Regroupe les tests des fonctions dont le comportement pourrait varier
     avec NumPy 2 (conversion numériques, permutation aléatoire, etc.)."""

From 1706c7f6ce6b9e97f5f34f4419f4931a28f53b5d Mon Sep 17 00:00:00 2001
From: antoinefa <fabien.antoine@m4x.org>
Date: Tue, 20 May 2025 23:58:30 -0400
Subject: [PATCH 5/6] add 3 tests for pandas future migration

---
 Makefile              |  2 +-
 tests/test_recipes.py | 71 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index e68d50f..5730869 100644
--- a/Makefile
+++ b/Makefile
@@ -699,6 +699,6 @@ clean-remote:
 	@make -C ${APP_PATH}/${GIT_TOOLS} remote-clean ${MAKEOVERRIDES} > /dev/null 2>&1 || true
 
 tests:
-	@docker exec -i ${USE_TTY} ${DC_PREFIX}-${APP} pytest -q -W "ignore::DeprecationWarning"
+	@docker exec -i ${USE_TTY} ${DC_PREFIX}-${APP} pytest -q -W "ignore::DeprecationWarning" -W "ignore::FutureWarning"
 
 .PHONY: tests
diff --git a/tests/test_recipes.py b/tests/test_recipes.py
index 7636d06..b763064 100644
--- a/tests/test_recipes.py
+++ b/tests/test_recipes.py
@@ -318,4 +318,73 @@ def test_internal_eval():
     df = pd.DataFrame({'A': [1, 2]})
     r = _recipe_with_args([{'B': 'A * 5'}])
     evaluated = recipes.Recipe.internal_eval(r, df.copy())
-    assert evaluated['B'].tolist() == [5, 10]
\ No newline at end of file
+    assert evaluated['B'].tolist() == [5, 10]
+
+# ---------------------------------------------------------------------------
+# pandas-specific validation helper functions
+# ---------------------------------------------------------------------------
+
+
+def _validate_groupby_transform_rank(df_res):
+    """Return True when df_res has consistent 'val_mean' and 'val_rank' columns per 'grp' group."""
+    if not {'val_mean', 'val_rank', 'grp'}.issubset(df_res.columns):
+        return False
+    # 'val_mean' must be identical for every row of the same group
+    ok_mean = df_res.groupby('grp')['val_mean'].apply(lambda x: (x == x.iloc[0]).all()).all()
+    # within each group, the set of ranks must be exactly the integers 1..len(group)
+    ok_rank = df_res.groupby('grp')['val_rank'].apply(lambda x: set(x) == set(range(1, len(x) + 1))).all()
+    return ok_mean and ok_rank
+
+
+def _validate_unfold_basic(df_res):
+    """Check that the unfold exploded the lists into one row per element."""
+    # Three rows are expected from the two input lists [10, 20] and [30]
+    return df_res.shape[0] == 3 and set(df_res['L']) == {10, 20, 30}
+
+
+def _validate_unfold_empty(df_res):
+    """Return True when df_res has exactly one row whose 'L' value is "" or a missing value (NaN/None/NA)."""
+    first = df_res['L'].iloc[0]  # tested with pd.isna: NaN != NaN, so `in ("", np.nan)` only matched the np.nan singleton
+    return df_res.shape[0] == 1 and ((pd.api.types.is_scalar(first) and bool(pd.isna(first))) or first == "")
+
+
+# ---------------------------------------------------------------------------
+# Tests sensitive to pandas 2
+# ---------------------------------------------------------------------------
+
+
+PANDAS_CASES = [
+    # internal_groupby with transform and rank
+    pytest.param(
+        recipes.Recipe.internal_groupby,
+        pd.DataFrame({'grp': ['g1', 'g1', 'g2', 'g2'], 'val': [1, 2, 3, 5]}),
+        {'select': ['grp'], 'transform': [{'val': 'mean'}], 'rank': ['val']},
+        _validate_groupby_transform_rank,
+        id="groupby_transform_rank",
+    ),
+    # internal_unfold: basic case
+    pytest.param(
+        recipes.Recipe.internal_unfold,
+        pd.DataFrame({'A': [1, 2], 'L': [[10, 20], [30]]}),
+        {'select': ['L'], 'fill_na': ''},
+        _validate_unfold_basic,
+        id="unfold_basic",
+    ),
+    # internal_unfold: empty list
+    pytest.param(
+        recipes.Recipe.internal_unfold,
+        pd.DataFrame({'A': [1], 'L': [[]]}),
+        {'select': ['L'], 'fill_na': ''},
+        _validate_unfold_empty,
+        id="unfold_empty_list",
+    ),
+]
+
+
+@pytest.mark.parametrize("func, df, args, validator", PANDAS_CASES)
+def test_pandas_sensitive_functions(func, df, args, validator):
+    """Run every PANDAS_CASES entry: these target behaviours that may
+    change with the move to pandas 2."""
+    r = _recipe_with_args(args)
+    result = func(r, df.copy())
+    assert validator(result)
\ No newline at end of file

From 209cb179ad3499a93045b786ebb5083afadd55dd Mon Sep 17 00:00:00 2001
From: antoinefa <fabien.antoine@m4x.org>
Date: Sat, 31 May 2025 09:55:43 -0400
Subject: [PATCH 6/6] fix tests

---
 .github/workflows/push.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml
index bbac64b..3845830 100644
--- a/.github/workflows/push.yml
+++ b/.github/workflows/push.yml
@@ -18,12 +18,12 @@ jobs:
         id: extract_branch
       - name: build
         if: success()
-        run: make version backend-docker-check GIT_BRANCH=$GIT_BRANCH || ( make backend-build GIT_BRANCH=$GIT_BRANCH && make backend backend-stop GIT_BRANCH=$GIT_BRANCH)
+        run: make version backend-docker-check GIT_BRANCH=$GIT_BRANCH || ( make backend-build GIT_BRANCH=$GIT_BRANCH && make backend GIT_BRANCH=$GIT_BRANCH)
         env:
           GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }}
       - name: tests
         if: success()
-        run: make tests
+        run: make tests backend-stop GIT_BRANCH=$GIT_BRANCH
         env:
           GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }}
       - name: publish