From 97985ece93d96b8b3ec33b2db2bc3034ede8c2a3 Mon Sep 17 00:00:00 2001 From: Fabien Antoine Date: Mon, 19 May 2025 16:14:47 -0400 Subject: [PATCH 1/6] Integrate tests step in existing workflows --- .github/workflows/pull.yml | 7 ++++- .github/workflows/push.yml | 5 ++++ Dockerfile | 5 +++- Makefile | 6 ++++ README.md | 11 +++++++ docker-compose-dev.yml | 1 + requirements.txt | 3 +- tests/test_recipes.py | 60 ++++++++++++++++++++++++++++++++++++++ 8 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 tests/test_recipes.py diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index f3379f4..d664f02 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -14,6 +14,11 @@ jobs: id: extract_branch - name: build if: success() - run: make version backend-docker-check GIT_BRANCH=$GIT_BRANCH || ( make backend-build GIT_BRANCH=$GIT_BRANCH && make backend backend-stop GIT_BRANCH=$GIT_BRANCH) + run: make version backend-docker-check GIT_BRANCH=$GIT_BRANCH || ( make backend-build GIT_BRANCH=$GIT_BRANCH && make backend GIT_BRANCH=$GIT_BRANCH) + env: + GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }} + - name: tests + if: success() + run: make tests backend-stop GIT_BRANCH=$GIT_BRANCH env: GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }} diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 0fe25e5..bbac64b 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -21,6 +21,11 @@ jobs: run: make version backend-docker-check GIT_BRANCH=$GIT_BRANCH || ( make backend-build GIT_BRANCH=$GIT_BRANCH && make backend backend-stop GIT_BRANCH=$GIT_BRANCH) env: GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }} + - name: tests + if: success() + run: make tests + env: + GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }} - name: publish if: success() run: | diff --git a/Dockerfile b/Dockerfile index 334399f..0c04d91 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,7 +23,7 @@ RUN mkdir -p code\ conf/run\ log\ referential_data\ - matchID_test/\ + tests\ upload ################################ @@ -41,11 +41,13 @@ WORKDIR /${APP} COPY code/ code/ COPY conf/ conf/ COPY referential_data/ referential_data/ +COPY tests/ tests/ VOLUME /${app_path}/projects VOLUME /${app_path}/log VOLUME /${app_path}/models VOLUME /${app_path}/upload +VOLUME /${app_path}/tests EXPOSE ${BACKEND_PORT} @@ -68,6 +70,7 @@ VOLUME /${APP}/referential_data VOLUME /${APP}/log VOLUME /${APP}/models VOLUME /${APP}/upload +VOLUME /${APP}/tests EXPOSE ${BACKEND_PORT} diff --git a/Makefile b/Makefile index b2c4a1d..e68d50f 100644 --- a/Makefile +++ b/Makefile @@ -36,6 +36,7 @@ export GITHUB_OAUTH_SECRET=203010f81158d3ceab0297a213e80bc0fbfe7f8e export BACKEND := $(shell pwd) export UPLOAD=${BACKEND}/upload export PROJECTS=${BACKEND}/projects +export TESTS=${BACKEND}/tests export EXAMPLES=${BACKEND}/../examples export TUTORIAL=${BACKEND}/../tutorial export MODELS=${BACKEND}/models @@ -696,3 +697,8 @@ deploy-remote: config deploy-remote-instance deploy-remote-services deploy-remot clean-remote: @make -C ${APP_PATH}/${GIT_TOOLS} remote-clean ${MAKEOVERRIDES} > /dev/null 2>&1 || true + +tests: + @docker exec -i ${USE_TTY} ${DC_PREFIX}-${APP} pytest -q -W "ignore::DeprecationWarning" + +.PHONY: tests diff --git a/README.md b/README.md index 47dacda..0a9b0e6 100644 --- a/README.md +++ b/README.md @@ -128,3 +128,14 @@ If you want to contribute to the developpement, you'll be able to fork the repo ``` make start-dev ``` + +### Running tests +Install dependencies and run tests with Make: +```bash +make tests +``` +You can also install the requirements and run pytest directly: +```bash +pip install -r requirements.txt +pytest +``` diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 96f1144..8699260 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -8,3 +8,4 @@ services: - ${BACKEND}/code/:/${APP_GROUP}/code/ - ${BACKEND}/referential_data/:/${APP_GROUP}/referential_data/ - ${BACKEND}/conf/:/${APP_GROUP}/conf/ + - ${BACKEND}/tests/:/${APP_GROUP}/tests/ diff --git a/requirements.txt b/requirements.txt index 773b5e4..997fdb4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,4 +25,5 @@ typing Werkzeug==3.0.3 rsa>=4.7 # not directly required, pinned by Snyk to avoid a vulnerability zipp>=3.19.1 # not directly required, pinned by Snyk to avoid a vulnerability -pyarrow>=14.0.1 \ No newline at end of file +pyarrow>=14.0.1 +pytest diff --git a/tests/test_recipes.py b/tests/test_recipes.py new file mode 100644 index 0000000..3265ce6 --- /dev/null +++ b/tests/test_recipes.py @@ -0,0 +1,60 @@ +import importlib.util +from pathlib import Path +import pytest +import sys + +pd = pytest.importorskip('pandas') +np = pytest.importorskip('numpy') + +MODULE_PATH = Path(__file__).resolve().parents[1] / 'code' / 'recipes.py' +CODE_DIR = str(MODULE_PATH.parent) + +# pytestmark = pytest.mark.skipif( +# not MODULE_PATH.exists(), +# reason='recipes module not found' +# ) + +def load_recipes(): + original_sys_path = list(sys.path) + if CODE_DIR not in sys.path: + sys.path.insert(0, CODE_DIR) + + recipes_module = None + try: + spec = importlib.util.spec_from_file_location('recipes', MODULE_PATH) + if spec is None: + raise ImportError(f"Impossible de trouver le spec pour {MODULE_PATH}") + recipes_module = importlib.util.module_from_spec(spec) + if spec.loader is None: + raise ImportError(f"Spec pour {MODULE_PATH} n'a pas de loader") + spec.loader.exec_module(recipes_module) + return recipes_module + finally: + sys.path = original_sys_path + +recipes = load_recipes() + +def test_fwf_format_and_to_fwf(tmp_path): + df = pd.DataFrame({'A': ['a', 'bb'], 'B': ['1', '22']}) + widths = [3, 3] + line = recipes.fwf_format(df.iloc[0], widths) + assert line == 'a 1 ' + outfile = tmp_path / 'out.txt' + recipes.to_fwf(df, outfile, widths=widths, names=['A', 'B']) + content = outfile.read_text().splitlines() + assert content[0].strip() == line.strip() + + +def test_internal_fillna_and_keep(): + df = pd.DataFrame({'A': [1, None], 'B': [None, 'x']}) + r_fill = recipes.Recipe.__new__(recipes.Recipe) + r_fill.args = [{'A': 0, 'B': ''}] + filled = recipes.Recipe.internal_fillna(r_fill, df.copy()) + assert filled['A'].tolist() == [1, 0] + assert filled['B'].tolist() == ['', 'x'] + + r_keep = recipes.Recipe.__new__(recipes.Recipe) + r_keep.args = {'select': ['A']} + recipes.Recipe.select_columns(r_keep, filled) + kept = recipes.Recipe.internal_keep(r_keep, filled) + assert list(kept.columns) == ['A'] From f45af246b2e20228a4f29214399ae9a9bd217ce2 Mon Sep 17 00:00:00 2001 From: antoinefa Date: Tue, 20 May 2025 23:37:23 -0400 Subject: [PATCH 2/6] add 20 basic unit tests --- tests/test_recipes.py | 212 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 194 insertions(+), 18 deletions(-) diff --git a/tests/test_recipes.py b/tests/test_recipes.py index 3265ce6..7be4de4 100644 --- a/tests/test_recipes.py +++ b/tests/test_recipes.py @@ -1,38 +1,38 @@ +# -*- coding: utf-8 -*- import importlib.util from pathlib import Path -import pytest import sys +import os +import pytest pd = pytest.importorskip('pandas') np = pytest.importorskip('numpy') +# Chargement dynamique du module recipes MODULE_PATH = Path(__file__).resolve().parents[1] / 'code' / 'recipes.py' CODE_DIR = str(MODULE_PATH.parent) -# pytestmark = pytest.mark.skipif( -# not MODULE_PATH.exists(), -# reason='recipes module not found' -# ) - -def load_recipes(): +def load_module(name, path): original_sys_path = list(sys.path) if CODE_DIR not in sys.path: sys.path.insert(0, CODE_DIR) - - recipes_module = None try: - spec = importlib.util.spec_from_file_location('recipes', MODULE_PATH) - if spec is None: - raise ImportError(f"Impossible de trouver le spec pour {MODULE_PATH}") - recipes_module = importlib.util.module_from_spec(spec) - if spec.loader is None: - raise ImportError(f"Spec pour {MODULE_PATH} n'a pas de loader") - spec.loader.exec_module(recipes_module) - return recipes_module + spec = importlib.util.spec_from_file_location(name, path) + if spec is None or spec.loader is None: + raise ImportError(f"Impossible de charger {name} depuis {path}") + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module finally: sys.path = original_sys_path -recipes = load_recipes() +recipes = load_module('recipes', MODULE_PATH) +config_path = Path(__file__).resolve().parents[1] / 'code' / 'config.py' +config = load_module('config', config_path) + +############################# +# Tests # +############################# def test_fwf_format_and_to_fwf(tmp_path): df = pd.DataFrame({'A': ['a', 'bb'], 'B': ['1', '22']}) @@ -58,3 +58,179 @@ def test_internal_fillna_and_keep(): recipes.Recipe.select_columns(r_keep, filled) kept = recipes.Recipe.internal_keep(r_keep, filled) assert list(kept.columns) == ['A'] + + +def test_internal_rename(): + df = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y']}) + r = recipes.Recipe.__new__(recipes.Recipe) + r.args = {'new_A': 'A', 'new_B': 'B'} + renamed = recipes.Recipe.internal_rename(r, df.copy()) + assert list(renamed.columns) == ['new_A', 'new_B'] + + +def test_internal_map(): + df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']}) + r = recipes.Recipe.__new__(recipes.Recipe) + r.args = {'C': 'A', 'D': ['A', 'B']} + mapped = recipes.Recipe.internal_map(r, df.copy()) + assert mapped['C'].tolist() == [1, 2, 3] + assert mapped['D'].tolist() == [[1, 'a'], [2, 'b'], [3, 'c']] + + +def test_internal_shuffle(): + df = pd.DataFrame({'A': range(10), 'B': list('abcdefghij')}) + r = recipes.Recipe.__new__(recipes.Recipe) + shuffled = recipes.Recipe.internal_shuffle(r, df.copy()) + assert set(shuffled['A']) == set(df['A']) + assert set(shuffled['B']) == set(df['B']) + + +# Utilitaire pour créer un objet Recipe minimal avec un logger muet +def _recipe_with_args(args): + r = recipes.Recipe.__new__(recipes.Recipe) + r.args = args + r.log = type('log', (), {'write': lambda self, *a, **k: None})() + return r + + +def test_internal_to_integer_single(): + df = pd.DataFrame({'A': ['1', '2', '', '']}) + r = _recipe_with_args({'select': ['A']}) + converted = recipes.Recipe.internal_to_integer(r, df.copy()) + assert converted['A'].tolist()[:2] == [1, 2] + assert pd.isna(converted['A'].iloc[2]) and pd.isna(converted['A'].iloc[3]) + + +def test_internal_to_float(): + df = pd.DataFrame({'A': ['1.1', '', '']}) + r = _recipe_with_args({'select': ['A']}) + converted = recipes.Recipe.internal_to_float(r, df.copy()) + assert converted['A'].iloc[0] == 1.1 + assert pd.isna(converted['A'].iloc[1]) and pd.isna(converted['A'].iloc[2]) + + +def test_internal_parsedate(): + df = pd.DataFrame({'A': ['2023-01-01', '2023-02-01']}) + r = _recipe_with_args({'select': ['A'], 'format': '%Y-%m-%d'}) + parsed = recipes.Recipe.internal_parsedate(r, df.copy()) + assert pd.api.types.is_datetime64_any_dtype(parsed['A']) + + +def test_internal_normalize(): + df = pd.DataFrame({'A': ['été', 'naïve']}) + r = recipes.Recipe.__new__(recipes.Recipe) + r.args = {'select': ['A']} + normalized = recipes.Recipe.internal_normalize(r, df.copy()) + assert normalized['A'].tolist() == ['ete', 'naive'] + + +def test_internal_pause(): + df = pd.DataFrame({'A': [1, 2]}) + r = recipes.Recipe.__new__(recipes.Recipe) + paused = recipes.Recipe.internal_pause(r, df.copy()) + assert paused.equals(df) + + +def test_internal_list_tuple(): + df = pd.DataFrame({'A': [[1, 2], [3, 4]]}) + r1 = recipes.Recipe.__new__(recipes.Recipe) + r1.args = {'select': ['A']} + tuples = recipes.Recipe.internal_list_to_tuple(r1, df.copy()) + assert all(isinstance(x, tuple) for x in tuples['A']) + + r2 = recipes.Recipe.__new__(recipes.Recipe) + r2.args = {'select': ['A']} + lists = recipes.Recipe.internal_tuple_to_list(r2, tuples.copy()) + assert all(isinstance(x, list) for x in lists['A']) + + +def test_internal_sql(): + df = pd.DataFrame({'A': [1]}) + r = recipes.Recipe.__new__(recipes.Recipe) + + # Mock minimal pour input.connector.sql + class DummySQL: + def execute(self, query): + return None + r.input = type('inp', (), {'connector': type('conn', (), {'sql': DummySQL()})()}) + + r.args = "SELECT 1" + assert recipes.Recipe.internal_sql(r, df.copy()).equals(df) + + +def test_internal_unnest_and_nest(): + df = pd.DataFrame({'A': [[1, 2], [3, 4]], 'B': ['x', 'y']}) + r_un = recipes.Recipe.__new__(recipes.Recipe) + r_un.args = {'select': ['A']} + unnest = recipes.Recipe.internal_unnest(r_un, df.copy()) + assert len(unnest) == 2 + assert 'A' not in unnest.columns or isinstance(unnest.iloc[0]['A'], list) + + df2 = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y']}) + r_nest = recipes.Recipe.__new__(recipes.Recipe) + r_nest.args = {'select': ['A', 'B'], 'target': 'nested'} + nested = recipes.Recipe.internal_nest(r_nest, df2.copy()) + assert 'nested' in nested.columns + assert 'A' not in nested.columns and 'B' not in nested.columns + +############################# +# Nouveaux tests ajoutés # +############################# + +def test_internal_keep(): + df = pd.DataFrame({ + 'A': [1, 2, 3, 4], + 'B': ['x', 'y', 'z', 'w'], + 'flag': [True, False, True, False] + }) + r = _recipe_with_args({'select': ['A', 'B'], 'where': 'flag == True'}) + kept = recipes.Recipe.internal_keep(r, df.copy()) + # Doit garder uniquement lignes avec flag==True et colonnes A,B + assert list(kept.columns) == ['A', 'B'] + assert kept['A'].tolist() == [1, 3] + + +def test_internal_delete(): + df = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y'], 'C': [10, 20]}) + r = _recipe_with_args({'select': ['B']}) + deleted = recipes.Recipe.internal_delete(r, df.copy()) + assert 'B' not in deleted.columns and list(deleted.columns) == ['A', 'C'] + + +def test_internal_replace(): + df = pd.DataFrame({'A': ['abc123', 'def456']}) + r = _recipe_with_args({'select': ['A'], 'regex': [{'[0-9]+': 'NUM'}]}) + replaced = recipes.Recipe.internal_replace(r, df.copy()) + assert replaced['A'].tolist() == ['abcNUM', 'defNUM'] + + +def test_internal_groupby(): + df = pd.DataFrame({'grp': ['g1', 'g1', 'g2'], 'val': [1, 2, 3]}) + r = _recipe_with_args({'select': ['grp'], 'groupby': ['grp'], 'agg': {'val': 'sum'}}) + grouped = recipes.Recipe.internal_groupby(r, df.copy()) + assert grouped['val'].tolist() == [3, 3] + + +def test_internal_ngram(): + df = pd.DataFrame({'txt': ['hello world']}) + # 'n' doit être une liste pour la fonction ngrams ; on utilise [2] pour les bigrammes + r = _recipe_with_args({'select': ['txt'], 'n': [2]}) + result = recipes.Recipe.internal_ngram(r, df.copy()) + # La fonction remplace simplement la colonne sélectionnée par la liste de n-grammes + assert 'txt' in result.columns + # Premier bigramme attendu : 'he' + assert result['txt'].iloc[0][0] == 'he' + + +def test_internal_exec(): + df = pd.DataFrame({'A': [1, 2]}) + r = _recipe_with_args("df['B'] = df['A'] * 10") + executed = recipes.Recipe.internal_exec(r, df.copy()) + assert executed['B'].tolist() == [10, 20] + + +def test_internal_eval(): + df = pd.DataFrame({'A': [1, 2]}) + r = _recipe_with_args([{'B': 'A * 5'}]) + evaluated = recipes.Recipe.internal_eval(r, df.copy()) + assert evaluated['B'].tolist() == [5, 10] \ No newline at end of file From a08ed5c1e63b4f9d2b798a4bd20e2a4c056b1ef8 Mon Sep 17 00:00:00 2001 From: antoinefa Date: Tue, 20 May 2025 23:46:40 -0400 Subject: [PATCH 3/6] consolidate tests for numpy further migration --- tests/test_recipes.py | 64 +++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 21 deletions(-) diff --git a/tests/test_recipes.py b/tests/test_recipes.py index 7be4de4..4899016 100644 --- a/tests/test_recipes.py +++ b/tests/test_recipes.py @@ -77,13 +77,9 @@ def test_internal_map(): assert mapped['D'].tolist() == [[1, 'a'], [2, 'b'], [3, 'c']] -def test_internal_shuffle(): - df = pd.DataFrame({'A': range(10), 'B': list('abcdefghij')}) - r = recipes.Recipe.__new__(recipes.Recipe) - shuffled = recipes.Recipe.internal_shuffle(r, df.copy()) - assert set(shuffled['A']) == set(df['A']) - assert set(shuffled['B']) == set(df['B']) - +# --------------------------------------------------------------------------- +# Consolidation des tests sensibles à NumPy +# --------------------------------------------------------------------------- # Utilitaire pour créer un objet Recipe minimal avec un logger muet def _recipe_with_args(args): @@ -93,20 +89,46 @@ def _recipe_with_args(args): return r -def test_internal_to_integer_single(): - df = pd.DataFrame({'A': ['1', '2', '', '']}) - r = _recipe_with_args({'select': ['A']}) - converted = recipes.Recipe.internal_to_integer(r, df.copy()) - assert converted['A'].tolist()[:2] == [1, 2] - assert pd.isna(converted['A'].iloc[2]) and pd.isna(converted['A'].iloc[3]) - - -def test_internal_to_float(): - df = pd.DataFrame({'A': ['1.1', '', '']}) - r = _recipe_with_args({'select': ['A']}) - converted = recipes.Recipe.internal_to_float(r, df.copy()) - assert converted['A'].iloc[0] == 1.1 - assert pd.isna(converted['A'].iloc[1]) and pd.isna(converted['A'].iloc[2]) +@pytest.mark.parametrize( + "func, df, args, validator", + [ + ( + recipes.Recipe.internal_to_integer, + pd.DataFrame({'A': ['1', '2', '', '']}), + {'select': ['A']}, + lambda res: ( + res['A'].tolist()[:2] == [1, 2] + and pd.isna(res['A'].iloc[2]) + and pd.isna(res['A'].iloc[3]) + ), + ), + ( + recipes.Recipe.internal_to_float, + pd.DataFrame({'A': ['1.1', '', '']}), + {'select': ['A']}, + lambda res: ( + res['A'].iloc[0] == 1.1 + and pd.isna(res['A'].iloc[1]) + and pd.isna(res['A'].iloc[2]) + ), + ), + ( + recipes.Recipe.internal_shuffle, + pd.DataFrame({'A': range(10), 'B': list('abcdefghij')}), + {}, + lambda res, df_orig=pd.DataFrame({'A': range(10), 'B': list('abcdefghij')}): ( + set(res['A']) == set(df_orig['A']) + and set(res['B']) == set(df_orig['B']) + ), + ), + ], +) +def test_numpy_sensitive_functions(func, df, args, validator): + """Regroupe les tests des fonctions dont le comportement pourrait varier + avec NumPy 2 (conversion numériques, permutation aléatoire, etc.).""" + r = _recipe_with_args(args) + result = func(r, df.copy()) + assert validator(result) def test_internal_parsedate(): From 207b2a5ec2130faffea093a1f37797518543b012 Mon Sep 17 00:00:00 2001 From: antoinefa Date: Tue, 20 May 2025 23:50:11 -0400 Subject: [PATCH 4/6] add 6 new tests --- tests/test_recipes.py | 125 +++++++++++++++++++++++++++++++----------- 1 file changed, 94 insertions(+), 31 deletions(-) diff --git a/tests/test_recipes.py b/tests/test_recipes.py index 4899016..7636d06 100644 --- a/tests/test_recipes.py +++ b/tests/test_recipes.py @@ -89,40 +89,103 @@ def _recipe_with_args(args): return r -@pytest.mark.parametrize( - "func, df, args, validator", - [ - ( - recipes.Recipe.internal_to_integer, - pd.DataFrame({'A': ['1', '2', '', '']}), - {'select': ['A']}, - lambda res: ( - res['A'].tolist()[:2] == [1, 2] - and pd.isna(res['A'].iloc[2]) - and pd.isna(res['A'].iloc[3]) - ), +# Liste étendue de cas d'usage et de bord pour chaque fonction sensible à NumPy 2 +CASES = [ + # ------------------------------------------------------------------ + # internal_to_integer + # ------------------------------------------------------------------ + pytest.param( + recipes.Recipe.internal_to_integer, + pd.DataFrame({'A': ['1', '2', '', '']}), + {'select': ['A']}, + lambda res: ( + res['A'].tolist()[:2] == [1, 2] + and pd.isna(res['A'].iloc[2]) + and pd.isna(res['A'].iloc[3]) ), - ( - recipes.Recipe.internal_to_float, - pd.DataFrame({'A': ['1.1', '', '']}), - {'select': ['A']}, - lambda res: ( - res['A'].iloc[0] == 1.1 - and pd.isna(res['A'].iloc[1]) - and pd.isna(res['A'].iloc[2]) - ), + id="to_integer_basic", + ), + pytest.param( + recipes.Recipe.internal_to_integer, + pd.DataFrame({'A': ['foo', '3']}), + {'select': ['A']}, + # conversion doit échouer et la colonne rester inchangée + lambda res: res['A'].tolist() == ['foo', '3'], + id="to_integer_invalid_string", + ), + pytest.param( + recipes.Recipe.internal_to_integer, + pd.DataFrame({'A': ['-5', '0', '42']}), + {'select': ['A']}, + lambda res: res['A'].tolist() == [-5, 0, 42], + id="to_integer_negative_values", + ), + # ------------------------------------------------------------------ + # internal_to_float + # ------------------------------------------------------------------ + pytest.param( + recipes.Recipe.internal_to_float, + pd.DataFrame({'A': ['1.1', '', '']}), + {'select': ['A']}, + lambda res: ( + res['A'].iloc[0] == 1.1 + and pd.isna(res['A'].iloc[1]) + and pd.isna(res['A'].iloc[2]) ), - ( - recipes.Recipe.internal_shuffle, - pd.DataFrame({'A': range(10), 'B': list('abcdefghij')}), - {}, - lambda res, df_orig=pd.DataFrame({'A': range(10), 'B': list('abcdefghij')}): ( - set(res['A']) == set(df_orig['A']) - and set(res['B']) == set(df_orig['B']) - ), + id="to_float_basic", + ), + pytest.param( + recipes.Recipe.internal_to_float, + pd.DataFrame({'A': ['foo', '2.5']}), + {'select': ['A']}, + # doit être inchangé si erreur de conversion + lambda res: res['A'].tolist() == ['foo', '2.5'], + id="to_float_invalid_string", + ), + pytest.param( + recipes.Recipe.internal_to_float, + pd.DataFrame({'A': ['', '']}), + {'select': ['A'], 'na_value': 0}, + lambda res: res['A'].tolist() == [0, 0], + id="to_float_custom_na_value", + ), + # ------------------------------------------------------------------ + # internal_shuffle + # ------------------------------------------------------------------ + pytest.param( + recipes.Recipe.internal_shuffle, + pd.DataFrame({'A': range(10), 'B': list('abcdefghij')}), + {}, + lambda res, df_orig=pd.DataFrame({'A': range(10), 'B': list('abcdefghij')}): ( + set(res['A']) == set(df_orig['A']) + and set(res['B']) == set(df_orig['B']) ), - ], -) + id="shuffle_basic", + ), + pytest.param( + recipes.Recipe.internal_shuffle, + pd.DataFrame({'A': [1, 1, 1, 1], 'B': [10, 20, 30, 40]}), + {}, + lambda res, df_orig=pd.DataFrame({'A': [1, 1, 1, 1], 'B': [10, 20, 30, 40]}): ( + set(res['A']) == {1} # colonne constante + and set(res['B']) == set(df_orig['B']) + ), + id="shuffle_with_duplicates", + ), + pytest.param( + recipes.Recipe.internal_shuffle, + pd.DataFrame({'A': [np.nan, 1, 2], 'B': ['x', 'y', 'z']}), + {}, + lambda res, df_orig=pd.DataFrame({'A': [np.nan, 1, 2], 'B': ['x', 'y', 'z']}): ( + set(pd.isna(res['A'])) == set(pd.isna(df_orig['A'])) + and set(res['B']) == set(df_orig['B']) + ), + id="shuffle_with_nan", + ), +] + + +@pytest.mark.parametrize("func, df, args, validator", [c.values[:4] if hasattr(c, 'values') else c[:-1] for c in CASES]) def test_numpy_sensitive_functions(func, df, args, validator): """Regroupe les tests des fonctions dont le comportement pourrait varier avec NumPy 2 (conversion numériques, permutation aléatoire, etc.).""" From 1706c7f6ce6b9e97f5f34f4419f4931a28f53b5d Mon Sep 17 00:00:00 2001 From: antoinefa Date: Tue, 20 May 2025 23:58:30 -0400 Subject: [PATCH 5/6] add 3 tests for pandas future migration --- Makefile | 2 +- tests/test_recipes.py | 71 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index e68d50f..5730869 100644 --- a/Makefile +++ b/Makefile @@ -699,6 +699,6 @@ clean-remote: @make -C ${APP_PATH}/${GIT_TOOLS} remote-clean ${MAKEOVERRIDES} > /dev/null 2>&1 || true tests: - @docker exec -i ${USE_TTY} ${DC_PREFIX}-${APP} pytest -q -W "ignore::DeprecationWarning" + @docker exec -i ${USE_TTY} ${DC_PREFIX}-${APP} pytest -q -W "ignore::DeprecationWarning" -W "ignore::FutureWarning" .PHONY: tests diff --git a/tests/test_recipes.py b/tests/test_recipes.py index 7636d06..b763064 100644 --- a/tests/test_recipes.py +++ b/tests/test_recipes.py @@ -318,4 +318,73 @@ def test_internal_eval(): df = pd.DataFrame({'A': [1, 2]}) r = _recipe_with_args([{'B': 'A * 5'}]) evaluated = recipes.Recipe.internal_eval(r, df.copy()) - assert evaluated['B'].tolist() == [5, 10] \ No newline at end of file + assert evaluated['B'].tolist() == [5, 10] + +# --------------------------------------------------------------------------- +# Fonctions utilitaires de validation spécifiques à pandas +# --------------------------------------------------------------------------- + + +def _validate_groupby_transform_rank(df_res): + """Vérifie que les colonnes dérivées par transform et rank sont correctes""" + if not {'val_mean', 'val_rank', 'grp'}.issubset(df_res.columns): + return False + # mean identique au sein d'un même groupe + ok_mean = df_res.groupby('grp')['val_mean'].apply(lambda x: (x == x.iloc[0]).all()).all() + # rank dense commence à 1 dans chaque groupe + ok_rank = df_res.groupby('grp')['val_rank'].apply(lambda x: set(x) == set(range(1, len(x) + 1))).all() + return ok_mean and ok_rank + + +def _validate_unfold_basic(df_res): + """S'assure que l'unfold a bien explosé les listes""" + # On attend trois lignes issues des deux listes [10,20] et [30] + return df_res.shape[0] == 3 and set(df_res['L']) == {10, 20, 30} + + +def _validate_unfold_empty(df_res): + """S'assure que les listes vides sont conservées avec valeur de remplissage""" + # Si la liste était vide, la valeur doit être vide (""), NaN ou équivalente + return df_res['L'].iloc[0] in ("", np.nan) and df_res.shape[0] == 1 + + +# --------------------------------------------------------------------------- +# Tests sensibles à pandas 2 +# --------------------------------------------------------------------------- + + +PANDAS_CASES = [ + # internal_groupby avec transform et rank + pytest.param( + recipes.Recipe.internal_groupby, + pd.DataFrame({'grp': ['g1', 'g1', 'g2', 'g2'], 'val': [1, 2, 3, 5]}), + {'select': ['grp'], 'transform': [{'val': 'mean'}], 'rank': ['val']}, + _validate_groupby_transform_rank, + id="groupby_transform_rank", + ), + # internal_unfold : cas basique + pytest.param( + recipes.Recipe.internal_unfold, + pd.DataFrame({'A': [1, 2], 'L': [[10, 20], [30]]}), + {'select': ['L'], 'fill_na': ''}, + _validate_unfold_basic, + id="unfold_basic", + ), + # internal_unfold : liste vide + pytest.param( + recipes.Recipe.internal_unfold, + pd.DataFrame({'A': [1], 'L': [[]]}), + {'select': ['L'], 'fill_na': ''}, + _validate_unfold_empty, + id="unfold_empty_list", + ), +] + + +@pytest.mark.parametrize("func, df, args, validator", PANDAS_CASES) +def test_pandas_sensitive_functions(func, df, args, validator): + """Regroupe les tests focalisés sur les comportements potentiellement + modifiés par la transition vers pandas 2.""" + r = _recipe_with_args(args) + result = func(r, df.copy()) + assert validator(result) \ No newline at end of file From 209cb179ad3499a93045b786ebb5083afadd55dd Mon Sep 17 00:00:00 2001 From: antoinefa Date: Sat, 31 May 2025 09:55:43 -0400 Subject: [PATCH 6/6] fix tests --- .github/workflows/push.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index bbac64b..3845830 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -18,12 +18,12 @@ jobs: id: extract_branch - name: build if: success() - run: make version backend-docker-check GIT_BRANCH=$GIT_BRANCH || ( make backend-build GIT_BRANCH=$GIT_BRANCH && make backend backend-stop GIT_BRANCH=$GIT_BRANCH) + run: make version backend-docker-check GIT_BRANCH=$GIT_BRANCH || ( make backend-build GIT_BRANCH=$GIT_BRANCH && make backend GIT_BRANCH=$GIT_BRANCH) env: GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }} - name: tests if: success() - run: make tests + run: make tests backend-stop GIT_BRANCH=$GIT_BRANCH env: GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }} - name: publish