From 41155dc5ab27f4e143d3124e1ea57edf5d379c90 Mon Sep 17 00:00:00 2001 From: Alexis Durieux Date: Thu, 29 Jan 2026 15:14:12 +0100 Subject: [PATCH 01/11] Fix unit tests in all versions + update requirements --- Makefile | 4 ++-- code-env/python/desc.json | 5 ++++- code-env/python/spec/requirements.txt | 11 +++++++---- .../dku_error_analysis_decision_tree/tree.py | 8 +++++--- tests/python/integration/requirements.txt | 3 ++- tests/python/unit/requirements.txt | 14 +++++++++----- tests/python/unit/test_tree.py | 11 ++++------- 7 files changed, 33 insertions(+), 23 deletions(-) diff --git a/Makefile b/Makefile index e1031d19..f1d3911c 100644 --- a/Makefile +++ b/Makefile @@ -21,8 +21,8 @@ plugin: unit-tests: @echo "Running unit tests..." @( \ - PYTHON_VERSION=`python3 -V 2>&1 | sed 's/[^0-9]*//g' | cut -c 1,2`; \ - PYTHON_VERSION_IS_CORRECT=`cat code-env/python/desc.json | python3 -c "import sys, json; print(str($$PYTHON_VERSION) in [x[-2:] for x in json.load(sys.stdin)['acceptedPythonInterpreters']]);"`; \ + PYTHON_VERSION=`python3 -c "import sys; print('%s%s' % (sys.version_info.major, sys.version_info.minor))"`; \ + PYTHON_VERSION_IS_CORRECT=`cat code-env/python/desc.json | python3 -c "import sys, json; print('PYTHON' + str($$PYTHON_VERSION) in json.load(sys.stdin)['acceptedPythonInterpreters']);"`; \ if [ $$PYTHON_VERSION_IS_CORRECT == "False" ]; then echo "Python version $$PYTHON_VERSION is not in acceptedPythonInterpreters"; exit 1; else echo "Python version $$PYTHON_VERSION is in acceptedPythonInterpreters"; fi; \ ) @( \ diff --git a/code-env/python/desc.json b/code-env/python/desc.json index fdaf374a..180fde65 100644 --- a/code-env/python/desc.json +++ b/code-env/python/desc.json @@ -5,7 +5,10 @@ "PYTHON38", "PYTHON39", "PYTHON310", - "PYTHON311" + "PYTHON311", + "PYTHON312", + "PYTHON313", + "PYTHON314" ], "corePackagesSet": "AUTO", "forceConda": false, diff --git a/code-env/python/spec/requirements.txt b/code-env/python/spec/requirements.txt index 
eef0b871..a2171a94 100644 --- a/code-env/python/spec/requirements.txt +++ b/code-env/python/spec/requirements.txt @@ -1,14 +1,17 @@ graphviz==0.16 -matplotlib==3.3.4 +matplotlib>=3.3.4 scikit-learn>=0.20,<1.1; python_version <= '3.10' -scikit-learn==1.1.3; python_version >= '3.11' +scikit-learn==1.1.3; python_version == '3.11' +scikit-learn==1.3.2; python_version >= '3.12' scipy>=1.2,<1.3; python_version <= '3.7' -scipy==1.10.1; python_version >= '3.8' +scipy==1.10.1; python_version >= '3.8' and python_version < '3.12' +scipy==1.11.3; python_version >= '3.12' xgboost==0.82 lightgbm>=3.2,<3.3 statsmodels>=0.10,<0.11; python_version < '3.9' -statsmodels==0.13.5; python_version >= '3.9' +statsmodels==0.13.5; python_version >= '3.9' and python_version < '3.12' +statsmodels==0.14.0; python_version >= '3.12' jinja2>=2.10,<2.11 flask>=1.0,<1.1 cloudpickle>=1.3,<1.6 diff --git a/python-lib/dku_error_analysis_decision_tree/tree.py b/python-lib/dku_error_analysis_decision_tree/tree.py index 853defe0..c9704296 100644 --- a/python-lib/dku_error_analysis_decision_tree/tree.py +++ b/python-lib/dku_error_analysis_decision_tree/tree.py @@ -132,7 +132,7 @@ def get_stats_numerical_node(binned_column, target_column): "count": [] } if not binned_column.empty: - target_grouped = target_column.groupby(binned_column) + target_grouped = target_column.groupby(binned_column, observed=False) target_distrib = target_grouped.apply(lambda x: x.value_counts()) col_distrib = target_grouped.count() for interval, count in col_distrib.items(): @@ -158,8 +158,10 @@ def get_stats_categorical_node(column, target_column, nr_bins, bins): nr_bins = len(bins) target_grouped = target_column.groupby(column.fillna("No values").apply(safe_str)) target_distrib = target_grouped.value_counts(dropna=False) - col_distrib = target_grouped.count().sort_values(ascending=False) - values = col_distrib.index if not bins else bins + col_distrib = target_grouped.count() + df = col_distrib.reset_index() + 
df.sort_values(by=[df.columns[1], df.columns[0]], ascending=[False, True], inplace=True) + values = df[df.columns[0]] if not bins else bins for value in values: target_distrib_dict = target_distrib[value].to_dict() diff --git a/tests/python/integration/requirements.txt b/tests/python/integration/requirements.txt index 74ea694a..fc425be8 100644 --- a/tests/python/integration/requirements.txt +++ b/tests/python/integration/requirements.txt @@ -1,3 +1,4 @@ -pytest~=6.2 +pytest~=6.2; python_version < '3.7' +pytest>=7.4,<10.0; python_version >= '3.7' dataiku-api-client git+git://github.com/dataiku/dataiku-plugin-tests-utils.git@master#egg=dataiku-plugin-tests-utils diff --git a/tests/python/unit/requirements.txt b/tests/python/unit/requirements.txt index a510c9ef..5afcb762 100644 --- a/tests/python/unit/requirements.txt +++ b/tests/python/unit/requirements.txt @@ -1,5 +1,9 @@ -pandas~=1.0 -pytest~=6.2 -allure-pytest==2.8.29 -numpy==1.16.6 -pytest-mock~=3.6 +pandas~=1.0; python_version < '3.12' +pandas>=2.0,<4.0; python_version >= '3.12' +pytest~=6.2; python_version < '3.7' +pytest>=7.4,<10.0; python_version >= '3.7' +allure-pytest==2.8.29; python_version < '3.7' +allure-pytest>=2.13.5,<3.0; python_version >= '3.7' +numpy>=1.16.6 +pytest-mock~=3.6; python_version < '3.7' +pytest-mock>=3.10,<4.0; python_version >= '3.7' diff --git a/tests/python/unit/test_tree.py b/tests/python/unit/test_tree.py index a698a762..fe93f3c7 100644 --- a/tests/python/unit/test_tree.py +++ b/tests/python/unit/test_tree.py @@ -137,7 +137,7 @@ def test_get_stats_categorical_node(target, cat_column): # Check nominal case binned_column = cat_column() stats = InteractiveTree.get_stats_categorical_node(binned_column, target, 10, None) - assert stats["bin_value"] == ["B", "Q", "C"] + assert stats["bin_value"] == ["B", "C", "Q"] assert stats["count"] == [3, 1, 1] assert stats["target_distrib"][ErrorAnalyzerConstants.WRONG_PREDICTION] == [0, 1, 1] assert 
stats["target_distrib"][ErrorAnalyzerConstants.CORRECT_PREDICTION] == [3, 0, 0] @@ -145,7 +145,7 @@ def test_get_stats_categorical_node(target, cat_column): # Check nominal case - less bins binned_column = cat_column() stats = InteractiveTree.get_stats_categorical_node(binned_column, target, 2, None) - assert stats["bin_value"] == ["B", "Q"] + assert stats["bin_value"] == ["B", "C"] assert stats["count"] == [3, 1] assert stats["target_distrib"][ErrorAnalyzerConstants.WRONG_PREDICTION] == [0, 1] assert stats["target_distrib"][ErrorAnalyzerConstants.CORRECT_PREDICTION] == [3, 0] @@ -161,7 +161,7 @@ def test_get_stats_categorical_node(target, cat_column): # Check with nan binned_column = cat_column(False) stats = InteractiveTree.get_stats_categorical_node(binned_column, target, 10, None) - assert stats["bin_value"] == ["B", "Q", "No values", "C"] + assert stats["bin_value"] == ["B", "C", "No values", "Q"] assert stats["count"] == [2, 1, 1, 1] assert stats["target_distrib"][ErrorAnalyzerConstants.WRONG_PREDICTION] == [0, 1, 0, 1] assert stats["target_distrib"][ErrorAnalyzerConstants.CORRECT_PREDICTION] == [2, 0, 1, 0] @@ -169,7 +169,7 @@ def test_get_stats_categorical_node(target, cat_column): # Check with nan - less bins binned_column = cat_column(False) stats = InteractiveTree.get_stats_categorical_node(binned_column, target, 3, None) - assert stats["bin_value"] == ["B", "Q", "No values"] + assert stats["bin_value"] == ["B", "C", "No values"] assert stats["count"] == [2, 1, 1] assert stats["target_distrib"][ErrorAnalyzerConstants.WRONG_PREDICTION] == [0, 1, 0] assert stats["target_distrib"][ErrorAnalyzerConstants.CORRECT_PREDICTION] == [2, 0, 1] @@ -232,7 +232,6 @@ def test_get_stats(create_tree, mocker): # Retrieving stats for numerical features - empty col tree = create_tree() - spy = mocker.spy(InteractiveTree, 'get_stats_numerical_node') mocker.patch.object(tree, "get_filtered_df", return_value=pd.DataFrame([], columns=tree.df.columns)) tree.get_stats(-1, 
"num_1", 10) cargs = spy.call_args[0] @@ -246,7 +245,6 @@ def test_get_stats(create_tree, mocker): tree = create_tree() bin_edges = np.array([1.0, 5.0, 11.01]) tree.bin_edges["num_1"] = bin_edges - spy = mocker.spy(InteractiveTree, 'get_stats_numerical_node') mocker.patch.object(tree, "get_filtered_df", return_value=tree.df) tree.get_stats(-1, "num_1", 2, pd.Series(pd.Categorical([ pd.Interval(1.0, 3.0, "left"), @@ -276,7 +274,6 @@ def test_get_stats(create_tree, mocker): tree = create_tree() bin_edges = np.array([1.0, 5.0, 11.01]) tree.bin_edges["num_1"] = bin_edges - spy = mocker.spy(InteractiveTree, 'get_stats_numerical_node') mocker.patch.object(tree, "get_filtered_df", return_value=tree.df) tree.get_stats(-1, "num_1", 10, pd.Series(pd.Categorical([ pd.Interval(1.0, 3.0, "left"), From a70d1d3c341a454a7ef3fe8ca18d699f39f27d4e Mon Sep 17 00:00:00 2001 From: Alexis Durieux Date: Thu, 29 Jan 2026 15:32:00 +0100 Subject: [PATCH 02/11] Add ci unit testing --- .github/workflows/auto-unit-test.yml | 40 +++++++++++++++++++ .../dku_error_analysis_decision_tree/tree.py | 2 +- tests/python/unit/requirements.txt | 2 +- 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/auto-unit-test.yml diff --git a/.github/workflows/auto-unit-test.yml b/.github/workflows/auto-unit-test.yml new file mode 100644 index 00000000..7bda0442 --- /dev/null +++ b/.github/workflows/auto-unit-test.yml @@ -0,0 +1,40 @@ +name: Auto unit test + +on: + push: + branches: [master] # Only run push on default branch after merge + pull_request: # Run on all PRs + +# Cancel in-progress runs when a new commit is pushed +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + unit-tests: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + 
uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + cache-dependency-path: | + tests/python/unit/requirements.txt + code-env/python/spec/requirements.txt + + - name: Install dependencies + run: | + pip install -r tests/python/unit/requirements.txt + pip install -r code-env/python/spec/requirements.txt + + - name: Run tests + env: + PYTHONPATH: python-lib + run: pytest tests/python/unit -v diff --git a/python-lib/dku_error_analysis_decision_tree/tree.py b/python-lib/dku_error_analysis_decision_tree/tree.py index c9704296..7232a691 100644 --- a/python-lib/dku_error_analysis_decision_tree/tree.py +++ b/python-lib/dku_error_analysis_decision_tree/tree.py @@ -132,7 +132,7 @@ def get_stats_numerical_node(binned_column, target_column): "count": [] } if not binned_column.empty: - target_grouped = target_column.groupby(binned_column, observed=False) + target_grouped = target_column.groupby(binned_column) target_distrib = target_grouped.apply(lambda x: x.value_counts()) col_distrib = target_grouped.count() for interval, count in col_distrib.items(): diff --git a/tests/python/unit/requirements.txt b/tests/python/unit/requirements.txt index 5afcb762..00617370 100644 --- a/tests/python/unit/requirements.txt +++ b/tests/python/unit/requirements.txt @@ -1,5 +1,5 @@ pandas~=1.0; python_version < '3.12' -pandas>=2.0,<4.0; python_version >= '3.12' +pandas>=2.0,<3.0; python_version >= '3.12' pytest~=6.2; python_version < '3.7' pytest>=7.4,<10.0; python_version >= '3.7' allure-pytest==2.8.29; python_version < '3.7' From 13bc934b5b7347b9bfd296dc7b24090c49b046b1 Mon Sep 17 00:00:00 2001 From: Alexis Durieux Date: Thu, 29 Jan 2026 15:44:22 +0100 Subject: [PATCH 03/11] Fix dependencies --- .github/workflows/auto-unit-test.yml | 10 ++++++++-- code-env/python/spec/requirements.txt | 7 +++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/workflows/auto-unit-test.yml b/.github/workflows/auto-unit-test.yml index 
7bda0442..5529e356 100644 --- a/.github/workflows/auto-unit-test.yml +++ b/.github/workflows/auto-unit-test.yml @@ -12,11 +12,17 @@ concurrency: jobs: unit-tests: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] + os: [ubuntu-latest] + include: + - python-version: "3.6" + os: ubuntu-20.04 + - python-version: "3.7" + os: ubuntu-20.04 steps: - uses: actions/checkout@v4 diff --git a/code-env/python/spec/requirements.txt b/code-env/python/spec/requirements.txt index a2171a94..a8f7582b 100644 --- a/code-env/python/spec/requirements.txt +++ b/code-env/python/spec/requirements.txt @@ -2,16 +2,15 @@ graphviz==0.16 matplotlib>=3.3.4 scikit-learn>=0.20,<1.1; python_version <= '3.10' -scikit-learn==1.1.3; python_version == '3.11' -scikit-learn==1.3.2; python_version >= '3.12' +scikit-learn>=1.1.3; python_version >= '3.11' scipy>=1.2,<1.3; python_version <= '3.7' scipy==1.10.1; python_version >= '3.8' and python_version < '3.12' -scipy==1.11.3; python_version >= '3.12' +scipy>=1.11.3; python_version >= '3.12' xgboost==0.82 lightgbm>=3.2,<3.3 statsmodels>=0.10,<0.11; python_version < '3.9' statsmodels==0.13.5; python_version >= '3.9' and python_version < '3.12' -statsmodels==0.14.0; python_version >= '3.12' +statsmodels>=0.14.0; python_version >= '3.12' jinja2>=2.10,<2.11 flask>=1.0,<1.1 cloudpickle>=1.3,<1.6 From 7ec8d2a7efb4e4d1cdebe8b703c8e4af8b5ea33f Mon Sep 17 00:00:00 2001 From: Alexis Durieux Date: Thu, 29 Jan 2026 16:06:22 +0100 Subject: [PATCH 04/11] simplify/clarify requirements --- code-env/python/spec/requirements.txt | 14 +++++++++++--- tests/python/unit/requirements.txt | 9 +++------ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/code-env/python/spec/requirements.txt b/code-env/python/spec/requirements.txt index a8f7582b..dda0a5be 100644 --- 
a/code-env/python/spec/requirements.txt +++ b/code-env/python/spec/requirements.txt @@ -2,15 +2,23 @@ graphviz==0.16 matplotlib>=3.3.4 scikit-learn>=0.20,<1.1; python_version <= '3.10' -scikit-learn>=1.1.3; python_version >= '3.11' +scikit-learn>=1.1.3; python_version == '3.11' +scikit-learn==1.3.2; python_version == '3.12' +scikit-learn>=1.5.0; python_version >= '3.13' + scipy>=1.2,<1.3; python_version <= '3.7' scipy==1.10.1; python_version >= '3.8' and python_version < '3.12' -scipy>=1.11.3; python_version >= '3.12' +scipy==1.11.3; python_version == '3.12' +scipy>=1.13.0; python_version >= '3.13' + xgboost==0.82 lightgbm>=3.2,<3.3 + statsmodels>=0.10,<0.11; python_version < '3.9' statsmodels==0.13.5; python_version >= '3.9' and python_version < '3.12' -statsmodels>=0.14.0; python_version >= '3.12' +statsmodels==0.14.0; python_version == '3.12' +statsmodels>=0.14.2; python_version >= '3.13' + jinja2>=2.10,<2.11 flask>=1.0,<1.1 cloudpickle>=1.3,<1.6 diff --git a/tests/python/unit/requirements.txt b/tests/python/unit/requirements.txt index 00617370..12d65c28 100644 --- a/tests/python/unit/requirements.txt +++ b/tests/python/unit/requirements.txt @@ -1,9 +1,6 @@ pandas~=1.0; python_version < '3.12' pandas>=2.0,<3.0; python_version >= '3.12' -pytest~=6.2; python_version < '3.7' -pytest>=7.4,<10.0; python_version >= '3.7' -allure-pytest==2.8.29; python_version < '3.7' -allure-pytest>=2.13.5,<3.0; python_version >= '3.7' +pytest>=6.2,<10.0 +allure-pytest>=2.8.29,<3.0 numpy>=1.16.6 -pytest-mock~=3.6; python_version < '3.7' -pytest-mock>=3.10,<4.0; python_version >= '3.7' +pytest-mock>=3.6,<4.0 \ No newline at end of file From 755f7d30b4f5710aeecd457f529e2067e26662f3 Mon Sep 17 00:00:00 2001 From: Alexis Durieux Date: Thu, 29 Jan 2026 16:10:43 +0100 Subject: [PATCH 05/11] Update version --- CHANGELOG.md | 3 +++ plugin.json | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78162022..11a4a94c 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## Version 1.3.5 (2026-01-29) +- Add python 3.12, 3.13 and 3.14 official support + ## Version 1.3.4 (2025-02-05) - Improve python 3.9 support (used to need local compilation of statsmodels, now uses a wheeled version) diff --git a/plugin.json b/plugin.json index 14e9159b..2c546d17 100644 --- a/plugin.json +++ b/plugin.json @@ -1,6 +1,6 @@ { "id" : "model-error-analysis", - "version" : "1.3.4", + "version" : "1.3.5", "meta" : { "label" : "Model Error Analysis", "description" : "Debug model performance with error analysis. A code env is only required to use the Jupyter Notebook.", From 295ebd724d0df03d82acd5ed62cdb9ac788165ec Mon Sep 17 00:00:00 2001 From: Alexis Durieux Date: Thu, 29 Jan 2026 18:09:48 +0100 Subject: [PATCH 06/11] Use `ubuntu-latest` --- .github/workflows/auto-unit-test.yml | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/.github/workflows/auto-unit-test.yml b/.github/workflows/auto-unit-test.yml index 5529e356..029d7d4f 100644 --- a/.github/workflows/auto-unit-test.yml +++ b/.github/workflows/auto-unit-test.yml @@ -12,21 +12,29 @@ concurrency: jobs: unit-tests: - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] - os: [ubuntu-latest] include: - python-version: "3.6" - os: ubuntu-20.04 + container: "python:3.6-buster" - python-version: "3.7" - os: ubuntu-20.04 + container: "python:3.7-buster" + - python-version: "3.8" + - python-version: "3.9" + - python-version: "3.10" + - python-version: "3.11" + - python-version: "3.12" + - python-version: "3.13" + - python-version: "3.14" + + container: ${{ matrix.container }} steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} + if: matrix.container == '' uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -34,6 +42,11 @@ jobs: cache-dependency-path: | 
tests/python/unit/requirements.txt code-env/python/spec/requirements.txt + + - name: Install dependencies (System) + if: matrix.container != '' + run: | + apt-get update && apt-get install -y git - name: Install dependencies run: | From a006a61a6e0e3a95a73087dcce7c4dfaeafd8654 Mon Sep 17 00:00:00 2001 From: Alexis Durieux Date: Thu, 29 Jan 2026 18:14:23 +0100 Subject: [PATCH 07/11] Fix workflow --- .github/workflows/auto-unit-test.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/auto-unit-test.yml b/.github/workflows/auto-unit-test.yml index 029d7d4f..74c47001 100644 --- a/.github/workflows/auto-unit-test.yml +++ b/.github/workflows/auto-unit-test.yml @@ -43,11 +43,6 @@ jobs: tests/python/unit/requirements.txt code-env/python/spec/requirements.txt - - name: Install dependencies (System) - if: matrix.container != '' - run: | - apt-get update && apt-get install -y git - - name: Install dependencies run: | pip install -r tests/python/unit/requirements.txt From 7fb38d7a466a42da0bc9391a9c03afd8646ec95a Mon Sep 17 00:00:00 2001 From: Alexis Durieux Date: Tue, 3 Feb 2026 10:18:17 +0100 Subject: [PATCH 08/11] Upper bound requirements --- code-env/python/spec/requirements.txt | 10 +++++----- tests/python/integration/requirements.txt | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/code-env/python/spec/requirements.txt b/code-env/python/spec/requirements.txt index dda0a5be..77f907ab 100644 --- a/code-env/python/spec/requirements.txt +++ b/code-env/python/spec/requirements.txt @@ -1,15 +1,15 @@ graphviz==0.16 -matplotlib>=3.3.4 +matplotlib>=3.3.4,<=3.10.8 scikit-learn>=0.20,<1.1; python_version <= '3.10' -scikit-learn>=1.1.3; python_version == '3.11' +scikit-learn>=1.1.3,<=1.8.0; python_version == '3.11' scikit-learn==1.3.2; python_version == '3.12' -scikit-learn>=1.5.0; python_version >= '3.13' +scikit-learn>=1.5.0,<=1.8.0; python_version >= '3.13' scipy>=1.2,<1.3; python_version <= '3.7' scipy==1.10.1; python_version >= '3.8' 
and python_version < '3.12' scipy==1.11.3; python_version == '3.12' -scipy>=1.13.0; python_version >= '3.13' +scipy>=1.13.0,<=1.17.0; python_version >= '3.13' xgboost==0.82 lightgbm>=3.2,<3.3 @@ -17,7 +17,7 @@ lightgbm>=3.2,<3.3 statsmodels>=0.10,<0.11; python_version < '3.9' statsmodels==0.13.5; python_version >= '3.9' and python_version < '3.12' statsmodels==0.14.0; python_version == '3.12' -statsmodels>=0.14.2; python_version >= '3.13' +statsmodels>=0.14.2,<=0.14.6; python_version >= '3.13' jinja2>=2.10,<2.11 flask>=1.0,<1.1 diff --git a/tests/python/integration/requirements.txt b/tests/python/integration/requirements.txt index fc425be8..3fd697be 100644 --- a/tests/python/integration/requirements.txt +++ b/tests/python/integration/requirements.txt @@ -1,4 +1,4 @@ pytest~=6.2; python_version < '3.7' pytest>=7.4,<10.0; python_version >= '3.7' dataiku-api-client -git+git://github.com/dataiku/dataiku-plugin-tests-utils.git@master#egg=dataiku-plugin-tests-utils +git+https://github.com/dataiku/dataiku-plugin-tests-utils.git@master#egg=dataiku-plugin-tests-utils From 7017e7e8745371911dc0abd327042292b0eb30db Mon Sep 17 00:00:00 2001 From: Alexis Durieux Date: Wed, 4 Feb 2026 14:36:42 +0100 Subject: [PATCH 09/11] Fix integration test with new plugin test utils update + support sklearn newer versions --- resource/py/test_tree_parser.py | 39 ++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/resource/py/test_tree_parser.py b/resource/py/test_tree_parser.py index 8e624c7e..520b8c92 100644 --- a/resource/py/test_tree_parser.py +++ b/resource/py/test_tree_parser.py @@ -53,7 +53,7 @@ def preproc_array(): # PARSING METHODS @pytest.mark.parsing -def test_create_tree(mocker, df, create_parser, caplog): +def test_create_tree(mocker, df, create_parser, caplog, dss_target): error_model = mocker.Mock() error_model.feature_importances_ = np.array([1, 3, 0, 5, 2, 4]) feature_names = [ @@ -106,7 +106,7 @@ def test_create_tree(mocker, df, 
create_parser, caplog): pd.testing.assert_series_equal(dataframe["vector [element #0]"], pd.Series(['e','a',np.nan,'e','i','e','e','i','i','i','i','i'], name="vector [element #0]")) pd.testing.assert_series_equal(dataframe["vector [element #1]"], - pd.Series([1, 0, np.nan, 0, 2, np.nan, np.nan, 1, 1, 2, 100, np.nan], name="vector [element #1]")) + pd.Series([1, 0, np.nan, 0, 2, np.nan, np.nan, 1, 1, 2, 100, np.nan], name="vector [element #1]")) assert ranked_features[:3] == ["feat_c", "feat_a", "feat_b"] assert set(ranked_features) == {"feat_c", "feat_a", "feat_b", "vector [element #0]", "vector [element #1]", "cat_1", "num_1"} assert parser.num_features == {"num_1", "vector [element #1]"} @@ -144,7 +144,7 @@ def mocked_get_split_param(feature): return TreeParser.SplitParameters(Node.TYPES.NUM, "foo", None, "num_1") @pytest.mark.parsing -def test_build_tree(mocker, df, create_parser): +def test_build_tree(mocker, df, create_parser, dss_target): mocker.patch("dku_error_analysis_tree_parsing.tree_parser.descale_numerical_thresholds", return_value=[8, -2, .5, 3, -.5, -2, -2, -2, 1, -2, -2]) @@ -265,7 +265,7 @@ def check_dummy(split, name, value=None, others=False): assert split.invert_left_and_right and split.invert_left_and_right(0) @pytest.mark.categorical -def test_dummy(create_parser, mocker): +def test_dummy(create_parser, mocker, dss_target): parser = create_parser() step = mocker.Mock(values=["A", "B"], input_column_name="test", should_drop=True) parser._add_dummy_mapping(step) @@ -298,7 +298,7 @@ def test_dummy(create_parser, mocker): check_dummy(others, "test", ["A", "B"], True) @pytest.mark.categorical -def test_target_encoding(create_parser, mocker, preproc_array): +def test_target_encoding(create_parser, mocker, preproc_array, dss_target): # Test classification parser = create_parser() step = mocker.Mock(column_name="test", encoding_name="enc_name") @@ -339,7 +339,7 @@ def test_target_encoding(create_parser, mocker, preproc_array): assert 
(a.add_preprocessed_feature(preproc_array, 0) == [-1,0,0,0,1,0,4]).all() @pytest.mark.categorical -def test_whole_cat_hashing(create_parser, mocker): +def test_whole_cat_hashing(create_parser, mocker, dss_target): parser = create_parser() step = mocker.Mock(column_name="test", n_features=3) parser._add_cat_hashing_whole_mapping(step) @@ -385,7 +385,7 @@ def test_whole_cat_hashing(create_parser, mocker): assert (third.add_preprocessed_feature(preproc_array, 2) == added_column).all() @pytest.mark.categorical -def test_not_whole_cat_hashing(create_parser, mocker, preproc_array): +def test_not_whole_cat_hashing(create_parser, mocker, preproc_array, dss_target): parser = create_parser() step = mocker.Mock(column_name="test", n_features=2) parser._add_cat_hashing_not_whole_mapping(step) @@ -411,7 +411,7 @@ def test_not_whole_cat_hashing(create_parser, mocker, preproc_array): and not second.invert_left_and_right(.5) assert (second.add_preprocessed_feature(preproc_array, 1) == [0,2,3,0,1,0,0]).all() -def test_frequency_encoding(create_parser, mocker, preproc_array): +def test_frequency_encoding(create_parser, mocker, preproc_array, dss_target): parser = create_parser() step = mocker.Mock(column_name="test", suffix="suffix") parser._add_frequency_encoding_mapping(step) @@ -427,7 +427,7 @@ def test_frequency_encoding(create_parser, mocker, preproc_array): assert (a.add_preprocessed_feature(preproc_array, 0) == [-1,0,0,0,1,0,4]).all() @pytest.mark.categorical -def test_ordinal_encoding(create_parser, mocker, preproc_array): +def test_ordinal_encoding(create_parser, mocker, preproc_array, dss_target): parser = create_parser() step = mocker.Mock(column_name="test", suffix="suffix") parser._add_ordinal_encoding_mapping(step) @@ -444,7 +444,7 @@ def test_ordinal_encoding(create_parser, mocker, preproc_array): # VECTOR HANDLING @pytest.mark.vector -def test_unfold(create_parser, mocker, preproc_array): +def test_unfold(create_parser, mocker, preproc_array, dss_target): parser = 
create_parser() step = mocker.Mock(input_column_name="test", vector_length=2) parser._add_unfold_mapping(step) @@ -473,7 +473,7 @@ def test_unfold(create_parser, mocker, preproc_array): # NUM HANDLINGS @pytest.mark.numerical -def test_identity(create_parser, mocker, preproc_array): +def test_identity(create_parser, mocker, preproc_array, dss_target): parser = create_parser() parser._add_identity_mapping("test") assert len(parser.preprocessed_feature_mapping) == 1 @@ -490,7 +490,7 @@ def test_identity(create_parser, mocker, preproc_array): assert (split.add_preprocessed_feature(preproc_array, 0) == [-1,0,0,0,1,0,4]).all() @pytest.mark.numerical -def test_binarize(create_parser, mocker, preproc_array): +def test_binarize(create_parser, mocker, preproc_array, dss_target): parser = create_parser() step = mocker.Mock(in_col="test", threshold=42) step._output_name.return_value = "output" @@ -509,7 +509,7 @@ def test_binarize(create_parser, mocker, preproc_array): assert (split.add_preprocessed_feature(preproc_array, 0) == [-1,0,0,0,1,0,4]).all() @pytest.mark.numerical -def test_quantize(create_parser, mocker, preproc_array): +def test_quantize(create_parser, mocker, preproc_array, dss_target): parser = create_parser() step = mocker.Mock(in_col="test", nb_bins=42, r={"bounds": ["0.5", "1.6", "7.8"]}) parser._add_quantize_mapping(step) @@ -527,7 +527,7 @@ def test_quantize(create_parser, mocker, preproc_array): assert (split.add_preprocessed_feature(preproc_array, 0) == [-1,0,0,0,1,0,4]).all() @pytest.mark.numerical -def test_flag_missing(create_parser, mocker): +def test_flag_missing(create_parser, mocker, dss_target): # Flag on numerical feature parser = create_parser() step = mocker.Mock(feature="test", output_block_name="num_flagonly") @@ -562,7 +562,7 @@ def test_flag_missing(create_parser, mocker): and not split.invert_left_and_right(.5) @pytest.mark.numerical -def test_datetime_encoding(create_parser, mocker, preproc_array): +def 
test_datetime_encoding(create_parser, mocker, preproc_array, dss_target): parser = create_parser() step = mocker.Mock(column_name="test", selected_periods=["p1", "p2"]) @@ -623,7 +623,7 @@ def check_text_features(preproc_array, split, name): assert (split.add_preprocessed_feature(preproc_array, 1) == [0,2,3,0,1,0,0]).all() @pytest.mark.text -def test_vect_hashing(create_parser, mocker, caplog, preproc_array): +def test_vect_hashing(create_parser, mocker, caplog, preproc_array, dss_target): caplog.set_level(logging.INFO) # Hash without SVD parser = create_parser() @@ -653,12 +653,13 @@ def test_vect_hashing(create_parser, mocker, caplog, preproc_array): assert log.msg == "Feature test_bis is a text feature. Its distribution plot will not be available" @pytest.mark.text -def test_count_vect(create_parser, mocker, caplog, preproc_array): +def test_count_vect(create_parser, mocker, caplog, preproc_array, dss_target): caplog.set_level(logging.INFO) parser = create_parser() step = mocker.Mock(column_name="test", prefix="prefix") vectorizer = mocker.Mock() vectorizer.get_feature_names.return_value = ["word", "random"] + vectorizer.get_feature_names_out.return_value = ["word", "random"] step.resource = {"vectorizer": vectorizer} parser._add_text_count_vect_mapping(step) assert len(parser.preprocessed_feature_mapping) == 2 @@ -673,12 +674,13 @@ def test_count_vect(create_parser, mocker, caplog, preproc_array): assert log.msg == "Feature test is a text feature. 
Its distribution plot will not be available" @pytest.mark.text -def test_tfidf_vect(create_parser, mocker, caplog, preproc_array): +def test_tfidf_vect(create_parser, mocker, caplog, preproc_array, dss_target): caplog.set_level(logging.INFO) parser = create_parser() step = mocker.Mock(column_name="test") vectorizer = mocker.Mock(idf_=[42.4242]) vectorizer.get_feature_names.return_value = ["word", "random"] + vectorizer.get_feature_names_out.return_value = ["word", "random"] step.resource = {"vectorizer": vectorizer} parser._add_tfidf_vect_mapping(step) assert len(parser.preprocessed_feature_mapping) == 1 @@ -693,6 +695,7 @@ def test_tfidf_vect(create_parser, mocker, caplog, preproc_array): step = mocker.Mock(column_name="test_bis") vectorizer = mocker.Mock(idf_=[42.4242, 1]) vectorizer.get_feature_names.return_value = ["word", "random"] + vectorizer.get_feature_names_out.return_value = ["word", "random"] step.resource = {"vectorizer": vectorizer} parser._add_tfidf_vect_mapping(step) assert len(parser.preprocessed_feature_mapping) == 2 From 24ea9ef4e0938c9ebc97552619228d0168183f2a Mon Sep 17 00:00:00 2001 From: Alexis Durieux Date: Thu, 5 Feb 2026 09:09:12 +0100 Subject: [PATCH 10/11] Fix matplotlib-inline requirement --- code-env/python/spec/requirements.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/code-env/python/spec/requirements.txt b/code-env/python/spec/requirements.txt index 77f907ab..eb2165fc 100644 --- a/code-env/python/spec/requirements.txt +++ b/code-env/python/spec/requirements.txt @@ -1,5 +1,8 @@ graphviz==0.16 -matplotlib>=3.3.4,<=3.10.8 +matplotlib==3.3.4; python_version <= '3.9' +matplotlib>=3.5.0; python_version > '3.9' +matplotlib-inline==0.1.6; python_version <= '3.9' +matplotlib-inline>=0.1.7; python_version > '3.9' scikit-learn>=0.20,<1.1; python_version <= '3.10' scikit-learn>=1.1.3,<=1.8.0; python_version == '3.11' From 8939eabecbe1cb39b8679aa339c8618931340572 Mon Sep 17 00:00:00 2001 From: Alexis Durieux Date: 
Thu, 5 Feb 2026 09:36:36 +0100 Subject: [PATCH 11/11] Make code compatible across matplotlib versions --- python-lib/mealy_local/error_visualizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-lib/mealy_local/error_visualizer.py b/python-lib/mealy_local/error_visualizer.py index cb63fb7c..5b59dad9 100644 --- a/python-lib/mealy_local/error_visualizer.py +++ b/python-lib/mealy_local/error_visualizer.py @@ -45,7 +45,7 @@ def _plot_histograms(hist_data, label, **params): @staticmethod def _add_new_plot(figsize, bins, x_ticks, feature_name, suptitle): plt.figure(figsize=figsize) - plt.xticks(x_ticks, rotation="90") + plt.xticks(x_ticks, rotation=90) plt.gca().set_xticklabels(labels=bins) plt.ylabel('Proportion of samples') plt.title('Distribution of {}'.format(feature_name))