54 changes: 54 additions & 0 deletions .github/workflows/auto-unit-test.yml
@@ -0,0 +1,54 @@
name: Auto unit test

on:
push:
branches: [master] # Only run push on default branch after merge
pull_request: # Run on all PRs

# Cancel in-progress runs when a new commit is pushed
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
unit-tests:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
- python-version: "3.6"
container: "python:3.6-buster"
- python-version: "3.7"
container: "python:3.7-buster"
- python-version: "3.8"
- python-version: "3.9"
- python-version: "3.10"
- python-version: "3.11"
- python-version: "3.12"
- python-version: "3.13"
- python-version: "3.14"

container: ${{ matrix.container }}

steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
if: matrix.container == ''
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache-dependency-path: |
tests/python/unit/requirements.txt
code-env/python/spec/requirements.txt

- name: Install dependencies
run: |
pip install -r tests/python/unit/requirements.txt
pip install -r code-env/python/spec/requirements.txt

- name: Run tests
env:
PYTHONPATH: python-lib
run: pytest tests/python/unit -v
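To reproduce this CI job locally, the two steps above amount to installing both requirements files and running pytest with `PYTHONPATH` pointing at `python-lib`. A minimal sketch, assuming it is run from the repository root with one of the supported interpreters:

```python
import os
import subprocess
import sys

# Install the same requirements the workflow installs.
subprocess.run(
    [sys.executable, "-m", "pip", "install",
     "-r", "tests/python/unit/requirements.txt",
     "-r", "code-env/python/spec/requirements.txt"],
    check=True,
)

# Run the unit tests with PYTHONPATH=python-lib, as in the "Run tests" step.
env = dict(os.environ, PYTHONPATH="python-lib")
subprocess.run(
    [sys.executable, "-m", "pytest", "tests/python/unit", "-v"],
    env=env, check=True,
)
```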
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,8 @@
# Changelog

## Version 1.3.5 (2026-01-29)
- Add official support for Python 3.12, 3.13 and 3.14

## Version 1.3.4 (2025-02-05)
- Improve python 3.9 support (used to need local compilation of statsmodels, now uses a wheeled version)

4 changes: 2 additions & 2 deletions Makefile
@@ -21,8 +21,8 @@ plugin:
unit-tests:
@echo "Running unit tests..."
@( \
PYTHON_VERSION=`python3 -V 2>&1 | sed 's/[^0-9]*//g' | cut -c 1,2`; \
PYTHON_VERSION_IS_CORRECT=`cat code-env/python/desc.json | python3 -c "import sys, json; print(str($$PYTHON_VERSION) in [x[-2:] for x in json.load(sys.stdin)['acceptedPythonInterpreters']]);"`; \
PYTHON_VERSION=`python3 -c "import sys; print('%s%s' % (sys.version_info.major, sys.version_info.minor))"`; \
PYTHON_VERSION_IS_CORRECT=`cat code-env/python/desc.json | python3 -c "import sys, json; print('PYTHON' + str($$PYTHON_VERSION) in json.load(sys.stdin)['acceptedPythonInterpreters']);"`; \
if [ $$PYTHON_VERSION_IS_CORRECT == "False" ]; then echo "Python version $$PYTHON_VERSION is not in acceptedPythonInterpreters"; exit 1; else echo "Python version $$PYTHON_VERSION is in acceptedPythonInterpreters"; fi; \
)
@( \
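The rewritten check derives the interpreter tag from `sys.version_info` instead of parsing `python3 -V` with sed, then tests membership in `acceptedPythonInterpreters`. The shell one-liner is roughly equivalent to this standalone sketch (run from the repository root):

```python
import json
import sys

# Build the DSS-style interpreter tag, e.g. "PYTHON312" for Python 3.12.
python_version = "PYTHON%s%s" % (sys.version_info.major, sys.version_info.minor)

with open("code-env/python/desc.json") as f:
    accepted = json.load(f)["acceptedPythonInterpreters"]

if python_version in accepted:
    print("Python version %s is in acceptedPythonInterpreters" % python_version)
else:
    sys.exit("Python version %s is not in acceptedPythonInterpreters" % python_version)
```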
5 changes: 4 additions & 1 deletion code-env/python/desc.json
@@ -5,7 +5,10 @@
"PYTHON38",
"PYTHON39",
"PYTHON310",
"PYTHON311"
"PYTHON311",
"PYTHON312",
"PYTHON313",
"PYTHON314"
],
"corePackagesSet": "AUTO",
"forceConda": false,
21 changes: 17 additions & 4 deletions code-env/python/spec/requirements.txt
@@ -1,14 +1,27 @@
graphviz==0.16
matplotlib==3.3.4
matplotlib==3.3.4; python_version <= '3.9'
matplotlib>=3.5.0; python_version > '3.9'
matplotlib-inline==0.1.6; python_version <= '3.9'
matplotlib-inline>=0.1.7; python_version > '3.9'

scikit-learn>=0.20,<1.1; python_version <= '3.10'
scikit-learn==1.1.3; python_version >= '3.11'
scikit-learn>=1.1.3,<=1.8.0; python_version == '3.11'
scikit-learn==1.3.2; python_version == '3.12'
scikit-learn>=1.5.0,<=1.8.0; python_version >= '3.13'

scipy>=1.2,<1.3; python_version <= '3.7'
scipy==1.10.1; python_version >= '3.8'
scipy==1.10.1; python_version >= '3.8' and python_version < '3.12'
scipy==1.11.3; python_version == '3.12'
scipy>=1.13.0,<=1.17.0; python_version >= '3.13'

xgboost==0.82
lightgbm>=3.2,<3.3

statsmodels>=0.10,<0.11; python_version < '3.9'
statsmodels==0.13.5; python_version >= '3.9'
statsmodels==0.13.5; python_version >= '3.9' and python_version < '3.12'
statsmodels==0.14.0; python_version == '3.12'
statsmodels>=0.14.2,<=0.14.6; python_version >= '3.13'

jinja2>=2.10,<2.11
flask>=1.0,<1.1
cloudpickle>=1.3,<1.6
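The pins now rely on PEP 508 environment markers, so pip selects at most one line per package for the interpreter doing the install. If in doubt about which pin applies, the markers can be evaluated directly with the third-party `packaging` library; the sketch below is only an illustration, with version ranges copied from the file above:

```python
from packaging.markers import Marker  # third-party "packaging" distribution

# Each marker is evaluated against the current interpreter, so only one
# scikit-learn pin should be active for any given Python version.
pins = [
    ("scikit-learn>=0.20,<1.1", "python_version <= '3.10'"),
    ("scikit-learn>=1.1.3,<=1.8.0", "python_version == '3.11'"),
    ("scikit-learn==1.3.2", "python_version == '3.12'"),
    ("scikit-learn>=1.5.0,<=1.8.0", "python_version >= '3.13'"),
]
for requirement, marker in pins:
    print("%-30s %-30s %s" % (requirement, marker, Marker(marker).evaluate()))
```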
2 changes: 1 addition & 1 deletion plugin.json
@@ -1,6 +1,6 @@
{
"id" : "model-error-analysis",
"version" : "1.3.4",
"version" : "1.3.5",
"meta" : {
"label" : "Model Error Analysis",
"description" : "Debug model performance with error analysis. A code env is only required to use the Jupyter Notebook.",
6 changes: 4 additions & 2 deletions python-lib/dku_error_analysis_decision_tree/tree.py
@@ -158,8 +158,10 @@ def get_stats_categorical_node(column, target_column, nr_bins, bins):
nr_bins = len(bins)
target_grouped = target_column.groupby(column.fillna("No values").apply(safe_str))
target_distrib = target_grouped.value_counts(dropna=False)
col_distrib = target_grouped.count().sort_values(ascending=False)
values = col_distrib.index if not bins else bins
col_distrib = target_grouped.count()
df = col_distrib.reset_index()
df.sort_values(by=[df.columns[1], df.columns[0]], ascending=[False, True], inplace=True)
values = df[df.columns[0]] if not bins else bins

for value in values:
target_distrib_dict = target_distrib[value].to_dict()
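The previous code ordered categories with a single `sort_values(ascending=False)` on the counts, which leaves the relative order of tied counts unspecified. The new code sorts on the count (descending) and then on the category value (ascending), so the displayed order is deterministic. A toy illustration of the tie-breaking, with data invented for the example:

```python
import pandas as pd

# Toy per-category counts with a tie between "b" and "c".
col_distrib = pd.Series({"a": 5, "c": 3, "b": 3}, name="count")
col_distrib.index.name = "category"

df = col_distrib.reset_index()
# Primary key: count, descending; secondary key: category value, ascending.
df.sort_values(by=[df.columns[1], df.columns[0]], ascending=[False, True], inplace=True)
print(df[df.columns[0]].tolist())  # ['a', 'b', 'c'] regardless of input order
```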
2 changes: 1 addition & 1 deletion python-lib/mealy_local/error_visualizer.py
@@ -45,7 +45,7 @@ def _plot_histograms(hist_data, label, **params):
@staticmethod
def _add_new_plot(figsize, bins, x_ticks, feature_name, suptitle):
plt.figure(figsize=figsize)
plt.xticks(x_ticks, rotation="90")
plt.xticks(x_ticks, rotation=90)
plt.gca().set_xticklabels(labels=bins)
plt.ylabel('Proportion of samples')
plt.title('Distribution of {}'.format(feature_name))
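Passing the rotation as a number rather than the string `"90"` matches Matplotlib's documented `rotation` parameter (a float in degrees, or `'vertical'`/`'horizontal'`); presumably the change is there to keep newer, stricter Matplotlib releases happy. A self-contained sketch of the corrected call:

```python
import matplotlib
matplotlib.use("Agg")  # headless backend so the sketch runs without a display
import matplotlib.pyplot as plt

plt.figure(figsize=(6, 4))
x_ticks = range(3)
plt.xticks(x_ticks, rotation=90)  # numeric rotation in degrees
plt.gca().set_xticklabels(labels=["bin A", "bin B", "bin C"])
plt.ylabel("Proportion of samples")
plt.title("Distribution of some_feature")
plt.savefig("histogram.png")
```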
39 changes: 21 additions & 18 deletions resource/py/test_tree_parser.py
@@ -53,7 +53,7 @@ def preproc_array():

# PARSING METHODS
@pytest.mark.parsing
def test_create_tree(mocker, df, create_parser, caplog):
def test_create_tree(mocker, df, create_parser, caplog, dss_target):
error_model = mocker.Mock()
error_model.feature_importances_ = np.array([1, 3, 0, 5, 2, 4])
feature_names = [
@@ -106,7 +106,7 @@ def test_create_tree(mocker, df, create_parser, caplog):
pd.testing.assert_series_equal(dataframe["vector [element #0]"],
pd.Series(['e','a',np.nan,'e','i','e','e','i','i','i','i','i'], name="vector [element #0]"))
pd.testing.assert_series_equal(dataframe["vector [element #1]"],
pd.Series([1, 0, np.nan, 0, 2, np.nan, np.nan, 1, 1, 2, 100, np.nan], name="vector [element #1]"))
pd.Series([1, 0, np.nan, 0, 2, np.nan, np.nan, 1, 1, 2, 100, np.nan], name="vector [element #1]"))
assert ranked_features[:3] == ["feat_c", "feat_a", "feat_b"]
assert set(ranked_features) == {"feat_c", "feat_a", "feat_b", "vector [element #0]", "vector [element #1]", "cat_1", "num_1"}
assert parser.num_features == {"num_1", "vector [element #1]"}
@@ -144,7 +144,7 @@ def mocked_get_split_param(feature):
return TreeParser.SplitParameters(Node.TYPES.NUM, "foo", None, "num_1")

@pytest.mark.parsing
def test_build_tree(mocker, df, create_parser):
def test_build_tree(mocker, df, create_parser, dss_target):
mocker.patch("dku_error_analysis_tree_parsing.tree_parser.descale_numerical_thresholds",
return_value=[8, -2, .5, 3, -.5, -2, -2, -2, 1, -2, -2])

@@ -265,7 +265,7 @@ def check_dummy(split, name, value=None, others=False):
assert split.invert_left_and_right and split.invert_left_and_right(0)

@pytest.mark.categorical
def test_dummy(create_parser, mocker):
def test_dummy(create_parser, mocker, dss_target):
parser = create_parser()
step = mocker.Mock(values=["A", "B"], input_column_name="test", should_drop=True)
parser._add_dummy_mapping(step)
@@ -298,7 +298,7 @@ def test_dummy(create_parser, mocker):
check_dummy(others, "test", ["A", "B"], True)

@pytest.mark.categorical
def test_target_encoding(create_parser, mocker, preproc_array):
def test_target_encoding(create_parser, mocker, preproc_array, dss_target):
# Test classification
parser = create_parser()
step = mocker.Mock(column_name="test", encoding_name="enc_name")
@@ -339,7 +339,7 @@ def test_target_encoding(create_parser, mocker, preproc_array):
assert (a.add_preprocessed_feature(preproc_array, 0) == [-1,0,0,0,1,0,4]).all()

@pytest.mark.categorical
def test_whole_cat_hashing(create_parser, mocker):
def test_whole_cat_hashing(create_parser, mocker, dss_target):
parser = create_parser()
step = mocker.Mock(column_name="test", n_features=3)
parser._add_cat_hashing_whole_mapping(step)
@@ -385,7 +385,7 @@ def test_whole_cat_hashing(create_parser, mocker):
assert (third.add_preprocessed_feature(preproc_array, 2) == added_column).all()

@pytest.mark.categorical
def test_not_whole_cat_hashing(create_parser, mocker, preproc_array):
def test_not_whole_cat_hashing(create_parser, mocker, preproc_array, dss_target):
parser = create_parser()
step = mocker.Mock(column_name="test", n_features=2)
parser._add_cat_hashing_not_whole_mapping(step)
@@ -411,7 +411,7 @@ def test_not_whole_cat_hashing(create_parser, mocker, preproc_array):
and not second.invert_left_and_right(.5)
assert (second.add_preprocessed_feature(preproc_array, 1) == [0,2,3,0,1,0,0]).all()

def test_frequency_encoding(create_parser, mocker, preproc_array):
def test_frequency_encoding(create_parser, mocker, preproc_array, dss_target):
parser = create_parser()
step = mocker.Mock(column_name="test", suffix="suffix")
parser._add_frequency_encoding_mapping(step)
@@ -427,7 +427,7 @@ def test_frequency_encoding(create_parser, mocker, preproc_array):
assert (a.add_preprocessed_feature(preproc_array, 0) == [-1,0,0,0,1,0,4]).all()

@pytest.mark.categorical
def test_ordinal_encoding(create_parser, mocker, preproc_array):
def test_ordinal_encoding(create_parser, mocker, preproc_array, dss_target):
parser = create_parser()
step = mocker.Mock(column_name="test", suffix="suffix")
parser._add_ordinal_encoding_mapping(step)
@@ -444,7 +444,7 @@ def test_ordinal_encoding(create_parser, mocker, preproc_array):

# VECTOR HANDLING
@pytest.mark.vector
def test_unfold(create_parser, mocker, preproc_array):
def test_unfold(create_parser, mocker, preproc_array, dss_target):
parser = create_parser()
step = mocker.Mock(input_column_name="test", vector_length=2)
parser._add_unfold_mapping(step)
@@ -473,7 +473,7 @@ def test_unfold(create_parser, mocker, preproc_array):

# NUM HANDLINGS
@pytest.mark.numerical
def test_identity(create_parser, mocker, preproc_array):
def test_identity(create_parser, mocker, preproc_array, dss_target):
parser = create_parser()
parser._add_identity_mapping("test")
assert len(parser.preprocessed_feature_mapping) == 1
@@ -490,7 +490,7 @@ def test_identity(create_parser, mocker, preproc_array):
assert (split.add_preprocessed_feature(preproc_array, 0) == [-1,0,0,0,1,0,4]).all()

@pytest.mark.numerical
def test_binarize(create_parser, mocker, preproc_array):
def test_binarize(create_parser, mocker, preproc_array, dss_target):
parser = create_parser()
step = mocker.Mock(in_col="test", threshold=42)
step._output_name.return_value = "output"
@@ -509,7 +509,7 @@ def test_binarize(create_parser, mocker, preproc_array):
assert (split.add_preprocessed_feature(preproc_array, 0) == [-1,0,0,0,1,0,4]).all()

@pytest.mark.numerical
def test_quantize(create_parser, mocker, preproc_array):
def test_quantize(create_parser, mocker, preproc_array, dss_target):
parser = create_parser()
step = mocker.Mock(in_col="test", nb_bins=42, r={"bounds": ["0.5", "1.6", "7.8"]})
parser._add_quantize_mapping(step)
@@ -527,7 +527,7 @@ def test_quantize(create_parser, mocker, preproc_array):
assert (split.add_preprocessed_feature(preproc_array, 0) == [-1,0,0,0,1,0,4]).all()

@pytest.mark.numerical
def test_flag_missing(create_parser, mocker):
def test_flag_missing(create_parser, mocker, dss_target):
# Flag on numerical feature
parser = create_parser()
step = mocker.Mock(feature="test", output_block_name="num_flagonly")
@@ -562,7 +562,7 @@ def test_flag_missing(create_parser, mocker):
and not split.invert_left_and_right(.5)

@pytest.mark.numerical
def test_datetime_encoding(create_parser, mocker, preproc_array):
def test_datetime_encoding(create_parser, mocker, preproc_array, dss_target):
parser = create_parser()
step = mocker.Mock(column_name="test", selected_periods=["p1", "p2"])

@@ -623,7 +623,7 @@ def check_text_features(preproc_array, split, name):
assert (split.add_preprocessed_feature(preproc_array, 1) == [0,2,3,0,1,0,0]).all()

@pytest.mark.text
def test_vect_hashing(create_parser, mocker, caplog, preproc_array):
def test_vect_hashing(create_parser, mocker, caplog, preproc_array, dss_target):
caplog.set_level(logging.INFO)
# Hash without SVD
parser = create_parser()
@@ -653,12 +653,13 @@ def test_vect_hashing(create_parser, mocker, caplog, preproc_array):
assert log.msg == "Feature test_bis is a text feature. Its distribution plot will not be available"

@pytest.mark.text
def test_count_vect(create_parser, mocker, caplog, preproc_array):
def test_count_vect(create_parser, mocker, caplog, preproc_array, dss_target):
caplog.set_level(logging.INFO)
parser = create_parser()
step = mocker.Mock(column_name="test", prefix="prefix")
vectorizer = mocker.Mock()
vectorizer.get_feature_names.return_value = ["word", "random"]
vectorizer.get_feature_names_out.return_value = ["word", "random"]
step.resource = {"vectorizer": vectorizer}
parser._add_text_count_vect_mapping(step)
assert len(parser.preprocessed_feature_mapping) == 2
@@ -673,12 +673,13 @@ def test_count_vect(create_parser, mocker, caplog, preproc_array):
assert log.msg == "Feature test is a text feature. Its distribution plot will not be available"

@pytest.mark.text
def test_tfidf_vect(create_parser, mocker, caplog, preproc_array):
def test_tfidf_vect(create_parser, mocker, caplog, preproc_array, dss_target):
caplog.set_level(logging.INFO)
parser = create_parser()
step = mocker.Mock(column_name="test")
vectorizer = mocker.Mock(idf_=[42.4242])
vectorizer.get_feature_names.return_value = ["word", "random"]
vectorizer.get_feature_names_out.return_value = ["word", "random"]
step.resource = {"vectorizer": vectorizer}
parser._add_tfidf_vect_mapping(step)
assert len(parser.preprocessed_feature_mapping) == 1
@@ -693,6 +695,7 @@ def test_tfidf_vect(create_parser, mocker, caplog, preproc_array):
step = mocker.Mock(column_name="test_bis")
vectorizer = mocker.Mock(idf_=[42.4242, 1])
vectorizer.get_feature_names.return_value = ["word", "random"]
vectorizer.get_feature_names_out.return_value = ["word", "random"]
step.resource = {"vectorizer": vectorizer}
parser._add_tfidf_vect_mapping(step)
assert len(parser.preprocessed_feature_mapping) == 2
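Two changes recur throughout this test file: every parsing test now also requests the `dss_target` fixture (presumably provided by the suite's conftest, which is not part of this diff), and the mocked vectorizers configure `get_feature_names_out` alongside `get_feature_names`, since scikit-learn 1.0 renamed the method and later releases dropped the old name. A hypothetical compatibility helper illustrating why both mocks are needed:

```python
def vectorizer_feature_names(vectorizer):
    """Return feature names from a fitted (or mocked) vectorizer.

    scikit-learn >= 1.0 exposes get_feature_names_out(); older releases only
    provide get_feature_names(), which was later removed. Code that must run
    on both probes for the new method first, hence the tests mock both.
    """
    if hasattr(vectorizer, "get_feature_names_out"):
        return list(vectorizer.get_feature_names_out())
    return list(vectorizer.get_feature_names())
```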
5 changes: 3 additions & 2 deletions tests/python/integration/requirements.txt
@@ -1,3 +1,4 @@
pytest~=6.2
pytest~=6.2; python_version < '3.7'
pytest>=7.4,<10.0; python_version >= '3.7'
dataiku-api-client
git+git://github.com/dataiku/dataiku-plugin-tests-utils.git@master#egg=dataiku-plugin-tests-utils
git+https://github.com/dataiku/dataiku-plugin-tests-utils.git@master#egg=dataiku-plugin-tests-utils
11 changes: 6 additions & 5 deletions tests/python/unit/requirements.txt
@@ -1,5 +1,6 @@
pandas~=1.0
pytest~=6.2
allure-pytest==2.8.29
numpy==1.16.6
pytest-mock~=3.6
pandas~=1.0; python_version < '3.12'
pandas>=2.0,<3.0; python_version >= '3.12'
pytest>=6.2,<10.0
allure-pytest>=2.8.29,<3.0
numpy>=1.16.6
pytest-mock>=3.6,<4.0