diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 18db3e03..dbbd67a9 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,18 +17,21 @@ jobs:
 
     steps:
 
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - name: Set up Python 3.10
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: '3.10'
+          cache: pip
+          cache-dependency-path: |
+            **/requirements*.txt
 
       - name: Install pre-commit
         run: |
           python -m pip install --upgrade pip
           pip install -U setuptools wheel
-          pip install .[dev,bayesian]
+          pip install .[dev]
 
       - name: Run pre-commit
         run: |
@@ -45,10 +48,10 @@ jobs:
 
     steps:
 
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
           cache: 'pip'
@@ -59,9 +62,13 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install -r requirements.txt
-          pip install .[test,dev,bayesian]
+          pip install .[test,dev]
+
+      - name: List installed versions
+        run: |
+          pip list
 
       - name: Run tests with pytest
         run: |
           # run tests with pytest, reporting coverage and timings
-          pytest -m "not slow" -rs -vvv --durations=0 --cov=./modnet/
+          pytest -m "not slow and not deprecated" -rs -vvv --durations=0 --cov=./modnet/
diff --git a/modnet/models/ensemble.py b/modnet/models/ensemble.py
index 880d09e9..dc2fce8e 100644
--- a/modnet/models/ensemble.py
+++ b/modnet/models/ensemble.py
@@ -438,7 +438,7 @@ def fit_preset(
             for i in range(n_splits):
                 best_5_idx = np.argsort(val_losses[:, i])[:5]
                 for idx in best_5_idx:
-                    final_models += models[idx][i].model
+                    final_models.extend(models[idx][i].models)
 
         self.__init__(modnet_models=final_models)
         os.environ["TF_CPP_MIN_LOG_LEVEL"] = "0"  # reset
diff --git a/modnet/models/vanilla.py b/modnet/models/vanilla.py
index 56ebd7f6..6eb8b4ff 100644
--- a/modnet/models/vanilla.py
+++ b/modnet/models/vanilla.py
@@ -1343,6 +1343,9 @@ def fit(
         }
 
         fit_params.update(fit_params_kw)
+        if "learning_rate" in fit_params:
+            fit_params.pop("learning_rate")
+            warnings.warn("learning_rate is deprecated, use lr instead.")
 
         if loss is None:
             loss = "mse"
diff --git a/modnet/tests/test_hyper_opt.py b/modnet/tests/test_hyper_opt.py
index dc117b74..7c4ed851 100644
--- a/modnet/tests/test_hyper_opt.py
+++ b/modnet/tests/test_hyper_opt.py
@@ -29,3 +29,41 @@ def test_ga(small_moddata, tf_session):
 
     assert type(model) is EnsembleMODNetModel
     assert len(model.models) == 1
+
+
+def test_ga_multi_target(small_moddata, tf_session):
+    """Tests FitGenetic on multiple targets with a custom loss for each."""
+    from modnet.hyper_opt import FitGenetic
+
+    data = small_moddata
+    # set 'optimal' features manually
+    data.optimal_features = [
+        col for col in data.df_featurized.columns if col.startswith("ElementProperty")
+    ]
+
+    def custom_loss(y_true, y_pred):
+        import tensorflow as tf
+        loss1 = y_pred - y_true
+        return tf.reduce_mean(
+            tf.math.abs(
+                tf.boolean_mask(loss1, tf.reduce_all(~tf.math.is_nan(loss1), axis=1))
+            )
+        )
+
+    ga = FitGenetic(data)
+    model = ga.run(
+        size_pop=2,
+        num_generations=2,
+        prob_mut=0.5,
+        nested=2,
+        n_jobs=2,
+        early_stopping=2,
+        refit=1,
+        loss=[custom_loss, custom_loss],
+        fast=True,
+    )
+
+    from modnet.models import EnsembleMODNetModel
+
+    assert type(model) is EnsembleMODNetModel
+    assert len(model.models) == 1
diff --git a/modnet/tests/test_model.py b/modnet/tests/test_model.py
index c4853e07..1bcd4464 100644
--- a/modnet/tests/test_model.py
+++ b/modnet/tests/test_model.py
@@ -75,6 +75,38 @@ def test_train_small_model_multi_target(subset_moddata, tf_session):
     model.predict(data)
     assert not np.isnan(model.evaluate(data))
 
+
+def test_train_small_model_multi_target_custom_loss(subset_moddata, tf_session):
+    """Tests multi-target training with a custom loss per target."""
+    from modnet.models import MODNetModel
+    from functools import partial
+    import tensorflow as tf
+
+    data = subset_moddata
+    # set 'optimal' features manually
+    data.optimal_features = [
+        col for col in data.df_featurized.columns if col.startswith("ElementProperty")
+    ]
+
+    def custom_loss(y_true, y_pred, rescale=1):
+        loss1 = y_pred - y_true
+        return rescale * tf.reduce_mean(
+            tf.math.abs(
+                tf.boolean_mask(loss1, tf.reduce_all(~tf.math.is_nan(loss1), axis=1))
+            )
+        )
+
+    model = MODNetModel(
+        [[["eform", "egap"]]],
+        weights={"eform": 1, "egap": 1},
+        num_neurons=[[16], [8], [8], [4]],
+        n_feat=10,
+    )
+
+    model.fit(data, loss=[partial(custom_loss, rescale=10), custom_loss], epochs=2)
+    model.predict(data)
+    assert not np.isnan(model.evaluate(data))
+
 
 def test_train_small_model_presets(subset_moddata, tf_session):
     """Tests the `fit_preset()` method."""
@@ -142,6 +174,7 @@ def test_model_integration(subset_moddata, tf_session):
     assert not np.isnan(model.evaluate(data))
 
 
+@pytest.mark.deprecated
 def test_train_small_bayesian_single_target(subset_moddata, tf_session):
     """Tests the single target training."""
     from modnet.models import BayesianMODNetModel
@@ -165,6 +198,7 @@ def test_train_small_bayesian_single_target(subset_moddata, tf_session):
     assert not np.isnan(model.evaluate(data))
 
 
+@pytest.mark.deprecated
 def test_train_small_bayesian_single_target_classif(subset_moddata, tf_session):
     """Tests the single target training."""
     from modnet.models import BayesianMODNetModel
@@ -196,6 +230,7 @@ def is_metal(egap):
     assert not np.isnan(model.evaluate(data))
 
 
+@pytest.mark.deprecated
 def test_train_small_bayesian_multi_target(subset_moddata, tf_session):
     """Tests the multi-target training."""
     from modnet.models import BayesianMODNetModel
@@ -300,6 +335,41 @@ def test_train_small_bootstrap_multi_target(small_moddata, tf_session):
     model.predict(data, return_unc=True)
 
 
+def test_train_small_bootstrap_custom_loss_multi_target(small_moddata, tf_session):
+    """Tests a multi-target ensemble model with a custom loss per target,
+    modified from Hao Wu's example.
+
+    """
+    from modnet.models import EnsembleMODNetModel
+    import tensorflow as tf
+
+    def custom_loss(y_true, y_pred):
+        loss1 = y_pred - y_true
+        return tf.reduce_mean(
+            tf.math.abs(
+                tf.boolean_mask(loss1, tf.reduce_all(~tf.math.is_nan(loss1), axis=1))
+            )
+        )
+
+    data = small_moddata
+    # set 'optimal' features manually
+    data.optimal_features = [
+        col for col in data.df_featurized.columns if col.startswith("ElementProperty")
+    ]
+
+    model = EnsembleMODNetModel(
+        [[["eform", "egap"]]],
+        weights={"eform": 1, "egap": 1},
+        num_neurons=[[16], [8], [8], [4]],
+        n_feat=10,
+        n_models=3,
+        bootstrap=True,
+    )
+
+    model.fit(data, loss=[custom_loss, custom_loss], epochs=2)
+    model.predict(data, return_unc=True)
+
+
 @pytest.mark.slow
 def test_train_small_bootstrap_presets(small_moddata, tf_session):
     """Tests the `fit_preset()` method."""
diff --git a/pytest.ini b/pytest.ini
index 76178536..c382c881 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,3 +1,4 @@
 [pytest]
 markers =
     slow: marks tests as slow (deselect with '-m "not slow"')
+    deprecated: marks tests as deprecated (deselect with '-m "not deprecated"')
diff --git a/requirements.txt b/requirements.txt
index 81c0afa6..24046bcd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,6 @@
-tensorflow==2.11.0
-pandas==1.5.2
-scikit-learn==1.3.2
+tensorflow==2.15.1
+pandas==2.2.3
+scikit-learn==1.5.0
 matminer==0.9.2
 numpy>=1.25
-pymatgen==2024.3.1
-scikit-learn==1.3.2
+pymatgen==2024.5.31
diff --git a/setup.py b/setup.py
index 28781da1..849a3396 100644
--- a/setup.py
+++ b/setup.py
@@ -32,17 +32,17 @@
     include_package_data=True,
     packages=setuptools.find_packages(),
     install_requires=[
-        "pandas~=1.5",
-        "tensorflow~=2.10,<2.12",
-        "pymatgen>=2023",
-        "matminer~=0.9",
-        "numpy>=1.24",
-        "scikit-learn~=1.3",
+        "pandas >= 1.5, < 3",
+        "tensorflow ~= 2.10, < 2.16",
+        "pymatgen >= 2023",
+        "matminer ~= 0.9",
+        "numpy >= 1.24",
+        "scikit-learn ~= 1.3",
     ],
     tests_require=tests_require,
     test_suite="modnet.tests",
     extras_require={
-        "bayesian": ["tensorflow-probability==0.18"],
+        "bayesian": ["tensorflow-probability==0.18", "tensorflow == 2.11.*"],
         "test": tests_require,
         "dev": dev_require,
     },