From 84fb228b649fde68fb7a47d04c32b081c0e8f438 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Tue, 27 Jan 2026 14:55:09 +0100 Subject: [PATCH 1/6] Add type hint --- .../train_new_model/DataGeneratorEmbeddingEvaluation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ms2deepscore/train_new_model/DataGeneratorEmbeddingEvaluation.py b/ms2deepscore/train_new_model/DataGeneratorEmbeddingEvaluation.py index a5697ba9..016e6517 100644 --- a/ms2deepscore/train_new_model/DataGeneratorEmbeddingEvaluation.py +++ b/ms2deepscore/train_new_model/DataGeneratorEmbeddingEvaluation.py @@ -7,6 +7,7 @@ from matchms.similarity.vector_similarity_functions import jaccard_similarity_matrix from ms2deepscore.SettingsMS2Deepscore import SettingsEmbeddingEvaluator +from ms2deepscore.models import SiameseSpectralModel from ms2deepscore.tensorize_spectra import tensorize_spectra from ms2deepscore.train_new_model.inchikey_pair_selection import compute_fingerprints_for_training from ms2deepscore.vector_operations import cosine_similarity_matrix @@ -29,7 +30,7 @@ class DataGeneratorEmbeddingEvaluation: """ def __init__(self, spectrums: List[Spectrum], - ms2ds_model, + ms2ds_model: SiameseSpectralModel, settings: SettingsEmbeddingEvaluator, device="cpu", ): From 25dafcf9f2d81435c24f4d72beaa68353b7d44c4 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Tue, 27 Jan 2026 14:55:28 +0100 Subject: [PATCH 2/6] Add model file name to SettingsEmbeddingEvaluator --- ms2deepscore/SettingsMS2Deepscore.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ms2deepscore/SettingsMS2Deepscore.py b/ms2deepscore/SettingsMS2Deepscore.py index ab9011ec..76a14261 100644 --- a/ms2deepscore/SettingsMS2Deepscore.py +++ b/ms2deepscore/SettingsMS2Deepscore.py @@ -375,6 +375,10 @@ def __init__(self, **settings): self.learning_rate = 0.0001 self.num_epochs = 5 + # When using the training wrapper, this is the file name under which the embedding evaluator model is saved. + # It is stored in the same folder as the ms2deepscore model. 
+ self.model_file_name = "embedding_evaluator.pt" + if settings: # Coerce incoming values against defaults for consistency settings = _coerce_settings_dict(settings, self) From 12e0200825a61c2b476c06a03e90a4a190aa2ef4 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Tue, 27 Jan 2026 14:55:59 +0100 Subject: [PATCH 3/6] Add the training of an embedding evaluator to the train_ms2deepscore_wrapper --- .../training_wrapper_functions.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/ms2deepscore/wrapper_functions/training_wrapper_functions.py b/ms2deepscore/wrapper_functions/training_wrapper_functions.py index 8a00b5b8..59f718b9 100644 --- a/ms2deepscore/wrapper_functions/training_wrapper_functions.py +++ b/ms2deepscore/wrapper_functions/training_wrapper_functions.py @@ -10,9 +10,10 @@ from matchms.importing import load_spectra from ms2deepscore.benchmarking.CalculateScoresBetweenAllIonmodes import CalculateScoresBetweenAllIonmodes +from ms2deepscore.models import EmbeddingEvaluationModel from ms2deepscore.models.SiameseSpectralModel import (SiameseSpectralModel, train) -from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore +from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore, SettingsEmbeddingEvaluator from ms2deepscore.train_new_model import TrainingBatchGenerator, create_spectrum_pair_generator from ms2deepscore.validation_loss_calculation.ValidationLossCalculator import ValidationLossCalculator from ms2deepscore.train_new_model.train_ms2deepscore import \ @@ -25,6 +26,7 @@ def train_ms2deepscore_wrapper(settings: SettingsMS2Deepscore, + settings_embedding_evaluator: SettingsEmbeddingEvaluator = None ): """Splits data, trains a ms2deepscore model, and does benchmarking. 
@@ -43,7 +45,7 @@ def train_ms2deepscore_wrapper(settings: SettingsMS2Deepscore, validation_spectra = load_spectra_in_ionmode(settings.validation_spectra_file_name, settings.ionisation_mode) # Train model - _, history = train_ms2ds_model(training_spectra, validation_spectra, settings.model_directory_name, settings) + ms2ds_model, history = train_ms2ds_model(training_spectra, validation_spectra, settings.model_directory_name, settings) ms2ds_history_plot_file_name = os.path.join(settings.model_directory_name, settings.history_plot_file_name) plot_history(history["losses"], history["val_losses"], ms2ds_history_plot_file_name) @@ -59,6 +61,14 @@ def train_ms2deepscore_wrapper(settings: SettingsMS2Deepscore, create_plots_between_ionmodes(scores_between_all_ionmodes, results_folder=os.path.join(settings.model_directory_name, "benchmarking_results"), nr_of_bins=50) + + if settings_embedding_evaluator: + model = EmbeddingEvaluationModel(settings_embedding_evaluator) + model.train_evaluator(ms2ds_model=ms2ds_model, + training_spectra=training_spectra, + validation_spectra=validation_spectra) + model.save(os.path.join(settings.model_directory_name, settings_embedding_evaluator.model_file_name)) + return settings.model_directory_name From da343d99ac8496100ad7199cb60adf3195ce181f Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Tue, 27 Jan 2026 15:03:26 +0100 Subject: [PATCH 4/6] Add to test --- tests/test_training_wrapper_function.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/test_training_wrapper_function.py b/tests/test_training_wrapper_function.py index d1b870f2..140165bc 100644 --- a/tests/test_training_wrapper_function.py +++ b/tests/test_training_wrapper_function.py @@ -3,8 +3,8 @@ from matchms.exporting import save_as_mgf from matchms.importing import load_spectra -from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore -from ms2deepscore.models import load_model +from ms2deepscore.SettingsMS2Deepscore import 
SettingsMS2Deepscore, SettingsEmbeddingEvaluator +from ms2deepscore.models import load_model, load_embedding_evaluator from ms2deepscore.wrapper_functions.training_wrapper_functions import (train_ms2deepscore_wrapper, parameter_search, split_data_if_necessary) from tests.create_test_spectra import pesticides_test_spectra @@ -31,12 +31,14 @@ def test_train_wrapper_ms2ds_model(tmp_path): "train_test_split_fraction": 5, }) - model_directory_name = train_ms2deepscore_wrapper(settings) + model_directory_name = train_ms2deepscore_wrapper(settings, SettingsEmbeddingEvaluator()) # Test model is created and can be loaded model_file_name = os.path.join(model_directory_name, settings.model_file_name) assert os.path.isfile(model_file_name) load_model(model_file_name) - + embedding_evaluator_file_name = os.path.join(settings.model_directory_name, "embedding_evaluator.pt") + assert os.path.isfile(embedding_evaluator_file_name) + load_embedding_evaluator(embedding_evaluator_file_name) assert os.path.isfile(os.path.join(tmp_path, settings.results_folder, model_directory_name, "benchmarking_results", "average_per_bin.svg")) assert os.path.isfile(os.path.join(tmp_path, settings.results_folder, From d7adf18979a96217152e42774648706bc7ff59c1 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Tue, 27 Jan 2026 15:04:46 +0100 Subject: [PATCH 5/6] Add embedding evaluator training to changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 872462b7..87113432 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added - The training pair sampling for both ionmodes is now balanced over the different ionmode pairs. - +- The training wrapper now also trains the embedding evaluator (if settings are given). ### Fixed - Datasplit of test, train and val, is not done sepparately for ionmodes anymore. 
From 2f951af025db5e8cc86979246610d879d5659227 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Tue, 27 Jan 2026 15:06:31 +0100 Subject: [PATCH 6/6] Add training of Embedding evaluator to readme as well --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c53b2efb..c0819c91 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -![GitHub](https://img.shields.io/github/license/matchms/ms2deepscore) +![GitHub](https://img.shields.io/github/license/matchms/ms2deepscore) [![PyPI](https://img.shields.io/pypi/v/ms2deepscore?color=teal)](https://pypi.org/project/ms2deepscore/) ![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/matchms/ms2deepscore/CI_build.yml?branch=main) [![SonarCloud Quality Gate](https://sonarcloud.io/api/project_badges/measure?project=matchms_ms2deepscore&metric=alert_status)](https://sonarcloud.io/dashboard?id=matchms_ms2deepscore) @@ -116,7 +116,7 @@ To train your own model you can run the code below. Please first ensure cleaning your spectra. We recommend using the cleaning pipeline in [matchms](https://github.com/matchms/matchms). ```python -from ms2deepscore import SettingsMS2Deepscore +from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore, SettingsEmbeddingEvaluator from ms2deepscore.wrapper_functions.training_wrapper_functions import train_ms2deepscore_wrapper spectrum_file = "./combined_libraries.mgf" @@ -131,7 +131,9 @@ settings = SettingsMS2Deepscore( "mean": 0, "standard_deviation": 1000})], validation_split_fraction=20) -train_ms2deepscore_wrapper(settings) +train_ms2deepscore_wrapper(settings, + SettingsEmbeddingEvaluator() # Passing these settings also trains the embedding evaluator. Omit this argument (default: None) to skip that step. + ) ``` ## Contributing We welcome contributions to the development of ms2deepscore! 
Have a look at the [contribution guidelines](https://github.com/matchms/ms2deepscore/blob/main/CONTRIBUTING.md).