Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
- The training pair sampling for both ionmodes is now balanced over the different ionmode pairs.

- The training wrapper now also trains the embedding evaluator (if settings are given)
### Fixed
- The data split into test, train and validation sets is no longer done separately per ionmode.

Expand Down
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
![GitHub](https://img.shields.io/github/license/matchms/ms2deepscore)
![GitHub](https://img.shields.io/github/license/matchms/ms2deepscore)
[![PyPI](https://img.shields.io/pypi/v/ms2deepscore?color=teal)](https://pypi.org/project/ms2deepscore/)
![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/matchms/ms2deepscore/CI_build.yml?branch=main)
[![SonarCloud Quality Gate](https://sonarcloud.io/api/project_badges/measure?project=matchms_ms2deepscore&metric=alert_status)](https://sonarcloud.io/dashboard?id=matchms_ms2deepscore)
Expand Down Expand Up @@ -116,7 +116,7 @@ To train your own model you can run the code below.
Please make sure your spectra are cleaned first. We recommend using the cleaning pipeline in [matchms](https://github.com/matchms/matchms).

```python
from ms2deepscore import SettingsMS2Deepscore
from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore, SettingsEmbeddingEvaluator
from ms2deepscore.wrapper_functions.training_wrapper_functions import train_ms2deepscore_wrapper

spectrum_file = "./combined_libraries.mgf"
Expand All @@ -131,7 +131,9 @@ settings = SettingsMS2Deepscore(
"mean": 0, "standard_deviation": 1000})],
validation_split_fraction=20)

train_ms2deepscore_wrapper(settings)
train_ms2deepscore_wrapper(settings,
SettingsEmbeddingEvaluator() # this results in also training the embedding evaluator. Leave as None if you don't want to train this.
)
```
## Contributing
We welcome contributions to the development of ms2deepscore! Have a look at the [contribution guidelines](https://github.com/matchms/ms2deepscore/blob/main/CONTRIBUTING.md).
Expand Down
4 changes: 4 additions & 0 deletions ms2deepscore/SettingsMS2Deepscore.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,10 @@ def __init__(self, **settings):
self.learning_rate = 0.0001
self.num_epochs = 5

# When using the training wrapper, this is used as the file name of the embedding evaluator model.
# The file is stored in the same folder as the ms2deepscore model.
self.model_file_name = "embedding_evaluator.pt"

if settings:
# Coerce incoming values against defaults for consistency
settings = _coerce_settings_dict(settings, self)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from matchms.similarity.vector_similarity_functions import jaccard_similarity_matrix

from ms2deepscore.SettingsMS2Deepscore import SettingsEmbeddingEvaluator
from ms2deepscore.models import SiameseSpectralModel
from ms2deepscore.tensorize_spectra import tensorize_spectra
from ms2deepscore.train_new_model.inchikey_pair_selection import compute_fingerprints_for_training
from ms2deepscore.vector_operations import cosine_similarity_matrix
Expand All @@ -29,7 +30,7 @@ class DataGeneratorEmbeddingEvaluation:
"""

def __init__(self, spectrums: List[Spectrum],
ms2ds_model,
ms2ds_model: SiameseSpectralModel,
settings: SettingsEmbeddingEvaluator,
device="cpu",
):
Expand Down
14 changes: 12 additions & 2 deletions ms2deepscore/wrapper_functions/training_wrapper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
from matchms.importing import load_spectra

from ms2deepscore.benchmarking.CalculateScoresBetweenAllIonmodes import CalculateScoresBetweenAllIonmodes
from ms2deepscore.models import EmbeddingEvaluationModel
from ms2deepscore.models.SiameseSpectralModel import (SiameseSpectralModel,
train)
from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore
from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore, SettingsEmbeddingEvaluator
from ms2deepscore.train_new_model import TrainingBatchGenerator, create_spectrum_pair_generator
from ms2deepscore.validation_loss_calculation.ValidationLossCalculator import ValidationLossCalculator
from ms2deepscore.train_new_model.train_ms2deepscore import \
Expand All @@ -25,6 +26,7 @@


def train_ms2deepscore_wrapper(settings: SettingsMS2Deepscore,
settings_embedding_evaluator: SettingsEmbeddingEvaluator = None
):
"""Splits data, trains a ms2deepscore model, and does benchmarking.

Expand All @@ -43,7 +45,7 @@ def train_ms2deepscore_wrapper(settings: SettingsMS2Deepscore,
validation_spectra = load_spectra_in_ionmode(settings.validation_spectra_file_name, settings.ionisation_mode)

# Train model
_, history = train_ms2ds_model(training_spectra, validation_spectra, settings.model_directory_name, settings)
ms2ds_model, history = train_ms2ds_model(training_spectra, validation_spectra, settings.model_directory_name, settings)

ms2ds_history_plot_file_name = os.path.join(settings.model_directory_name, settings.history_plot_file_name)
plot_history(history["losses"], history["val_losses"], ms2ds_history_plot_file_name)
Expand All @@ -59,6 +61,14 @@ def train_ms2deepscore_wrapper(settings: SettingsMS2Deepscore,
create_plots_between_ionmodes(scores_between_all_ionmodes,
results_folder=os.path.join(settings.model_directory_name, "benchmarking_results"),
nr_of_bins=50)

if settings_embedding_evaluator:
model = EmbeddingEvaluationModel(settings_embedding_evaluator)
model.train_evaluator(ms2ds_model=ms2ds_model,
training_spectra=training_spectra,
validation_spectra=validation_spectra)
model.save(os.path.join(settings.model_directory_name, settings_embedding_evaluator.model_file_name))

return settings.model_directory_name


Expand Down
10 changes: 6 additions & 4 deletions tests/test_training_wrapper_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from matchms.exporting import save_as_mgf
from matchms.importing import load_spectra

from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore
from ms2deepscore.models import load_model
from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore, SettingsEmbeddingEvaluator
from ms2deepscore.models import load_model, load_embedding_evaluator
from ms2deepscore.wrapper_functions.training_wrapper_functions import (train_ms2deepscore_wrapper, parameter_search,
split_data_if_necessary)
from tests.create_test_spectra import pesticides_test_spectra
Expand All @@ -31,12 +31,14 @@ def test_train_wrapper_ms2ds_model(tmp_path):
"train_test_split_fraction": 5,
})

model_directory_name = train_ms2deepscore_wrapper(settings)
model_directory_name = train_ms2deepscore_wrapper(settings, SettingsEmbeddingEvaluator())
# Test model is created and can be loaded
model_file_name = os.path.join(model_directory_name, settings.model_file_name)
assert os.path.isfile(model_file_name)
load_model(model_file_name)

embedding_evaluator_file_name = os.path.join(settings.model_directory_name, "embedding_evaluator.pt")
assert os.path.isfile(embedding_evaluator_file_name)
load_embedding_evaluator(embedding_evaluator_file_name)
assert os.path.isfile(os.path.join(tmp_path, settings.results_folder,
model_directory_name, "benchmarking_results", "average_per_bin.svg"))
assert os.path.isfile(os.path.join(tmp_path, settings.results_folder,
Expand Down