From 83e084ba494837ce0d9aa50a6c4c6da917ed0279 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Wed, 20 Aug 2025 16:40:41 +0200 Subject: [PATCH 01/48] Add split by ionmode to utils.py --- ms2deepscore/utils.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/ms2deepscore/utils.py b/ms2deepscore/utils.py index ed85d6d0..73d99777 100644 --- a/ms2deepscore/utils.py +++ b/ms2deepscore/utils.py @@ -1,6 +1,6 @@ import os import pickle -from typing import Generator, List +from typing import Generator, List, Tuple import numba import numpy as np from matchms import Spectrum @@ -130,3 +130,25 @@ def validate_bin_order(score_bins): if low != previous_high: raise ValueError("There is a gap or overlap between bins; The bins should cover everything between 0 and 1.") previous_high = high + +def split_by_ionmode(spectra:List[Spectrum]) -> Tuple[List[Spectrum], List[Spectrum]]: + """Splits spectra into list of positive ionmode and list of negative ionmode spectra. + + Removes spectra without correct ionmode metadata entry. + """ + pos_spectra = [] + neg_spectra = [] + spectra_removed = 0 + for spectrum in tqdm(spectra, + desc="Splitting pos and neg mode spectra"): + if spectrum is not None: + ionmode = spectrum.get("ionmode") + if ionmode == "positive": + pos_spectra.append(spectrum) + elif ionmode == "negative": + neg_spectra.append(spectrum) + else: + spectra_removed += 1 + print(f"The spectra, are split in {len(pos_spectra)} positive spectra " + f"and {len(neg_spectra)} negative mode spectra. 
{spectra_removed} were removed") + return pos_spectra, neg_spectra \ No newline at end of file From efd56c8c3fed8d0a6f9d3b620c9bf24b245fe8de Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Wed, 20 Aug 2025 16:41:37 +0200 Subject: [PATCH 02/48] Make inchikey pair selection clearer --- .../inchikey_pair_selection.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/ms2deepscore/train_new_model/inchikey_pair_selection.py b/ms2deepscore/train_new_model/inchikey_pair_selection.py index bc619465..1042606d 100644 --- a/ms2deepscore/train_new_model/inchikey_pair_selection.py +++ b/ms2deepscore/train_new_model/inchikey_pair_selection.py @@ -126,7 +126,8 @@ def compute_jaccard_similarity_per_bin( selected_scores_per_bin = np.zeros((num_bins, size, max_pairs_per_bin), dtype=np.float32) for idx_fingerprint_i in prange(size): - tanimoto_scores = tanimoto_scores_row(fingerprints, idx_fingerprint_i) + fingerprint_i = fingerprints[idx_fingerprint_i, :] + tanimoto_scores = tanimoto_scores_row(fingerprint_i, fingerprints) for bin_number in range(num_bins): selection_bin = selection_bins[bin_number] @@ -238,16 +239,16 @@ def convert_to_selected_pairs_list(pair_frequency_matrixes: np.ndarray, selected_pairs_per_bin = [] for bin_id, bin_pair_frequency_matrix in enumerate(tqdm(pair_frequency_matrixes)): selected_pairs = [] - for inchikey1, pair_frequency_row in enumerate(bin_pair_frequency_matrix): + for inchikey1_index, pair_frequency_row in enumerate(bin_pair_frequency_matrix): for inchikey2_index, pair_frequency in enumerate(pair_frequency_row): if pair_frequency > 0: - inchikey2 = available_pairs_per_bin_matrix[bin_id][inchikey1][inchikey2_index] - score = scores_matrix[bin_id][inchikey1][inchikey2_index] + inchikey2 = available_pairs_per_bin_matrix[bin_id][inchikey1_index][inchikey2_index] + score = scores_matrix[bin_id][inchikey1_index][inchikey2_index] selected_pairs.extend( - [(inchikeys14_unique[inchikey1], inchikeys14_unique[inchikey2], 
score)] * pair_frequency) + [(inchikeys14_unique[inchikey1_index], inchikeys14_unique[inchikey2], score)] * pair_frequency) # remove duplicate pairs position_of_first_inchikey_in_matrix = available_pairs_per_bin_matrix[bin_id][ - inchikey2] == inchikey1 + inchikey2] == inchikey1_index bin_pair_frequency_matrix[inchikey2][position_of_first_inchikey_in_matrix] = 0 selected_pairs_per_bin.append(selected_pairs) return selected_pairs_per_bin @@ -392,18 +393,18 @@ def get_nr_of_available_pairs_in_bin(selected_pairs_per_bin_matrix: np.ndarray) @jit(nopython=True) -def tanimoto_scores_row(fingerprints, idx): - size = fingerprints.shape[0] +def tanimoto_scores_row(single_fingerprint, list_of_fingerprints): + size = list_of_fingerprints.shape[0] tanimoto_scores = np.zeros(size) - fingerprint_i = fingerprints[idx, :] for idx_fingerprint_j in range(size): - fingerprint_j = fingerprints[idx_fingerprint_j, :] - tanimoto_score = jaccard_index(fingerprint_i, fingerprint_j) + fingerprint_j = list_of_fingerprints[idx_fingerprint_j, :] + tanimoto_score = jaccard_index(single_fingerprint, fingerprint_j) tanimoto_scores[idx_fingerprint_j] = tanimoto_score return tanimoto_scores + def select_inchi_for_unique_inchikeys( list_of_spectra: List['Spectrum'] ) -> Tuple[List['Spectrum'], List[str]]: From 31c2fd370a90acb9458408647a0e7107e6fa9615 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Wed, 20 Aug 2025 16:42:04 +0200 Subject: [PATCH 03/48] Create inchikey_pair_selection_cross_ionmode.py --- .../inchikey_pair_selection_cross_ionmode.py | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py diff --git a/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py b/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py new file mode 100644 index 00000000..2c4a14a2 --- /dev/null +++ b/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py @@ -0,0 +1,116 @@ 
+from typing import List, Tuple +import numpy as np +from matchms import Spectrum +from numba import jit, prange +from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore +from ms2deepscore.train_new_model.inchikey_pair_selection import compute_fingerprints_for_training, \ + balanced_selection_of_pairs_per_bin, convert_to_selected_pairs_list, tanimoto_scores_row + + +def select_compound_pairs_wrapper_across_ionmode( + spectra_1: List[Spectrum], + spectra_2: List[Spectrum], + settings: SettingsMS2Deepscore, +) -> List[Tuple[str, str, float]]: + """Returns a InchikeyPairGenerator object containing equally balanced pairs over the different bins + + spectra: + A list of spectra + settings: + The settings that should be used for selecting the compound pairs wrapper. The settings should be specified as a + SettingsMS2Deepscore object. + + Returns + ------- + InchikeyPairGenerator + InchikeyPairGenerator containing balanced pairs. The pairs are stored as [(inchikey1, inchikey2, score)] + """ + if settings.random_seed is not None: + np.random.seed(settings.random_seed) + + fingerprints_1, inchikeys14_unique_1 = compute_fingerprints_for_training( + spectra_1, + settings.fingerprint_type, + settings.fingerprint_nbits + ) + fingerprints_2, inchikeys14_unique_2 = compute_fingerprints_for_training( + spectra_2, + settings.fingerprint_type, + settings.fingerprint_nbits + ) + + if len(inchikeys14_unique_1) < settings.batch_size or len(inchikeys14_unique_2) < settings.batch_size: + raise ValueError("The number of unique inchikeys must be larger than the batch size.") + + available_pairs_per_bin_matrix, available_scores_per_bin_matrix = compute_jaccard_similarity_per_bin_across_ionmodes( + fingerprints_1, fingerprints_2, settings.max_pairs_per_bin, settings.same_prob_bins) + + pair_frequency_matrixes = balanced_selection_of_pairs_per_bin( + available_pairs_per_bin_matrix, settings) + + selected_pairs_per_bin = convert_to_selected_pairs_list( + pair_frequency_matrixes, 
available_pairs_per_bin_matrix, + available_scores_per_bin_matrix, inchikeys14_unique_1 + inchikeys14_unique_2) + return [pair for pairs in selected_pairs_per_bin for pair in pairs] + + +@jit(nopython=True, parallel=True) +def compute_jaccard_similarity_per_bin_across_ionmodes( + fingerprints_1, + fingerprints_2, + max_pairs_per_bin, + selection_bins=np.array([(x / 10, x / 10 + 0.1) for x in range(10)]) +) -> Tuple[np.ndarray, np.ndarray]: + """Randomly selects compound pairs per tanimoto bin, up to max_pairs_per_bin + + returns: + 2 3d numpy arrays are returned, the first encodes the pairs per bin and the second the corresponding scores. + A 3D numpy array with shape [nr_of_bins, nr_of_fingerprints, max_pairs_per_bin]. + An example structure for bin 1, with 3 fingerprints and max_pairs_per_bin =4 would be: + [[1,2,-1,-1], + [0,3,-1,-1], + [0,2,-1,-1],] + The pairs are encoded by the index and the value. + So the first row encodes pairs between fingerpint 0 and 1, fingerprint 0 and 2. + The -1 encode that no more pairs were found for this fingerprint in this bin. 
+ """ + + size_1 = fingerprints_1.shape[0] + size_2 = fingerprints_2.shape[0] + + num_bins = len(selection_bins) + + selected_pairs_per_bin = -1 * np.ones((num_bins, size_1 + size_2, max_pairs_per_bin), dtype=np.int32) + selected_scores_per_bin = np.zeros((num_bins, size_1 + size_2, max_pairs_per_bin), dtype=np.float32) + + for idx_fingerprint_i in prange(size_1): + fingerprint_i = fingerprints_1[idx_fingerprint_i, :] + tanimoto_scores = tanimoto_scores_row(fingerprint_i, fingerprints_2) + + for bin_number in range(num_bins): + selection_bin = selection_bins[bin_number] + indices = np.nonzero((tanimoto_scores > selection_bin[0]) & (tanimoto_scores <= selection_bin[1]))[0] + + np.random.shuffle(indices) + indices = indices[:max_pairs_per_bin] + num_indices = len(indices) + selected_scores_per_bin[bin_number, idx_fingerprint_i, :num_indices] = tanimoto_scores[indices] + selected_pairs_per_bin[bin_number, idx_fingerprint_i, :num_indices] = indices + size_1 + + for idx_fingerprint_2 in prange(size_2): + fingerprint_i = fingerprints_2[idx_fingerprint_2, :] + idx_fingerprint_corrected = idx_fingerprint_2 + size_1 + tanimoto_scores = tanimoto_scores_row(fingerprint_i, fingerprints_2) + + for bin_number in range(num_bins): + selection_bin = selection_bins[bin_number] + indices = np.nonzero((tanimoto_scores > selection_bin[0]) & (tanimoto_scores <= selection_bin[1]))[0] + + np.random.shuffle(indices) + indices = indices[:max_pairs_per_bin] + num_indices = len(indices) + + selected_pairs_per_bin[bin_number, idx_fingerprint_corrected, :num_indices] = indices + selected_scores_per_bin[bin_number, idx_fingerprint_corrected, :num_indices] = tanimoto_scores[indices] + + return selected_pairs_per_bin, selected_scores_per_bin From 46fb9c4e0689743e5a5f41e9c944b8da2411b7e5 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Wed, 20 Aug 2025 16:42:52 +0200 Subject: [PATCH 04/48] Add first test for select_compound_pairs_wrapper_with_resampling_across_ionmodes --- 
tests/test_inchikey_pair_selection.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_inchikey_pair_selection.py b/tests/test_inchikey_pair_selection.py index 87720017..71eb63a0 100644 --- a/tests/test_inchikey_pair_selection.py +++ b/tests/test_inchikey_pair_selection.py @@ -299,3 +299,17 @@ def check_balanced_scores_selecting_inchikey_pairs(selected_inchikey_pairs: Inch # Check that the number of pairs per bin is equal for all bins assert len(set(score_bin_counts.values())) == 1 +from ms2deepscore.train_new_model.inchikey_pair_selection_cross_ionmode import select_compound_pairs_wrapper_across_ionmode +def test_select_compound_pairs_wrapper_with_resampling_across_ionmodes(): + spectrums_1 = create_test_spectra(num_of_unique_inchikeys=26, num_of_spectra_per_inchikey=1) + spectrums_2 = create_test_spectra(num_of_unique_inchikeys=25, num_of_spectra_per_inchikey=2) + for spectrum in spectrums_1: + spectrum.set("inchikey", "a" + spectrum.get("inchikey")) + bins = [(0.8, 0.9), (0.7, 0.8), (0.9, 1.0), (0.6, 0.7), (0.5, 0.6), + (0.4, 0.5), (0.3, 0.4), (0.2, 0.3), (0.1, 0.2), (-0.01, 0.1)] + max_pair_resampling = 10 + settings = SettingsMS2Deepscore(same_prob_bins=np.array(bins, dtype="float32"), + average_inchikey_sampling_count=10, + batch_size=8, + max_pair_resampling=max_pair_resampling) + selected_inchikey_pairs = select_compound_pairs_wrapper_across_ionmode(spectrums_1, spectrums_2, settings) From 528b0e5083f68638acd68493af6b7e580455c79f Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Thu, 21 Aug 2025 09:16:08 +0200 Subject: [PATCH 05/48] Move InchikeyPairGenerator to separate file --- .../train_new_model/InchikeyPairGenerator.py | 61 ++++++++++++++++++ ms2deepscore/train_new_model/__init__.py | 6 +- .../train_new_model/data_generators.py | 64 +------------------ tests/test_siamese_spectra_model.py | 3 +- 4 files changed, 69 insertions(+), 65 deletions(-) create mode 100644 ms2deepscore/train_new_model/InchikeyPairGenerator.py diff 
--git a/ms2deepscore/train_new_model/InchikeyPairGenerator.py b/ms2deepscore/train_new_model/InchikeyPairGenerator.py new file mode 100644 index 00000000..4127e502 --- /dev/null +++ b/ms2deepscore/train_new_model/InchikeyPairGenerator.py @@ -0,0 +1,61 @@ +import json +from collections import Counter +from typing import List, Tuple + + +class InchikeyPairGenerator: + def __init__(self, selected_inchikey_pairs: List[Tuple[str, str, float]]): + """ + Parameters + ---------- + selected_inchikey_pairs: + A list with tuples encoding inchikey pairs like: (inchikey1, inchikey2, tanimoto_score) + """ + self.selected_inchikey_pairs = selected_inchikey_pairs + + def generator(self, shuffle: bool, random_nr_generator): + """Infinite generator to loop through all inchikeys. + After looping through all inchikeys the order is shuffled. + """ + while True: + if shuffle: + random_nr_generator.shuffle(self.selected_inchikey_pairs) + + for inchikey1, inchikey2, tanimoto_score in self.selected_inchikey_pairs: + yield inchikey1, inchikey2, tanimoto_score + + def __len__(self): + return len(self.selected_inchikey_pairs) + + def __str__(self): + return f"InchikeyPairGenerator with {len(self.selected_inchikey_pairs)} pairs available" + + def get_scores(self): + return [score for _, _, score in self.selected_inchikey_pairs] + + def get_inchikey_counts(self) -> Counter: + """returns the frequency each inchikey occurs""" + inchikeys = Counter() + for inchikey_1, inchikey_2, _ in self.selected_inchikey_pairs: + inchikeys[inchikey_1] += 1 + inchikeys[inchikey_2] += 1 + return inchikeys + + def get_scores_per_inchikey(self): + inchikey_scores = {} + for inchikey_1, inchikey_2, score in self.selected_inchikey_pairs: + if inchikey_1 in inchikey_scores: + inchikey_scores[inchikey_1].append(score) + else: + inchikey_scores[inchikey_1] = [] + if inchikey_2 in inchikey_scores: + inchikey_scores[inchikey_2].append(score) + else: + inchikey_scores[inchikey_2] = [] + return inchikey_scores + + def 
save_as_json(self, file_name): + data_for_json = [(item[0], item[1], float(item[2])) for item in self.selected_inchikey_pairs] + + with open(file_name, "w", encoding="utf-8") as f: + json.dump(data_for_json, f) diff --git a/ms2deepscore/train_new_model/__init__.py b/ms2deepscore/train_new_model/__init__.py index c04e0d39..708a63cc 100644 --- a/ms2deepscore/train_new_model/__init__.py +++ b/ms2deepscore/train_new_model/__init__.py @@ -1,9 +1,9 @@ -from .data_generators import SpectrumPairGenerator, InchikeyPairGenerator +from .data_generators import SpectrumPairGenerator +from .InchikeyPairGenerator import InchikeyPairGenerator from .inchikey_pair_selection import (select_compound_pairs_wrapper) __all__ = [ "SpectrumPairGenerator", - "select_compound_pairs_wrapper", - "InchikeyPairGenerator" + "select_compound_pairs_wrapper" ] diff --git a/ms2deepscore/train_new_model/data_generators.py b/ms2deepscore/train_new_model/data_generators.py index 4e638d87..3e422ca6 100644 --- a/ms2deepscore/train_new_model/data_generators.py +++ b/ms2deepscore/train_new_model/data_generators.py @@ -1,8 +1,6 @@ """ Data generators for training/inference with MS2DeepScore model. 
""" -import json -from collections import Counter -from typing import List, Tuple +from typing import List import numpy as np import pandas as pd import torch @@ -12,8 +10,10 @@ from ms2deepscore.SettingsMS2Deepscore import (SettingsEmbeddingEvaluator, SettingsMS2Deepscore) from ms2deepscore.tensorize_spectra import tensorize_spectra +from ms2deepscore.train_new_model import InchikeyPairGenerator from ms2deepscore.train_new_model.inchikey_pair_selection import ( select_compound_pairs_wrapper, compute_fingerprints_for_training) +from ms2deepscore.utils import split_by_ionmode from ms2deepscore.vector_operations import cosine_similarity_matrix @@ -317,61 +317,3 @@ def compute_fingerprint_dataframe(self, fingerprints_df = pd.DataFrame(fingerprints, index=inchikeys14_unique) return fingerprints_df - - -class InchikeyPairGenerator: - def __init__(self, selected_inchikey_pairs: List[Tuple[str, str, float]]): - """ - Parameters - ---------- - selected_inchikey_pairs: - A list with tuples encoding inchikey pairs like: (inchikey1, inchikey2, tanimoto_score) - """ - self.selected_inchikey_pairs = selected_inchikey_pairs - - def generator(self, shuffle: bool, random_nr_generator): - """Infinite generator to loop through all inchikeys. - After looping through all inchikeys the order is shuffled. 
- """ - while True: - if shuffle: - random_nr_generator.shuffle(self.selected_inchikey_pairs) - - for inchikey1, inchikey2, tanimoto_score in self.selected_inchikey_pairs: - yield inchikey1, inchikey2, tanimoto_score - - def __len__(self): - return len(self.selected_inchikey_pairs) - - def __str__(self): - return f"InchikeyPairGenerator with {len(self.selected_inchikey_pairs)} pairs available" - - def get_scores(self): - return [score for _, _, score in self.selected_inchikey_pairs] - - def get_inchikey_counts(self) -> Counter: - """returns the frequency each inchikey occurs""" - inchikeys = Counter() - for inchikey_1, inchikey_2, _ in self.selected_inchikey_pairs: - inchikeys[inchikey_1] += 1 - inchikeys[inchikey_2] += 1 - return inchikeys - - def get_scores_per_inchikey(self): - inchikey_scores = {} - for inchikey_1, inchikey_2, score in self.selected_inchikey_pairs: - if inchikey_1 in inchikey_scores: - inchikey_scores[inchikey_1].append(score) - else: - inchikey_scores[inchikey_1] = [] - if inchikey_2 in inchikey_scores: - inchikey_scores[inchikey_2].append(score) - else: - inchikey_scores[inchikey_2] = [] - return inchikey_scores - - def save_as_json(self, file_name): - data_for_json = [(item[0], item[1], float(item[2])) for item in self.selected_inchikey_pairs] - - with open(file_name, "w", encoding="utf-8") as f: - json.dump(data_for_json, f) diff --git a/tests/test_siamese_spectra_model.py b/tests/test_siamese_spectra_model.py index 21e2e2a3..287c685b 100644 --- a/tests/test_siamese_spectra_model.py +++ b/tests/test_siamese_spectra_model.py @@ -5,7 +5,8 @@ train) from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore from ms2deepscore.tensorize_spectra import tensorize_spectra -from ms2deepscore.train_new_model.data_generators import SpectrumPairGenerator, InchikeyPairGenerator +from ms2deepscore.train_new_model.data_generators import SpectrumPairGenerator +from ms2deepscore.train_new_model import InchikeyPairGenerator from 
ms2deepscore.train_new_model.inchikey_pair_selection import \ select_compound_pairs_wrapper from ms2deepscore.validation_loss_calculation.ValidationLossCalculator import \ From 0aaa092402c1fd9a7721c8c7960a5590d44691ac Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Thu, 21 Aug 2025 09:17:51 +0200 Subject: [PATCH 06/48] Move DataGeneratorEmbeddingEvaluation to separate file --- .../models/EmbeddingEvaluatorModel.py | 3 +- .../DataGeneratorEmbeddingEvaluation.py | 127 ++++++++++++++++++ .../train_new_model/data_generators.py | 124 +---------------- tests/test_data_generators.py | 3 +- 4 files changed, 132 insertions(+), 125 deletions(-) create mode 100644 ms2deepscore/train_new_model/DataGeneratorEmbeddingEvaluation.py diff --git a/ms2deepscore/models/EmbeddingEvaluatorModel.py b/ms2deepscore/models/EmbeddingEvaluatorModel.py index e8ae2528..059a900b 100644 --- a/ms2deepscore/models/EmbeddingEvaluatorModel.py +++ b/ms2deepscore/models/EmbeddingEvaluatorModel.py @@ -7,8 +7,7 @@ from ms2deepscore.__version__ import __version__ from ms2deepscore.models.helper_functions import initialize_device from ms2deepscore.SettingsMS2Deepscore import SettingsEmbeddingEvaluator -from ms2deepscore.train_new_model.data_generators import \ - DataGeneratorEmbeddingEvaluation +from ms2deepscore.train_new_model.DataGeneratorEmbeddingEvaluation import DataGeneratorEmbeddingEvaluation class EmbeddingEvaluationModel(nn.Module): diff --git a/ms2deepscore/train_new_model/DataGeneratorEmbeddingEvaluation.py b/ms2deepscore/train_new_model/DataGeneratorEmbeddingEvaluation.py new file mode 100644 index 00000000..a5697ba9 --- /dev/null +++ b/ms2deepscore/train_new_model/DataGeneratorEmbeddingEvaluation.py @@ -0,0 +1,127 @@ +from typing import List + +import numpy as np +import pandas as pd +import torch +from matchms import Spectrum +from matchms.similarity.vector_similarity_functions import jaccard_similarity_matrix + +from ms2deepscore.SettingsMS2Deepscore import SettingsEmbeddingEvaluator +from 
ms2deepscore.tensorize_spectra import tensorize_spectra +from ms2deepscore.train_new_model.inchikey_pair_selection import compute_fingerprints_for_training +from ms2deepscore.vector_operations import cosine_similarity_matrix + + +class DataGeneratorEmbeddingEvaluation: + """Generates data for training an embedding evaluation model. + + This class provides a data for the training of an embedding evaluation model. + It follows a simple strategy: iterate through all spectra and randomly pick another + spectrum for comparison. This will not compensate the usually drastic biases + in Tanimoto similarity and is hence not meant for training the prediction of those + scores. + The purpose is rather to show a high number of spectra to a model to learn + embedding evaluations. + + Spectra are sampled in groups of size batch_size. Before every epoch the indexes are + shuffled at random. For selected spectra the tanimoto scores, ms2deepscore scores and + embeddings are returned. + """ + + def __init__(self, spectrums: List[Spectrum], + ms2ds_model, + settings: SettingsEmbeddingEvaluator, + device="cpu", + ): + """ + + Parameters + ---------- + spectrums + List of matchms Spectrum objects. 
+ settings + The available settings can be found in SettignsMS2Deepscore + """ + self.current_index = 0 + self.settings = settings + self.spectrums = spectrums + self.inchikey14s = [s.get("inchikey")[:14] for s in spectrums] + self.ms2ds_model = ms2ds_model + self.device = device + self.ms2ds_model.to(self.device) + self.indexes = np.arange(len(self.spectrums)) + self.batch_size = self.settings.evaluator_distribution_size + self.fingerprint_df = self.compute_fingerprint_dataframe( + self.spectrums, + fingerprint_type=self.ms2ds_model.model_settings.fingerprint_type, + fingerprint_nbits=self.ms2ds_model.model_settings.fingerprint_nbits + ) + + # Initialize random number generator + self.rng = np.random.default_rng(self.settings.random_seed) + + self.on_epoch_end() + + def __len__(self): + return int(np.floor(len(self.spectrums) / self.batch_size)) + + def __iter__(self): + return self + + def __next__(self): + if self.current_index < self.__len__(): + batch = self.__getitem__(self.current_index) + self.current_index += 1 + return batch + self.current_index = 0 # make generator executable again + self.on_epoch_end() + raise StopIteration + + def _compute_embeddings_and_scores(self, batch_index: int): + batch_size = self.batch_size + indexes = self.indexes[batch_index * batch_size:((batch_index + 1) * batch_size)] + + spec_tensors, meta_tensors = tensorize_spectra([self.spectrums[i] for i in indexes], + self.ms2ds_model.model_settings) + embeddings = self.ms2ds_model.encoder(spec_tensors.to(self.device), meta_tensors.to(self.device)) + + ms2ds_scores = cosine_similarity_matrix(embeddings.cpu().detach().numpy(), embeddings.cpu().detach().numpy()) + + # Compute true scores + inchikeys = [self.inchikey14s[i] for i in indexes] + fingerprints = self.fingerprint_df.loc[inchikeys].to_numpy() + + tanimoto_scores = jaccard_similarity_matrix(fingerprints, fingerprints) + + return torch.tensor(tanimoto_scores), torch.tensor(ms2ds_scores), embeddings.cpu().detach() + + def 
on_epoch_end(self): + """Updates indexes after each epoch.""" + self.rng.shuffle(self.indexes) + + def __getitem__(self, batch_index: int): + """Generate one batch of data. + """ + return self._compute_embeddings_and_scores(batch_index) + + def compute_fingerprint_dataframe(self, + spectrums: List[Spectrum], + fingerprint_type, + fingerprint_nbits, + ) -> pd.DataFrame: + """Returns a dataframe with a fingerprints dataframe + + spectrums: + A list of spectra + settings: + The settings that should be used for selecting the compound pairs wrapper. The settings should be specified as a + SettingsMS2Deepscore object. + """ + fingerprints, inchikeys14_unique = compute_fingerprints_for_training( + spectrums, + fingerprint_type, + fingerprint_nbits + ) + + fingerprints_df = pd.DataFrame(fingerprints, index=inchikeys14_unique) + return fingerprints_df diff --git a/ms2deepscore/train_new_model/data_generators.py b/ms2deepscore/train_new_model/data_generators.py index 3e422ca6..fb9add77 100644 --- a/ms2deepscore/train_new_model/data_generators.py +++ b/ms2deepscore/train_new_model/data_generators.py @@ -2,19 +2,14 @@ """ from typing import List import numpy as np -import pandas as pd import torch from matchms import Spectrum -from matchms.similarity.vector_similarity_functions import \ - jaccard_similarity_matrix -from ms2deepscore.SettingsMS2Deepscore import (SettingsEmbeddingEvaluator, - SettingsMS2Deepscore) +from ms2deepscore.SettingsMS2Deepscore import (SettingsMS2Deepscore) from ms2deepscore.tensorize_spectra import tensorize_spectra from ms2deepscore.train_new_model import InchikeyPairGenerator from ms2deepscore.train_new_model.inchikey_pair_selection import ( - select_compound_pairs_wrapper, compute_fingerprints_for_training) + select_compound_pairs_wrapper) from ms2deepscore.utils import split_by_ionmode -from ms2deepscore.vector_operations import cosine_similarity_matrix class SpectrumPairGenerator: @@ -202,118 +197,3 @@ def create_data_generator(training_spectra, 
selected_compound_pairs=inchikey_pair_generator, settings=settings) return train_generator - - -class DataGeneratorEmbeddingEvaluation: - """Generates data for training an embedding evaluation model. - - This class provides a data for the training of an embedding evaluation model. - It follows a simple strategy: iterate through all spectra and randomly pick another - spectrum for comparison. This will not compensate the usually drastic biases - in Tanimoto similarity and is hence not meant for training the prediction of those - scores. - The purpose is rather to show a high number of spectra to a model to learn - embedding evaluations. - - Spectra are sampled in groups of size batch_size. Before every epoch the indexes are - shuffled at random. For selected spectra the tanimoto scores, ms2deepscore scores and - embeddings are returned. - """ - - def __init__(self, spectrums: List[Spectrum], - ms2ds_model, - settings: SettingsEmbeddingEvaluator, - device="cpu", - ): - """ - - Parameters - ---------- - spectrums - List of matchms Spectrum objects. 
- settings - The available settings can be found in SettignsMS2Deepscore - """ - self.current_index = 0 - self.settings = settings - self.spectrums = spectrums - self.inchikey14s = [s.get("inchikey")[:14] for s in spectrums] - self.ms2ds_model = ms2ds_model - self.device = device - self.ms2ds_model.to(self.device) - self.indexes = np.arange(len(self.spectrums)) - self.batch_size = self.settings.evaluator_distribution_size - self.fingerprint_df = self.compute_fingerprint_dataframe( - self.spectrums, - fingerprint_type=self.ms2ds_model.model_settings.fingerprint_type, - fingerprint_nbits=self.ms2ds_model.model_settings.fingerprint_nbits - ) - - # Initialize random number generator - self.rng = np.random.default_rng(self.settings.random_seed) - - self.on_epoch_end() - - def __len__(self): - return int(np.floor(len(self.spectrums) / self.batch_size)) - - def __iter__(self): - return self - - def __next__(self): - if self.current_index < self.__len__(): - batch = self.__getitem__(self.current_index) - self.current_index += 1 - return batch - self.current_index = 0 # make generator executable again - self.on_epoch_end() - raise StopIteration - - def _compute_embeddings_and_scores(self, batch_index: int): - batch_size = self.batch_size - indexes = self.indexes[batch_index * batch_size:((batch_index + 1) * batch_size)] - - spec_tensors, meta_tensors = tensorize_spectra([self.spectrums[i] for i in indexes], - self.ms2ds_model.model_settings) - embeddings = self.ms2ds_model.encoder(spec_tensors.to(self.device), meta_tensors.to(self.device)) - - ms2ds_scores = cosine_similarity_matrix(embeddings.cpu().detach().numpy(), embeddings.cpu().detach().numpy()) - - # Compute true scores - inchikeys = [self.inchikey14s[i] for i in indexes] - fingerprints = self.fingerprint_df.loc[inchikeys].to_numpy() - - tanimoto_scores = jaccard_similarity_matrix(fingerprints, fingerprints) - - return torch.tensor(tanimoto_scores), torch.tensor(ms2ds_scores), embeddings.cpu().detach() - - def 
on_epoch_end(self): - """Updates indexes after each epoch.""" - self.rng.shuffle(self.indexes) - - def __getitem__(self, batch_index: int): - """Generate one batch of data. - """ - return self._compute_embeddings_and_scores(batch_index) - - def compute_fingerprint_dataframe(self, - spectrums: List[Spectrum], - fingerprint_type, - fingerprint_nbits, - ) -> pd.DataFrame: - """Returns a dataframe with a fingerprints dataframe - - spectrums: - A list of spectra - settings: - The settings that should be used for selecting the compound pairs wrapper. The settings should be specified as a - SettingsMS2Deepscore object. - """ - fingerprints, inchikeys14_unique = compute_fingerprints_for_training( - spectrums, - fingerprint_type, - fingerprint_nbits - ) - - fingerprints_df = pd.DataFrame(fingerprints, index=inchikeys14_unique) - return fingerprints_df diff --git a/tests/test_data_generators.py b/tests/test_data_generators.py index 18af2303..a39ce1eb 100644 --- a/tests/test_data_generators.py +++ b/tests/test_data_generators.py @@ -6,7 +6,8 @@ from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore, SettingsEmbeddingEvaluator from ms2deepscore.tensorize_spectra import tensorize_spectra from ms2deepscore.train_new_model.data_generators import SpectrumPairGenerator, \ - DataGeneratorEmbeddingEvaluation, create_data_generator + create_data_generator +from ms2deepscore.train_new_model.DataGeneratorEmbeddingEvaluation import DataGeneratorEmbeddingEvaluation from ms2deepscore.train_new_model import InchikeyPairGenerator from tests.create_test_spectra import create_test_spectra From f5ff5e2658e54a4f856f7bd392dd08797a2eabd1 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Thu, 21 Aug 2025 09:22:07 +0200 Subject: [PATCH 07/48] Rename SpectrumPairGenerator.py --- .../{data_generators.py => SpectrumPairGenerator.py} | 1 + ms2deepscore/train_new_model/__init__.py | 2 +- ms2deepscore/train_new_model/train_ms2deepscore.py | 2 +- 
ms2deepscore/wrapper_functions/training_wrapper_functions.py | 2 +- tests/test_data_generators.py | 2 +- tests/test_siamese_spectra_model.py | 2 +- 6 files changed, 6 insertions(+), 5 deletions(-) rename ms2deepscore/train_new_model/{data_generators.py => SpectrumPairGenerator.py} (99%) diff --git a/ms2deepscore/train_new_model/data_generators.py b/ms2deepscore/train_new_model/SpectrumPairGenerator.py similarity index 99% rename from ms2deepscore/train_new_model/data_generators.py rename to ms2deepscore/train_new_model/SpectrumPairGenerator.py index fb9add77..4ed1554e 100644 --- a/ms2deepscore/train_new_model/data_generators.py +++ b/ms2deepscore/train_new_model/SpectrumPairGenerator.py @@ -190,6 +190,7 @@ def create_data_generator(training_spectra, json_save_file=None) -> SpectrumPairGenerator: selected_compound_pairs_training = select_compound_pairs_wrapper(training_spectra, settings=settings) inchikey_pair_generator = InchikeyPairGenerator(selected_compound_pairs_training) + if json_save_file is not None: inchikey_pair_generator.save_as_json(json_save_file) # Create generators diff --git a/ms2deepscore/train_new_model/__init__.py b/ms2deepscore/train_new_model/__init__.py index 708a63cc..14588440 100644 --- a/ms2deepscore/train_new_model/__init__.py +++ b/ms2deepscore/train_new_model/__init__.py @@ -1,4 +1,4 @@ -from .data_generators import SpectrumPairGenerator +from .SpectrumPairGenerator import SpectrumPairGenerator from .InchikeyPairGenerator import InchikeyPairGenerator from .inchikey_pair_selection import (select_compound_pairs_wrapper) diff --git a/ms2deepscore/train_new_model/train_ms2deepscore.py b/ms2deepscore/train_new_model/train_ms2deepscore.py index 8d483498..1a46b167 100644 --- a/ms2deepscore/train_new_model/train_ms2deepscore.py +++ b/ms2deepscore/train_new_model/train_ms2deepscore.py @@ -10,7 +10,7 @@ from ms2deepscore.models.SiameseSpectralModel import (SiameseSpectralModel, train) from ms2deepscore.SettingsMS2Deepscore import 
SettingsMS2Deepscore -from ms2deepscore.train_new_model.data_generators import create_data_generator +from ms2deepscore.train_new_model.SpectrumPairGenerator import create_data_generator from ms2deepscore.validation_loss_calculation.ValidationLossCalculator import \ ValidationLossCalculator diff --git a/ms2deepscore/wrapper_functions/training_wrapper_functions.py b/ms2deepscore/wrapper_functions/training_wrapper_functions.py index 3c878168..9936124b 100644 --- a/ms2deepscore/wrapper_functions/training_wrapper_functions.py +++ b/ms2deepscore/wrapper_functions/training_wrapper_functions.py @@ -14,7 +14,7 @@ train) from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore from ms2deepscore.validation_loss_calculation.ValidationLossCalculator import ValidationLossCalculator -from ms2deepscore.train_new_model.data_generators import create_data_generator +from ms2deepscore.train_new_model.SpectrumPairGenerator import create_data_generator from ms2deepscore.train_new_model.train_ms2deepscore import \ train_ms2ds_model, plot_history, save_history from ms2deepscore.train_new_model.validation_and_test_split import \ diff --git a/tests/test_data_generators.py b/tests/test_data_generators.py index a39ce1eb..37ea3af5 100644 --- a/tests/test_data_generators.py +++ b/tests/test_data_generators.py @@ -5,7 +5,7 @@ from matchms import Spectrum from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore, SettingsEmbeddingEvaluator from ms2deepscore.tensorize_spectra import tensorize_spectra -from ms2deepscore.train_new_model.data_generators import SpectrumPairGenerator, \ +from ms2deepscore.train_new_model.SpectrumPairGenerator import SpectrumPairGenerator, \ create_data_generator from ms2deepscore.train_new_model.DataGeneratorEmbeddingEvaluation import DataGeneratorEmbeddingEvaluation from ms2deepscore.train_new_model import InchikeyPairGenerator diff --git a/tests/test_siamese_spectra_model.py b/tests/test_siamese_spectra_model.py index 287c685b..a86b69b7 100644 --- 
a/tests/test_siamese_spectra_model.py +++ b/tests/test_siamese_spectra_model.py @@ -5,7 +5,7 @@ train) from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore from ms2deepscore.tensorize_spectra import tensorize_spectra -from ms2deepscore.train_new_model.data_generators import SpectrumPairGenerator +from ms2deepscore.train_new_model.SpectrumPairGenerator import SpectrumPairGenerator from ms2deepscore.train_new_model import InchikeyPairGenerator from ms2deepscore.train_new_model.inchikey_pair_selection import \ select_compound_pairs_wrapper From 2ae0dc3e4a22e33e363bd9d24a8359e0699309e6 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Thu, 21 Aug 2025 09:25:18 +0200 Subject: [PATCH 08/48] Fix InchikeyPairGenerator import --- ms2deepscore/train_new_model/SpectrumPairGenerator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ms2deepscore/train_new_model/SpectrumPairGenerator.py b/ms2deepscore/train_new_model/SpectrumPairGenerator.py index 4ed1554e..15dd083b 100644 --- a/ms2deepscore/train_new_model/SpectrumPairGenerator.py +++ b/ms2deepscore/train_new_model/SpectrumPairGenerator.py @@ -6,7 +6,7 @@ from matchms import Spectrum from ms2deepscore.SettingsMS2Deepscore import (SettingsMS2Deepscore) from ms2deepscore.tensorize_spectra import tensorize_spectra -from ms2deepscore.train_new_model import InchikeyPairGenerator +from ms2deepscore.train_new_model.InchikeyPairGenerator import InchikeyPairGenerator from ms2deepscore.train_new_model.inchikey_pair_selection import ( select_compound_pairs_wrapper) from ms2deepscore.utils import split_by_ionmode @@ -27,7 +27,7 @@ class SpectrumPairGenerator: """ def __init__(self, spectrums: List[Spectrum], - selected_compound_pairs: "InchikeyPairGenerator", + selected_compound_pairs: InchikeyPairGenerator, settings: SettingsMS2Deepscore): """Generates data for training a siamese Pytorch model. 
From 1ed2816c52a1399d5258bd5330707c400cd84d66 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Thu, 21 Aug 2025 14:01:15 +0200 Subject: [PATCH 09/48] Factor out data augmentation from SpectrumPairGenerator --- .../train_new_model/SpectrumPairGenerator.py | 47 +----------- .../train_new_model/data_augmentation.py | 74 +++++++++++++++++++ 2 files changed, 77 insertions(+), 44 deletions(-) create mode 100644 ms2deepscore/train_new_model/data_augmentation.py diff --git a/ms2deepscore/train_new_model/SpectrumPairGenerator.py b/ms2deepscore/train_new_model/SpectrumPairGenerator.py index 15dd083b..e1840aa5 100644 --- a/ms2deepscore/train_new_model/SpectrumPairGenerator.py +++ b/ms2deepscore/train_new_model/SpectrumPairGenerator.py @@ -7,6 +7,7 @@ from ms2deepscore.SettingsMS2Deepscore import (SettingsMS2Deepscore) from ms2deepscore.tensorize_spectra import tensorize_spectra from ms2deepscore.train_new_model.InchikeyPairGenerator import InchikeyPairGenerator +from ms2deepscore.train_new_model.data_augmentation import data_augmentation from ms2deepscore.train_new_model.inchikey_pair_selection import ( select_compound_pairs_wrapper) from ms2deepscore.utils import split_by_ionmode @@ -113,8 +114,8 @@ def __getitem__(self, batch_index: int): # Store batches for later epochs self.fixed_set[batch_index] = (spectra_1, spectra_2, meta_1, meta_2, targets) else: - spectra_1 = self._data_augmentation(spectra_1) - spectra_2 = self._data_augmentation(spectra_2) + spectra_1 = data_augmentation(spectra_1, self.model_settings, self.rng) + spectra_2 = data_augmentation(spectra_2, self.model_settings, self.rng) return spectra_1, spectra_2, meta_1, meta_2, targets def _tensorize_all(self, spectrum_pairs): @@ -142,48 +143,6 @@ def _get_spectrum_with_inchikey(self, inchikey: str) -> Spectrum: raise ValueError("No matching inchikey found (note: expected first 14 characters)") return self.spectrums[self.rng.choice(matching_spectrum_id)] - def _data_augmentation(self, spectra_tensors): - for i in 
range(spectra_tensors.shape[0]): - spectra_tensors[i, :] = self._data_augmentation_spectrum(spectra_tensors[i, :]) - return spectra_tensors - - def _data_augmentation_spectrum(self, spectrum_tensor): - """Data augmentation. - - Parameters - ---------- - spectrum_tensor - Spectrum in Pytorch tensor form. - """ - # Augmentation 1: peak removal (peaks < augment_removal_max) - if self.model_settings.augment_removal_max or self.model_settings.augment_removal_intensity: - # TODO: Factor out function with documentation + example? - - indices_select = torch.where((spectrum_tensor > 0) - & (spectrum_tensor < self.model_settings.augment_removal_intensity))[0] - removal_part = self.rng.random(1) * self.model_settings.augment_removal_max - indices = self.rng.choice(indices_select, int(np.ceil((1 - removal_part) * len(indices_select)))) - if len(indices) > 0: - spectrum_tensor[indices] = 0 - - # Augmentation 2: Change peak intensities - if self.model_settings.augment_intensity: - # TODO: Factor out function with documentation + example? 
- spectrum_tensor = spectrum_tensor * ( - 1 - self.model_settings.augment_intensity * 2 * (torch.rand(spectrum_tensor.shape) - 0.5)) - - # Augmentation 3: Peak addition - if self.model_settings.augment_noise_max and self.model_settings.augment_noise_max > 0: - indices_select = torch.where(spectrum_tensor == 0)[0] - if len(indices_select) > self.model_settings.augment_noise_max: - indices_noise = self.rng.choice(indices_select, - self.rng.integers(0, self.model_settings.augment_noise_max), - replace=False, - ) - spectrum_tensor[indices_noise] = self.model_settings.augment_noise_intensity * torch.rand( - len(indices_noise)) - return spectrum_tensor - def create_data_generator(training_spectra, settings, diff --git a/ms2deepscore/train_new_model/data_augmentation.py b/ms2deepscore/train_new_model/data_augmentation.py new file mode 100644 index 00000000..f02fb65c --- /dev/null +++ b/ms2deepscore/train_new_model/data_augmentation.py @@ -0,0 +1,74 @@ +import numpy as np +import torch + +from ms2deepscore import SettingsMS2Deepscore + + +def data_augmentation(spectra_tensors, + model_settings: SettingsMS2Deepscore, + random_number_generator): + for i in range(spectra_tensors.shape[0]): + spectra_tensors[i, :] = data_augmentation_spectrum(spectra_tensors[i, :], + model_settings, + random_number_generator) + return spectra_tensors + + +def data_augmentation_spectrum(spectrum_tensor, + model_settings: SettingsMS2Deepscore, + random_number_generator): + """Data augmentation. + + Parameters + ---------- + spectrum_tensor + Spectrum in Pytorch tensor form. 
+ """ + # Augmentation 1: peak removal (peaks < augment_removal_max) + peak_removal_for_data_augmentation(spectrum_tensor, model_settings.augment_removal_max, + model_settings.augment_removal_intensity, random_number_generator) + + # Augmentation 2: Change peak intensities + if model_settings.augment_intensity: + spectrum_tensor = change_peak_intensity(spectrum_tensor, model_settings) + + peak_addition_for_data_augmentation(spectrum_tensor, model_settings, random_number_generator) + return spectrum_tensor + +def peak_removal_for_data_augmentation(spectrum_tensor, augment_removal_max, + augment_removal_intensity, random_number_generator): + """Removes small peaks at random for data augmentation. + + Parameters + spectrum_tensor: + Tensorized spectrum + augment_removal_max + Maximum fraction of peaks (if intensity < below augment_removal_intensity) + to be removed randomly. Default is set to 0.2, which means that between + 0 and 20% of all peaks with intensities < augment_removal_intensity + will be removed. + augment_removal_intensity + Specifying that only peaks with intensities < max_intensity will be removed. + random_number_generator + Random number generator used to generate random numbers. 
Can be generated with np.random.default_rng(42) + """ + if augment_removal_max or augment_removal_intensity: + bin_indices_below_removal_intensity = torch.where((spectrum_tensor > 0) + & (spectrum_tensor < augment_removal_intensity))[0] + fraction_of_noise_to_remove = random_number_generator.random(1) * augment_removal_max + number_of_peaks_to_remove = int(np.ceil((1 - fraction_of_noise_to_remove) * len(bin_indices_below_removal_intensity))) + indices = random_number_generator.choice(bin_indices_below_removal_intensity, number_of_peaks_to_remove) + if len(indices) > 0: + spectrum_tensor[indices] = 0 + +def change_peak_intensity(spectrum_tensor, model_settings): + return spectrum_tensor * (1 - model_settings.augment_intensity * 2 * (torch.rand(spectrum_tensor.shape) - 0.5)) + +def peak_addition_for_data_augmentation(spectrum_tensor, model_settings, random_number_generator): + if model_settings.augment_noise_max and model_settings.augment_noise_max > 0: + indices_select = torch.where(spectrum_tensor == 0)[0] + if len(indices_select) > model_settings.augment_noise_max: + indices_noise = random_number_generator.choice( + indices_select, + random_number_generator.integers(0, model_settings.augment_noise_max), replace=False,) + spectrum_tensor[indices_noise] = model_settings.augment_noise_intensity * torch.rand(len(indices_noise)) \ No newline at end of file From 5ea602749c1d046ab6b8cc2827498864557f859b Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Thu, 21 Aug 2025 14:01:54 +0200 Subject: [PATCH 10/48] Fix bug in peak_removal_for_data_augmentation, picking with replacing --- ms2deepscore/train_new_model/data_augmentation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ms2deepscore/train_new_model/data_augmentation.py b/ms2deepscore/train_new_model/data_augmentation.py index f02fb65c..c930c93b 100644 --- a/ms2deepscore/train_new_model/data_augmentation.py +++ b/ms2deepscore/train_new_model/data_augmentation.py @@ -57,7 +57,7 @@ def 
peak_removal_for_data_augmentation(spectrum_tensor, augment_removal_max, & (spectrum_tensor < augment_removal_intensity))[0] fraction_of_noise_to_remove = random_number_generator.random(1) * augment_removal_max number_of_peaks_to_remove = int(np.ceil((1 - fraction_of_noise_to_remove) * len(bin_indices_below_removal_intensity))) - indices = random_number_generator.choice(bin_indices_below_removal_intensity, number_of_peaks_to_remove) + indices = random_number_generator.choice(bin_indices_below_removal_intensity, number_of_peaks_to_remove, replace=False) if len(indices) > 0: spectrum_tensor[indices] = 0 From 7a3c9b264128feb5512d011c7cfdd1955fa011cd Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Thu, 21 Aug 2025 14:02:08 +0200 Subject: [PATCH 11/48] Add test for peak_removal_for_data_augmentation --- tests/test_data_augmentation.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tests/test_data_augmentation.py diff --git a/tests/test_data_augmentation.py b/tests/test_data_augmentation.py new file mode 100644 index 00000000..1bfae246 --- /dev/null +++ b/tests/test_data_augmentation.py @@ -0,0 +1,17 @@ +import numpy as np +import torch +from matchms import Spectrum + +from ms2deepscore import SettingsMS2Deepscore +from ms2deepscore.tensorize_spectra import tensorize_spectra +from ms2deepscore.train_new_model.data_augmentation import (data_augmentation, data_augmentation_spectrum, + peak_addition_for_data_augmentation, + peak_removal_for_data_augmentation, change_peak_intensity) + +def test_peak_removal_for_data_augmentation(): + spectrum_tensor = torch.tensor([0.0, 0.12, 0.05, 0.78, 0.0, 0.34, 1.0, 0.0, 0.27, 0.65]) + peak_removal_for_data_augmentation(spectrum_tensor, + augment_removal_max=0.5 , + augment_removal_intensity=0.3, + random_number_generator= np.random.default_rng(42)) + assert torch.equal(spectrum_tensor, torch.tensor([0.0, 0.12, 0.0, 0.78, 0.0, 0.34, 1.0, 0.0, 0.0, 0.65])) From 2b61290088a0ed8c7774246504bb8023c7f1d726 Mon Sep 
17 00:00:00 2001 From: niekdejonge Date: Thu, 21 Aug 2025 14:36:58 +0200 Subject: [PATCH 12/48] Add docstring to peak_addition_for_data_augmentation --- .../train_new_model/data_augmentation.py | 33 ++++++++++++++----- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/ms2deepscore/train_new_model/data_augmentation.py b/ms2deepscore/train_new_model/data_augmentation.py index c930c93b..8ccf5281 100644 --- a/ms2deepscore/train_new_model/data_augmentation.py +++ b/ms2deepscore/train_new_model/data_augmentation.py @@ -32,7 +32,8 @@ def data_augmentation_spectrum(spectrum_tensor, if model_settings.augment_intensity: spectrum_tensor = change_peak_intensity(spectrum_tensor, model_settings) - peak_addition_for_data_augmentation(spectrum_tensor, model_settings, random_number_generator) + peak_addition_for_data_augmentation(spectrum_tensor, model_settings.augment_noise_max, + model_settings.augment_noise_intensity, random_number_generator) return spectrum_tensor def peak_removal_for_data_augmentation(spectrum_tensor, augment_removal_max, @@ -64,11 +65,25 @@ def peak_removal_for_data_augmentation(spectrum_tensor, augment_removal_max, def change_peak_intensity(spectrum_tensor, model_settings): return spectrum_tensor * (1 - model_settings.augment_intensity * 2 * (torch.rand(spectrum_tensor.shape) - 0.5)) -def peak_addition_for_data_augmentation(spectrum_tensor, model_settings, random_number_generator): - if model_settings.augment_noise_max and model_settings.augment_noise_max > 0: - indices_select = torch.where(spectrum_tensor == 0)[0] - if len(indices_select) > model_settings.augment_noise_max: - indices_noise = random_number_generator.choice( - indices_select, - random_number_generator.integers(0, model_settings.augment_noise_max), replace=False,) - spectrum_tensor[indices_noise] = model_settings.augment_noise_intensity * torch.rand(len(indices_noise)) \ No newline at end of file +def peak_addition_for_data_augmentation(spectrum_tensor, augment_noise_max, + 
augment_noise_intensity, random_number_generator): + """Adds noise to a spectrum tensor + spectrum_tensor: + Tensorized spectrum + augment_noise_max + Max number of 'new' noise peaks to add to the spectrum, between 0 to `augment_noise_max` + of peaks are added. + augment_noise_intensity + maximum intensity of the 'new' noise peaks to add to the spectrum, + random_number_generator + Random number generator used to generate random numbers. Can be generated with np.random.default_rng(42) + """ + if augment_noise_max and augment_noise_max > 0: + bin_indices_zero = torch.where(spectrum_tensor == 0)[0] + number_of_noise_peaks_to_add = random_number_generator.integers(0, augment_noise_max) + if len(bin_indices_zero) > number_of_noise_peaks_to_add: + selected_bin_indices_to_add_noise = random_number_generator.choice( + bin_indices_zero,number_of_noise_peaks_to_add, replace=False,) + else: + selected_bin_indices_to_add_noise = bin_indices_zero + spectrum_tensor[selected_bin_indices_to_add_noise] = augment_noise_intensity * torch.rand(len(selected_bin_indices_to_add_noise)) \ No newline at end of file From 187df44986a89a082ca4f97fb44df005d5a3a78b Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Thu, 21 Aug 2025 14:37:06 +0200 Subject: [PATCH 13/48] Add test for peak_addition_for_data_augmentation --- tests/test_data_augmentation.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_data_augmentation.py b/tests/test_data_augmentation.py index 1bfae246..da029113 100644 --- a/tests/test_data_augmentation.py +++ b/tests/test_data_augmentation.py @@ -15,3 +15,15 @@ def test_peak_removal_for_data_augmentation(): augment_removal_intensity=0.3, random_number_generator= np.random.default_rng(42)) assert torch.equal(spectrum_tensor, torch.tensor([0.0, 0.12, 0.0, 0.78, 0.0, 0.34, 1.0, 0.0, 0.0, 0.65])) + +def test_peak_addition_for_data_augmentation(): + spectrum_tensor = torch.tensor([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.27, 0.0]) + 
peak_addition_for_data_augmentation(spectrum_tensor, + 4, + 0.02, + random_number_generator= np.random.default_rng(0)) + assert spectrum_tensor[6] == 1.0 + assert spectrum_tensor[8] == 0.27 + assert spectrum_tensor[0] == 0.0 + assert spectrum_tensor[2] != 0.0 # we know this one is changed because of the random number generator + From d71392eb7b876da5daafbded7a083ef709c14fb7 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Thu, 21 Aug 2025 14:47:05 +0200 Subject: [PATCH 14/48] Make change_peak_intensity in place --- ms2deepscore/train_new_model/data_augmentation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ms2deepscore/train_new_model/data_augmentation.py b/ms2deepscore/train_new_model/data_augmentation.py index 8ccf5281..7e2edc0c 100644 --- a/ms2deepscore/train_new_model/data_augmentation.py +++ b/ms2deepscore/train_new_model/data_augmentation.py @@ -29,8 +29,7 @@ def data_augmentation_spectrum(spectrum_tensor, model_settings.augment_removal_intensity, random_number_generator) # Augmentation 2: Change peak intensities - if model_settings.augment_intensity: - spectrum_tensor = change_peak_intensity(spectrum_tensor, model_settings) + change_peak_intensity_for_data_augmentation(spectrum_tensor, model_settings.augment_intensity) peak_addition_for_data_augmentation(spectrum_tensor, model_settings.augment_noise_max, model_settings.augment_noise_intensity, random_number_generator) @@ -62,8 +61,9 @@ def peak_removal_for_data_augmentation(spectrum_tensor, augment_removal_max, if len(indices) > 0: spectrum_tensor[indices] = 0 -def change_peak_intensity(spectrum_tensor, model_settings): - return spectrum_tensor * (1 - model_settings.augment_intensity * 2 * (torch.rand(spectrum_tensor.shape) - 0.5)) +def change_peak_intensity_for_data_augmentation(spectrum_tensor, augment_intensity): + if augment_intensity: + spectrum_tensor.mul_(1 - augment_intensity * 2 * (torch.rand(spectrum_tensor.shape) - 0.5)) def 
peak_addition_for_data_augmentation(spectrum_tensor, augment_noise_max, augment_noise_intensity, random_number_generator): From e01cd1f7145c97265cfdbb69bf64a2b9fd13360f Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Thu, 21 Aug 2025 14:47:22 +0200 Subject: [PATCH 15/48] Add basic check for change_peak_intensity_for_data_augmentation --- tests/test_data_augmentation.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/test_data_augmentation.py b/tests/test_data_augmentation.py index da029113..e074e43d 100644 --- a/tests/test_data_augmentation.py +++ b/tests/test_data_augmentation.py @@ -6,7 +6,7 @@ from ms2deepscore.tensorize_spectra import tensorize_spectra from ms2deepscore.train_new_model.data_augmentation import (data_augmentation, data_augmentation_spectrum, peak_addition_for_data_augmentation, - peak_removal_for_data_augmentation, change_peak_intensity) + peak_removal_for_data_augmentation, change_peak_intensity_for_data_augmentation) def test_peak_removal_for_data_augmentation(): spectrum_tensor = torch.tensor([0.0, 0.12, 0.05, 0.78, 0.0, 0.34, 1.0, 0.0, 0.27, 0.65]) @@ -27,3 +27,9 @@ def test_peak_addition_for_data_augmentation(): assert spectrum_tensor[0] == 0.0 assert spectrum_tensor[2] != 0.0 # we know this one is changed because of the random number generator +def test_change_peak_intensity_for_data_augmentation(): + spectrum_tensor = torch.tensor([0.0, 0.12, 0.05, 0.78, 0.0, 0.34, 1.0, 0.0, 0.27, 0.65]) + change_peak_intensity_for_data_augmentation(spectrum_tensor, + 0.2) + assert spectrum_tensor[0] == 0.0 # Check that zero's are not changed. + assert spectrum_tensor[1] != 0.12 # Check that the value is changed. 
From 2b176780792e3316260b0af9d8cbb0599c0c10d6 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Thu, 21 Aug 2025 14:48:12 +0200 Subject: [PATCH 16/48] Remove unnecessary imports --- ms2deepscore/train_new_model/SpectrumPairGenerator.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ms2deepscore/train_new_model/SpectrumPairGenerator.py b/ms2deepscore/train_new_model/SpectrumPairGenerator.py index e1840aa5..a57cddfe 100644 --- a/ms2deepscore/train_new_model/SpectrumPairGenerator.py +++ b/ms2deepscore/train_new_model/SpectrumPairGenerator.py @@ -147,12 +147,17 @@ def _get_spectrum_with_inchikey(self, inchikey: str) -> Spectrum: def create_data_generator(training_spectra, settings, json_save_file=None) -> SpectrumPairGenerator: + # todo actually create, both between and across ionmodes. + pos_spectra, neg_spectra = split_by_ionmode(training_spectra) + selected_compound_pairs_training = select_compound_pairs_wrapper(training_spectra, settings=settings) inchikey_pair_generator = InchikeyPairGenerator(selected_compound_pairs_training) if json_save_file is not None: inchikey_pair_generator.save_as_json(json_save_file) + # todo possibly create a single SpectrumPairGenerator which takes in 3 generators and pos and neg spectra to iteratively select each one. # Create generators + # todo also make sure that the SpectrumPairGenerator can work across ionmodes. 
train_generator = SpectrumPairGenerator(spectrums=training_spectra, selected_compound_pairs=inchikey_pair_generator, settings=settings) From 41018435abd61ed57ff89eb999e7bf62a6d3894d Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Thu, 21 Aug 2025 14:49:02 +0200 Subject: [PATCH 17/48] Remove unnecessary imports --- tests/test_data_augmentation.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/test_data_augmentation.py b/tests/test_data_augmentation.py index e074e43d..c5b5ce89 100644 --- a/tests/test_data_augmentation.py +++ b/tests/test_data_augmentation.py @@ -1,12 +1,8 @@ import numpy as np import torch -from matchms import Spectrum - -from ms2deepscore import SettingsMS2Deepscore -from ms2deepscore.tensorize_spectra import tensorize_spectra -from ms2deepscore.train_new_model.data_augmentation import (data_augmentation, data_augmentation_spectrum, - peak_addition_for_data_augmentation, - peak_removal_for_data_augmentation, change_peak_intensity_for_data_augmentation) +from ms2deepscore.train_new_model.data_augmentation import (peak_addition_for_data_augmentation, + peak_removal_for_data_augmentation, + change_peak_intensity_for_data_augmentation) def test_peak_removal_for_data_augmentation(): spectrum_tensor = torch.tensor([0.0, 0.12, 0.05, 0.78, 0.0, 0.34, 1.0, 0.0, 0.27, 0.65]) From 64cef6b6aaa0d0b55404aa62d53053734648fbc9 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Thu, 21 Aug 2025 14:52:48 +0200 Subject: [PATCH 18/48] Add some typehinting --- ms2deepscore/train_new_model/SpectrumPairGenerator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ms2deepscore/train_new_model/SpectrumPairGenerator.py b/ms2deepscore/train_new_model/SpectrumPairGenerator.py index a57cddfe..954e11c6 100644 --- a/ms2deepscore/train_new_model/SpectrumPairGenerator.py +++ b/ms2deepscore/train_new_model/SpectrumPairGenerator.py @@ -1,6 +1,6 @@ """ Data generators for training/inference with MS2DeepScore model. 
""" -from typing import List +from typing import List, Tuple, Generator import numpy as np import torch from matchms import Spectrum @@ -84,7 +84,7 @@ def __next__(self): self.current_batch_index = 0 # make generator executable again raise StopIteration - def _spectrum_pair_generator(self): + def _spectrum_pair_generator(self) -> Generator[Tuple[Spectrum, Spectrum, float]]: """Use the provided SelectedCompoundPairs object to pick pairs.""" for _ in range(self.model_settings.batch_size): try: @@ -118,7 +118,7 @@ def __getitem__(self, batch_index: int): spectra_2 = data_augmentation(spectra_2, self.model_settings, self.rng) return spectra_1, spectra_2, meta_1, meta_2, targets - def _tensorize_all(self, spectrum_pairs): + def _tensorize_all(self, spectrum_pairs: Generator[Tuple[Spectrum, Spectrum, float]]): spectra_1 = [] spectra_2 = [] targets = [] @@ -148,7 +148,7 @@ def create_data_generator(training_spectra, settings, json_save_file=None) -> SpectrumPairGenerator: # todo actually create, both between and across ionmodes. 
- pos_spectra, neg_spectra = split_by_ionmode(training_spectra) + # pos_spectra, neg_spectra = split_by_ionmode(training_spectra) selected_compound_pairs_training = select_compound_pairs_wrapper(training_spectra, settings=settings) inchikey_pair_generator = InchikeyPairGenerator(selected_compound_pairs_training) From 112674d3553b5f87ec10da9495b0413da29f53f1 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Fri, 22 Aug 2025 12:57:21 +0200 Subject: [PATCH 19/48] Include Spectrum selection in InchikeyPairGenerator --- .../train_new_model/InchikeyPairGenerator.py | 23 +++++++++- .../train_new_model/SpectrumPairGenerator.py | 43 +++++-------------- 2 files changed, 31 insertions(+), 35 deletions(-) diff --git a/ms2deepscore/train_new_model/InchikeyPairGenerator.py b/ms2deepscore/train_new_model/InchikeyPairGenerator.py index 4127e502..d3083613 100644 --- a/ms2deepscore/train_new_model/InchikeyPairGenerator.py +++ b/ms2deepscore/train_new_model/InchikeyPairGenerator.py @@ -2,9 +2,12 @@ from collections import Counter from typing import List, Tuple +import numpy as np +from matchms import Spectrum + class InchikeyPairGenerator: - def __init__(self, selected_inchikey_pairs: List[Tuple[str, str, float]]): + def __init__(self, selected_inchikey_pairs: List[Tuple[str, str, float]], spectra): """ Parameters ---------- @@ -12,6 +15,8 @@ def __init__(self, selected_inchikey_pairs: List[Tuple[str, str, float]]): A list with tuples encoding inchikey pairs like: (inchikey1, inchikey2, tanimoto_score) """ self.selected_inchikey_pairs = selected_inchikey_pairs + self.spectra = spectra + self.spectrum_inchikeys = np.array([s.get("inchikey")[:14] for s in self.spectra]) def generator(self, shuffle: bool, random_nr_generator): """Infinite generator to loop through all inchikeys. 
@@ -22,7 +27,9 @@ def generator(self, shuffle: bool, random_nr_generator): random_nr_generator.shuffle(self.selected_inchikey_pairs) for inchikey1, inchikey2, tanimoto_score in self.selected_inchikey_pairs: - yield inchikey1, inchikey2, tanimoto_score + spectrum1 = self._get_spectrum_with_inchikey(inchikey1, random_nr_generator) + spectrum2 = self._get_spectrum_with_inchikey(inchikey2, random_nr_generator) + yield spectrum1, spectrum2, tanimoto_score def __len__(self): return len(self.selected_inchikey_pairs) @@ -59,3 +66,15 @@ def save_as_json(self, file_name): with open(file_name, "w", encoding="utf-8") as f: json.dump(data_for_json, f) + + def _get_spectrum_with_inchikey(self, inchikey: str, random_number_generator) -> Spectrum: + """ + Get a random spectrum matching the `inchikey` argument. + + NB: A compound (identified by an + inchikey) can have multiple measured spectrums in a binned spectrum dataset. + """ + matching_spectrum_id = np.where(self.spectrum_inchikeys == inchikey)[0] + if len(matching_spectrum_id) <= 0: + raise ValueError("No matching inchikey found (note: expected first 14 characters)") + return self.spectra[random_number_generator.choice(matching_spectrum_id)] diff --git a/ms2deepscore/train_new_model/SpectrumPairGenerator.py b/ms2deepscore/train_new_model/SpectrumPairGenerator.py index 954e11c6..b2234163 100644 --- a/ms2deepscore/train_new_model/SpectrumPairGenerator.py +++ b/ms2deepscore/train_new_model/SpectrumPairGenerator.py @@ -1,6 +1,6 @@ """ Data generators for training/inference with MS2DeepScore model. """ -from typing import List, Tuple, Generator +from typing import List, Tuple import numpy as np import torch from matchms import Spectrum @@ -27,15 +27,13 @@ class SpectrumPairGenerator: In addition inchikeys are selected to occur equally for each pair. 
""" - def __init__(self, spectrums: List[Spectrum], + def __init__(self, selected_compound_pairs: InchikeyPairGenerator, settings: SettingsMS2Deepscore): """Generates data for training a siamese Pytorch model. Parameters ---------- - spectrums - List of matchms Spectrum objects. selected_compound_pairs SelectedCompoundPairs object which contains selected compounds pairs and the respective similarity scores. @@ -43,10 +41,6 @@ def __init__(self, spectrums: List[Spectrum], The available settings can be found in SettignsMS2Deepscore """ self.current_batch_index = 0 - self.spectrums = spectrums - - # Collect all inchikeys - self.spectrum_inchikeys = np.array([s.get("inchikey")[:14] for s in self.spectrums]) # Set all other settings to input (or otherwise to defaults): self.model_settings = settings @@ -59,14 +53,12 @@ def __init__(self, spectrums: List[Spectrum], if self.model_settings.random_seed is None: self.model_settings.random_seed = 0 self.rng = np.random.default_rng(self.model_settings.random_seed) - - unique_inchikeys = np.unique(self.spectrum_inchikeys) + self.inchikey_pair_generator = selected_compound_pairs.generator(self.model_settings.shuffle, self.rng) + unique_inchikeys = np.unique(selected_compound_pairs.spectrum_inchikeys) if len(unique_inchikeys) < self.model_settings.batch_size: raise ValueError("The number of unique inchikeys must be larger than the batch size.") self.fixed_set = {} - self.selected_compound_pairs = selected_compound_pairs - self.inchikey_pair_generator = self.selected_compound_pairs.generator(self.model_settings.shuffle, self.rng) self.nr_of_batches = int(self.model_settings.num_turns) * int(np.ceil(len(unique_inchikeys) / self.model_settings.batch_size)) @@ -84,18 +76,16 @@ def __next__(self): self.current_batch_index = 0 # make generator executable again raise StopIteration - def _spectrum_pair_generator(self) -> Generator[Tuple[Spectrum, Spectrum, float]]: + def _spectrum_pair_generator(self): """Use the provided 
SelectedCompoundPairs object to pick pairs.""" for _ in range(self.model_settings.batch_size): try: - inchikey1, inchikey2, score = next(self.inchikey_pair_generator) + spectrum1, spectrum2, score = next(self.inchikey_pair_generator) + yield spectrum1, spectrum2, score except StopIteration as exc: raise RuntimeError("The inchikey pair generator is not expected to end, " "but should instead generate infinite pairs") from exc - spectrum1 = self._get_spectrum_with_inchikey(inchikey1) - spectrum2 = self._get_spectrum_with_inchikey(inchikey2) - yield spectrum1, spectrum2, score def __getitem__(self, batch_index: int): """Generate one batch of data. @@ -118,7 +108,7 @@ def __getitem__(self, batch_index: int): spectra_2 = data_augmentation(spectra_2, self.model_settings, self.rng) return spectra_1, spectra_2, meta_1, meta_2, targets - def _tensorize_all(self, spectrum_pairs: Generator[Tuple[Spectrum, Spectrum, float]]): + def _tensorize_all(self, spectrum_pairs): spectra_1 = [] spectra_2 = [] targets = [] @@ -131,18 +121,6 @@ def _tensorize_all(self, spectrum_pairs: Generator[Tuple[Spectrum, Spectrum, flo binned_spectra_2, metadata_2 = tensorize_spectra(spectra_2, self.model_settings) return binned_spectra_1, binned_spectra_2, metadata_1, metadata_2, torch.tensor(targets, dtype=torch.float32) - def _get_spectrum_with_inchikey(self, inchikey: str) -> Spectrum: - """ - Get a random spectrum matching the `inchikey` argument. - - NB: A compound (identified by an - inchikey) can have multiple measured spectrums in a binned spectrum dataset. 
- """ - matching_spectrum_id = np.where(self.spectrum_inchikeys == inchikey)[0] - if len(matching_spectrum_id) <= 0: - raise ValueError("No matching inchikey found (note: expected first 14 characters)") - return self.spectrums[self.rng.choice(matching_spectrum_id)] - def create_data_generator(training_spectra, settings, @@ -151,14 +129,13 @@ def create_data_generator(training_spectra, # pos_spectra, neg_spectra = split_by_ionmode(training_spectra) selected_compound_pairs_training = select_compound_pairs_wrapper(training_spectra, settings=settings) - inchikey_pair_generator = InchikeyPairGenerator(selected_compound_pairs_training) + inchikey_pair_generator = InchikeyPairGenerator(selected_compound_pairs_training, training_spectra) if json_save_file is not None: inchikey_pair_generator.save_as_json(json_save_file) # todo possibly create a single SpectrumPairGenerator which takes in 3 generators and pos and neg spectra to iteratively select each one. # Create generators # todo also make sure that the SpectrumPairGenerator can work across ionmodes. 
- train_generator = SpectrumPairGenerator(spectrums=training_spectra, - selected_compound_pairs=inchikey_pair_generator, + train_generator = SpectrumPairGenerator(selected_compound_pairs=inchikey_pair_generator, settings=settings) return train_generator From d73e746e20ec978bcbc19abb120d8160287c9bee Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Fri, 22 Aug 2025 12:57:44 +0200 Subject: [PATCH 20/48] Update tests to handle new InchikeyPairGenerator --- tests/test_data_generators.py | 11 ++++--- tests/test_inchikey_pair_selection.py | 47 +++++++++++++++------------ tests/test_siamese_spectra_model.py | 4 +-- 3 files changed, 35 insertions(+), 27 deletions(-) diff --git a/tests/test_data_generators.py b/tests/test_data_generators.py index 37ea3af5..535ab980 100644 --- a/tests/test_data_generators.py +++ b/tests/test_data_generators.py @@ -66,7 +66,8 @@ def dummy_data_generator(): selected_pairs = InchikeyPairGenerator([('CCCCCCCCCCCCCC', 'DDDDDDDDDDDDDD', 0.25), ('BBBBBBBBBBBBBB', 'DDDDDDDDDDDDDD', 0.6666667), ('AAAAAAAAAAAAAA', 'CCCCCCCCCCCCCC', 1.0), - ('AAAAAAAAAAAAAA', 'BBBBBBBBBBBBBB', 0.33333334)]) + ('AAAAAAAAAAAAAA', 'BBBBBBBBBBBBBB', 0.33333334)], + spectrums) batch_size = 2 settings = SettingsMS2Deepscore(min_mz=10, max_mz=1000, @@ -80,7 +81,7 @@ def dummy_data_generator(): augment_removal_intensity=0.0, augment_intensity=0.0, augment_noise_max=0) - return SpectrumPairGenerator(spectrums, selected_pairs, settings) + return SpectrumPairGenerator(selected_pairs, settings) def test_correct_batch_format_data_generator(dummy_data_generator): @@ -108,6 +109,8 @@ def test_equal_sampling_of_spectra(dummy_data_generator): The sampling is random, but for enough repetitions very likely to always happen. 
This test is mostly to make sure we don't accidentally implement something where we just resample the same spectrum every time for one inchikey""" + spectrums = create_test_spectra(4, 3) # the same spectra used for the dummy_data_generator + tensorized_spectra = [] epochs = 20 for _ in range(epochs): @@ -128,7 +131,7 @@ def test_equal_sampling_of_spectra(dummy_data_generator): # but since we sample 640 spectra from 24 options, it is very unlikely (1 in 28 billion) # that this will result in not sampling all at least once. # Because we have a fixed seed, this should not result in random failing tests. - assert len(unique_tensors) == len(dummy_data_generator.spectrums), "Not all spectra are selected at least once" + assert len(unique_tensors) == 12, "Not all spectra are selected at least once" def reverse_tensorize(tensor, list_of_spectra, settings): """Finds the spectrum in a list of spectra based on the tensorized vesion""" @@ -146,7 +149,7 @@ def reverse_tensorize(tensor, list_of_spectra, settings): inchikey_counts = Counter() for unique_tensor, count in tensor_counts.items(): spectrum = reverse_tensorize(unique_tensor, - dummy_data_generator.spectrums, + spectrums, dummy_data_generator.model_settings) inchikey = spectrum.get("inchikey")[:14] diff --git a/tests/test_inchikey_pair_selection.py b/tests/test_inchikey_pair_selection.py index 71eb63a0..d6afd22f 100644 --- a/tests/test_inchikey_pair_selection.py +++ b/tests/test_inchikey_pair_selection.py @@ -58,12 +58,16 @@ def test_spectra(): @pytest.fixture -def dummy_spectrum_pairs(): +def dummy_inchikey_pair_generator(): spectrum_pairs = [("Inchikey0", "Inchikey1", 0.8), ("Inchikey0", "Inchikey2", 0.6), ("Inchikey2", "Inchikey1", 0.3), ("Inchikey2", "Inchikey2", 1.0)] - return spectrum_pairs + return InchikeyPairGenerator(spectrum_pairs, [ + Spectrum(mz=np.array([90.]), intensities=np.array([0.4]), metadata={"inchikey": "Inchikey0"}), + Spectrum(mz=np.array([90.]), intensities=np.array([0.4]), metadata={"inchikey": 
"Inchikey1"}), + Spectrum(mz=np.array([90.]), intensities=np.array([0.4]), metadata={"inchikey": "Inchikey2"}), + ]) def test_compute_jaccard_similarity_per_bin(simple_fingerprints): @@ -146,34 +150,35 @@ def test_select_inchi_for_unique_inchikeys_two_inchikeys(test_spectra): assert [s.get("inchi")[:15] for s in spectrums_selected] == ['InChI=1/C6H8O6/', 'InChI=1S/C8H10N'] -def test_SelectedInchikeyPairs_generator_with_shuffle(dummy_spectrum_pairs): - selected_inchikey_pairs = InchikeyPairGenerator(dummy_spectrum_pairs) +def test_SelectedInchikeyPairs_generator_with_shuffle(dummy_inchikey_pair_generator): rng = np.random.default_rng(0) - gen = selected_inchikey_pairs.generator(True, rng) + gen = dummy_inchikey_pair_generator.generator(True, rng) found_pairs = [] # do one complete loop - for i in range(len(selected_inchikey_pairs)): - found_pairs.append(next(gen)) + for i in range(len(dummy_inchikey_pair_generator)): + spectrum_1, spectrum_2, score = next(gen) + found_pairs.append((spectrum_1.get("inchikey"), spectrum_2.get("inchikey"), score)) - assert len(found_pairs) == len(dummy_spectrum_pairs) - assert sorted(found_pairs) == sorted(dummy_spectrum_pairs) + assert len(found_pairs) == len(dummy_inchikey_pair_generator.selected_inchikey_pairs) + assert sorted(found_pairs) == sorted(dummy_inchikey_pair_generator.selected_inchikey_pairs) found_pairs = [] # do one complete loop - for i in range(len(selected_inchikey_pairs)): - found_pairs.append(next(gen)) + for i in range(len(dummy_inchikey_pair_generator)): + spectrum_1, spectrum_2, score = next(gen) + found_pairs.append((spectrum_1.get("inchikey"), spectrum_2.get("inchikey"), score)) - assert len(found_pairs) == len(dummy_spectrum_pairs) - assert sorted(found_pairs) == sorted(dummy_spectrum_pairs) + assert len(found_pairs) == len(dummy_inchikey_pair_generator.selected_inchikey_pairs) + assert sorted(found_pairs) == sorted(dummy_inchikey_pair_generator.selected_inchikey_pairs) -def 
test_SelectedInchikeyPairs_generator_without_shuffle(dummy_spectrum_pairs): - selected_inchikey_pairs = InchikeyPairGenerator(dummy_spectrum_pairs) - gen = selected_inchikey_pairs.generator(False, None) +def test_SelectedInchikeyPairs_generator_without_shuffle(dummy_inchikey_pair_generator): + gen = dummy_inchikey_pair_generator.generator(False, np.random.default_rng(0)) - for _, expected_pair in enumerate(dummy_spectrum_pairs): - assert expected_pair == next(gen) + for _, expected_pair in enumerate(dummy_inchikey_pair_generator.selected_inchikey_pairs): + spectrum_1, spectrum_2, score = next(gen) + assert expected_pair == (spectrum_1.get("inchikey"), spectrum_2.get("inchikey"), score) def test_select_compound_pairs_wrapper_no_resampling(): @@ -185,7 +190,7 @@ def test_select_compound_pairs_wrapper_no_resampling(): batch_size=8, max_pair_resampling=max_pair_resampling) selected_inchikey_pairs = select_compound_pairs_wrapper(spectrums, settings) - inchikey_pair_generator = InchikeyPairGenerator(selected_inchikey_pairs) + inchikey_pair_generator = InchikeyPairGenerator(selected_inchikey_pairs, spectrums) check_balanced_scores_selecting_inchikey_pairs(inchikey_pair_generator, bins) check_correct_oversampling(inchikey_pair_generator, max_pair_resampling) @@ -206,7 +211,7 @@ def test_select_compound_pairs_wrapper_with_resampling(): batch_size=8, max_pair_resampling=max_pair_resampling) selected_inchikey_pairs = select_compound_pairs_wrapper(spectrums, settings) - inchikey_pair_generator = InchikeyPairGenerator(selected_inchikey_pairs) + inchikey_pair_generator = InchikeyPairGenerator(selected_inchikey_pairs, spectrums) check_balanced_scores_selecting_inchikey_pairs(inchikey_pair_generator, bins) check_correct_oversampling(inchikey_pair_generator, max_pair_resampling) @@ -229,7 +234,7 @@ def test_select_compound_pairs_wrapper_maximum_inchikey_count(): max_inchikey_sampling=max_inchikey_sampling ) selected_inchikey_pairs = select_compound_pairs_wrapper(spectrums, 
settings) - inchikey_pair_generator = InchikeyPairGenerator(selected_inchikey_pairs) + inchikey_pair_generator = InchikeyPairGenerator(selected_inchikey_pairs, spectrums) highest_inchikey_count = max(inchikey_pair_generator.get_inchikey_counts().values()) assert highest_inchikey_count <= max_inchikey_sampling + 1 # +1 because there is a chance that the last added inchikey is a pair to itself... diff --git a/tests/test_siamese_spectra_model.py b/tests/test_siamese_spectra_model.py index a86b69b7..fa1661ff 100644 --- a/tests/test_siamese_spectra_model.py +++ b/tests/test_siamese_spectra_model.py @@ -132,9 +132,9 @@ def test_model_training(simple_training_spectra): num_turns=20, ) scp_train = select_compound_pairs_wrapper(simple_training_spectra, settings) - inchikey_pair_generator = InchikeyPairGenerator(scp_train) + inchikey_pair_generator = InchikeyPairGenerator(scp_train, simple_training_spectra) # Create generators - train_generator_simple = SpectrumPairGenerator(spectrums=simple_training_spectra, selected_compound_pairs=inchikey_pair_generator, + train_generator_simple = SpectrumPairGenerator(selected_compound_pairs=inchikey_pair_generator, settings=settings) settings.same_prob_bins = np.array([(-0.01, 1.0)]) validation_loss_calculator = ValidationLossCalculator( From b3d2a18e2db00559acdaba84fc1a8fc17e7ccd8a Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Fri, 22 Aug 2025 13:00:13 +0200 Subject: [PATCH 21/48] Rename SpectrumPairGenerator to TrainingBatchGenerator.py --- ...irGenerator.py => TrainingBatchGenerator.py} | 17 +++++++---------- ms2deepscore/train_new_model/__init__.py | 4 ++-- .../train_new_model/train_ms2deepscore.py | 2 +- .../training_wrapper_functions.py | 2 +- tests/test_data_generators.py | 4 ++-- tests/test_siamese_spectra_model.py | 6 +++--- 6 files changed, 16 insertions(+), 19 deletions(-) rename ms2deepscore/train_new_model/{SpectrumPairGenerator.py => TrainingBatchGenerator.py} (90%) diff --git 
a/ms2deepscore/train_new_model/SpectrumPairGenerator.py b/ms2deepscore/train_new_model/TrainingBatchGenerator.py similarity index 90% rename from ms2deepscore/train_new_model/SpectrumPairGenerator.py rename to ms2deepscore/train_new_model/TrainingBatchGenerator.py index b2234163..3a4ed47b 100644 --- a/ms2deepscore/train_new_model/SpectrumPairGenerator.py +++ b/ms2deepscore/train_new_model/TrainingBatchGenerator.py @@ -1,19 +1,16 @@ """ Data generators for training/inference with MS2DeepScore model. """ -from typing import List, Tuple import numpy as np import torch -from matchms import Spectrum from ms2deepscore.SettingsMS2Deepscore import (SettingsMS2Deepscore) from ms2deepscore.tensorize_spectra import tensorize_spectra from ms2deepscore.train_new_model.InchikeyPairGenerator import InchikeyPairGenerator from ms2deepscore.train_new_model.data_augmentation import data_augmentation from ms2deepscore.train_new_model.inchikey_pair_selection import ( select_compound_pairs_wrapper) -from ms2deepscore.utils import split_by_ionmode -class SpectrumPairGenerator: +class TrainingBatchGenerator: """Generates data for training a siamese Pytorch model. This class provides a data generator specifically designed for training a Siamese Pytorch model with a curated set @@ -21,7 +18,7 @@ class SpectrumPairGenerator: inchikey pair. By using pre-selected compound pairs (in the InchikeyPairGenerator), this allows more control over the training - process. The selection of inchikey pairs does not happen in SpectrumPairGenerator (only spectrum selection), but in + process. The selection of inchikey pairs does not happen in TrainingBatchGenerator (only spectrum selection), but in inchikey_pair_selection.py. In inchikey_pair_selection inchikey pairs are picked to balance selected pairs equally over different tanimoto score bins to make sure both pairs of similar and dissimilar compounds are sampled. In addition inchikeys are selected to occur equally for each pair. 
@@ -124,7 +121,7 @@ def _tensorize_all(self, spectrum_pairs): def create_data_generator(training_spectra, settings, - json_save_file=None) -> SpectrumPairGenerator: + json_save_file=None) -> TrainingBatchGenerator: # todo actually create, both between and across ionmodes. # pos_spectra, neg_spectra = split_by_ionmode(training_spectra) @@ -133,9 +130,9 @@ def create_data_generator(training_spectra, if json_save_file is not None: inchikey_pair_generator.save_as_json(json_save_file) - # todo possibly create a single SpectrumPairGenerator which takes in 3 generators and pos and neg spectra to iteratively select each one. + # todo possibly create a single TrainingBatchGenerator which takes in 3 generators and pos and neg spectra to iteratively select each one. # Create generators - # todo also make sure that the SpectrumPairGenerator can work across ionmodes. - train_generator = SpectrumPairGenerator(selected_compound_pairs=inchikey_pair_generator, - settings=settings) + # todo also make sure that the TrainingBatchGenerator can work across ionmodes. 
+ train_generator = TrainingBatchGenerator(selected_compound_pairs=inchikey_pair_generator, + settings=settings) return train_generator diff --git a/ms2deepscore/train_new_model/__init__.py b/ms2deepscore/train_new_model/__init__.py index 14588440..858dc12c 100644 --- a/ms2deepscore/train_new_model/__init__.py +++ b/ms2deepscore/train_new_model/__init__.py @@ -1,9 +1,9 @@ -from .SpectrumPairGenerator import SpectrumPairGenerator +from .TrainingBatchGenerator import TrainingBatchGenerator from .InchikeyPairGenerator import InchikeyPairGenerator from .inchikey_pair_selection import (select_compound_pairs_wrapper) __all__ = [ - "SpectrumPairGenerator", + "TrainingBatchGenerator", "select_compound_pairs_wrapper" ] diff --git a/ms2deepscore/train_new_model/train_ms2deepscore.py b/ms2deepscore/train_new_model/train_ms2deepscore.py index 1a46b167..2f03896f 100644 --- a/ms2deepscore/train_new_model/train_ms2deepscore.py +++ b/ms2deepscore/train_new_model/train_ms2deepscore.py @@ -10,7 +10,7 @@ from ms2deepscore.models.SiameseSpectralModel import (SiameseSpectralModel, train) from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore -from ms2deepscore.train_new_model.SpectrumPairGenerator import create_data_generator +from ms2deepscore.train_new_model.TrainingBatchGenerator import create_data_generator from ms2deepscore.validation_loss_calculation.ValidationLossCalculator import \ ValidationLossCalculator diff --git a/ms2deepscore/wrapper_functions/training_wrapper_functions.py b/ms2deepscore/wrapper_functions/training_wrapper_functions.py index 9936124b..f6efa957 100644 --- a/ms2deepscore/wrapper_functions/training_wrapper_functions.py +++ b/ms2deepscore/wrapper_functions/training_wrapper_functions.py @@ -14,7 +14,7 @@ train) from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore from ms2deepscore.validation_loss_calculation.ValidationLossCalculator import ValidationLossCalculator -from ms2deepscore.train_new_model.SpectrumPairGenerator import 
create_data_generator +from ms2deepscore.train_new_model.TrainingBatchGenerator import create_data_generator from ms2deepscore.train_new_model.train_ms2deepscore import \ train_ms2ds_model, plot_history, save_history from ms2deepscore.train_new_model.validation_and_test_split import \ diff --git a/tests/test_data_generators.py b/tests/test_data_generators.py index 535ab980..0a8e88b2 100644 --- a/tests/test_data_generators.py +++ b/tests/test_data_generators.py @@ -5,7 +5,7 @@ from matchms import Spectrum from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore, SettingsEmbeddingEvaluator from ms2deepscore.tensorize_spectra import tensorize_spectra -from ms2deepscore.train_new_model.SpectrumPairGenerator import SpectrumPairGenerator, \ +from ms2deepscore.train_new_model.TrainingBatchGenerator import TrainingBatchGenerator, \ create_data_generator from ms2deepscore.train_new_model.DataGeneratorEmbeddingEvaluation import DataGeneratorEmbeddingEvaluation from ms2deepscore.train_new_model import InchikeyPairGenerator @@ -81,7 +81,7 @@ def dummy_data_generator(): augment_removal_intensity=0.0, augment_intensity=0.0, augment_noise_max=0) - return SpectrumPairGenerator(selected_pairs, settings) + return TrainingBatchGenerator(selected_pairs, settings) def test_correct_batch_format_data_generator(dummy_data_generator): diff --git a/tests/test_siamese_spectra_model.py b/tests/test_siamese_spectra_model.py index fa1661ff..fc1f4a83 100644 --- a/tests/test_siamese_spectra_model.py +++ b/tests/test_siamese_spectra_model.py @@ -5,7 +5,7 @@ train) from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore from ms2deepscore.tensorize_spectra import tensorize_spectra -from ms2deepscore.train_new_model.SpectrumPairGenerator import SpectrumPairGenerator +from ms2deepscore.train_new_model.TrainingBatchGenerator import TrainingBatchGenerator from ms2deepscore.train_new_model import InchikeyPairGenerator from ms2deepscore.train_new_model.inchikey_pair_selection import \ 
select_compound_pairs_wrapper @@ -134,8 +134,8 @@ def test_model_training(simple_training_spectra): scp_train = select_compound_pairs_wrapper(simple_training_spectra, settings) inchikey_pair_generator = InchikeyPairGenerator(scp_train, simple_training_spectra) # Create generators - train_generator_simple = SpectrumPairGenerator(selected_compound_pairs=inchikey_pair_generator, - settings=settings) + train_generator_simple = TrainingBatchGenerator(selected_compound_pairs=inchikey_pair_generator, + settings=settings) settings.same_prob_bins = np.array([(-0.01, 1.0)]) validation_loss_calculator = ValidationLossCalculator( simple_training_spectra, From 9c39a44f9b59c5132a574625604f4cf8a88fd505 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Fri, 22 Aug 2025 13:04:16 +0200 Subject: [PATCH 22/48] Rename InchikeyPairGenerator to SpectrumPairGenerator --- ...irGenerator.py => SpectrumPairGenerator.py} | 4 ++-- .../train_new_model/TrainingBatchGenerator.py | 16 ++++++++-------- ms2deepscore/train_new_model/__init__.py | 2 +- .../train_new_model/inchikey_pair_selection.py | 6 +++--- .../inchikey_pair_selection_cross_ionmode.py | 6 +++--- tests/test_data_generators.py | 4 ++-- tests/test_inchikey_pair_selection.py | 18 +++++++++--------- tests/test_siamese_spectra_model.py | 4 ++-- 8 files changed, 30 insertions(+), 30 deletions(-) rename ms2deepscore/train_new_model/{InchikeyPairGenerator.py => SpectrumPairGenerator.py} (97%) diff --git a/ms2deepscore/train_new_model/InchikeyPairGenerator.py b/ms2deepscore/train_new_model/SpectrumPairGenerator.py similarity index 97% rename from ms2deepscore/train_new_model/InchikeyPairGenerator.py rename to ms2deepscore/train_new_model/SpectrumPairGenerator.py index d3083613..3539505a 100644 --- a/ms2deepscore/train_new_model/InchikeyPairGenerator.py +++ b/ms2deepscore/train_new_model/SpectrumPairGenerator.py @@ -6,7 +6,7 @@ from matchms import Spectrum -class InchikeyPairGenerator: +class SpectrumPairGenerator: def __init__(self,
selected_inchikey_pairs: List[Tuple[str, str, float]], spectra): """ Parameters @@ -35,7 +35,7 @@ def __len__(self): return len(self.selected_inchikey_pairs) def __str__(self): - return f"InchikeyPairGenerator with {len(self.selected_inchikey_pairs)} pairs available" + return f"SpectrumPairGenerator with {len(self.selected_inchikey_pairs)} pairs available" def get_scores(self): return [score for _, _, score in self.selected_inchikey_pairs] diff --git a/ms2deepscore/train_new_model/TrainingBatchGenerator.py b/ms2deepscore/train_new_model/TrainingBatchGenerator.py index 3a4ed47b..6ee8a347 100644 --- a/ms2deepscore/train_new_model/TrainingBatchGenerator.py +++ b/ms2deepscore/train_new_model/TrainingBatchGenerator.py @@ -4,7 +4,7 @@ import torch from ms2deepscore.SettingsMS2Deepscore import (SettingsMS2Deepscore) from ms2deepscore.tensorize_spectra import tensorize_spectra -from ms2deepscore.train_new_model.InchikeyPairGenerator import InchikeyPairGenerator +from ms2deepscore.train_new_model.SpectrumPairGenerator import SpectrumPairGenerator from ms2deepscore.train_new_model.data_augmentation import data_augmentation from ms2deepscore.train_new_model.inchikey_pair_selection import ( select_compound_pairs_wrapper) @@ -14,18 +14,18 @@ class TrainingBatchGenerator: """Generates data for training a siamese Pytorch model. This class provides a data generator specifically designed for training a Siamese Pytorch model with a curated set - of compound pairs. It takes a InchikeyPairGenerator and randomly selects, augments and tensorizes spectra for each - inchikey pair. + of compound pairs. It takes a SpectrumPairGenerator and augments and tensorizes spectra and combines them into + batches. - By using pre-selected compound pairs (in the InchikeyPairGenerator), this allows more control over the training - process. The selection of inchikey pairs does not happen in TrainingBatchGenerator (only spectrum selection), but in - inchikey_pair_selection.py. 
In inchikey_pair_selection inchikey pairs are picked to balance selected pairs equally + By using pre-selected compound pairs (in the SpectrumPairGenerator), this allows more control over the training + process. The selection of inchikey pairs does not happen in SpectrumPairGenerator, but in + inchikey_pair_selection.py. In inchikey_pair_selection.py inchikey pairs are picked to balance selected pairs equally over different tanimoto score bins to make sure both pairs of similar and dissimilar compounds are sampled. In addition inchikeys are selected to occur equally for each pair. """ def __init__(self, - selected_compound_pairs: InchikeyPairGenerator, + selected_compound_pairs: SpectrumPairGenerator, settings: SettingsMS2Deepscore): """Generates data for training a siamese Pytorch model. @@ -126,7 +126,7 @@ def create_data_generator(training_spectra, # pos_spectra, neg_spectra = split_by_ionmode(training_spectra) selected_compound_pairs_training = select_compound_pairs_wrapper(training_spectra, settings=settings) - inchikey_pair_generator = InchikeyPairGenerator(selected_compound_pairs_training, training_spectra) + inchikey_pair_generator = SpectrumPairGenerator(selected_compound_pairs_training, training_spectra) if json_save_file is not None: inchikey_pair_generator.save_as_json(json_save_file) diff --git a/ms2deepscore/train_new_model/__init__.py b/ms2deepscore/train_new_model/__init__.py index 858dc12c..f31333e0 100644 --- a/ms2deepscore/train_new_model/__init__.py +++ b/ms2deepscore/train_new_model/__init__.py @@ -1,5 +1,5 @@ from .TrainingBatchGenerator import TrainingBatchGenerator -from .InchikeyPairGenerator import InchikeyPairGenerator +from .SpectrumPairGenerator import SpectrumPairGenerator from .inchikey_pair_selection import (select_compound_pairs_wrapper) diff --git a/ms2deepscore/train_new_model/inchikey_pair_selection.py b/ms2deepscore/train_new_model/inchikey_pair_selection.py index 1042606d..68d95fc8 100644 --- 
a/ms2deepscore/train_new_model/inchikey_pair_selection.py +++ b/ms2deepscore/train_new_model/inchikey_pair_selection.py @@ -14,7 +14,7 @@ def select_compound_pairs_wrapper( spectra: List[Spectrum], settings: SettingsMS2Deepscore, ) -> List[Tuple[str, str, float]]: - """Returns a InchikeyPairGenerator object containing equally balanced pairs over the different bins + """Returns a SpectrumPairGenerator object containing equally balanced pairs over the different bins spectra: A list of spectra @@ -24,8 +24,8 @@ def select_compound_pairs_wrapper( Returns ------- - InchikeyPairGenerator - InchikeyPairGenerator containing balanced pairs. The pairs are stored as [(inchikey1, inchikey2, score)] + SpectrumPairGenerator + SpectrumPairGenerator containing balanced pairs. The pairs are stored as [(inchikey1, inchikey2, score)] """ if settings.random_seed is not None: np.random.seed(settings.random_seed) diff --git a/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py b/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py index 2c4a14a2..287c5a9c 100644 --- a/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py +++ b/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py @@ -12,7 +12,7 @@ def select_compound_pairs_wrapper_across_ionmode( spectra_2: List[Spectrum], settings: SettingsMS2Deepscore, ) -> List[Tuple[str, str, float]]: - """Returns a InchikeyPairGenerator object containing equally balanced pairs over the different bins + """Returns a SpectrumPairGenerator object containing equally balanced pairs over the different bins spectra: A list of spectra @@ -22,8 +22,8 @@ def select_compound_pairs_wrapper_across_ionmode( Returns ------- - InchikeyPairGenerator - InchikeyPairGenerator containing balanced pairs. The pairs are stored as [(inchikey1, inchikey2, score)] + SpectrumPairGenerator + SpectrumPairGenerator containing balanced pairs. 
The pairs are stored as [(inchikey1, inchikey2, score)] """ if settings.random_seed is not None: np.random.seed(settings.random_seed) diff --git a/tests/test_data_generators.py b/tests/test_data_generators.py index 0a8e88b2..beeeb013 100644 --- a/tests/test_data_generators.py +++ b/tests/test_data_generators.py @@ -8,7 +8,7 @@ from ms2deepscore.train_new_model.TrainingBatchGenerator import TrainingBatchGenerator, \ create_data_generator from ms2deepscore.train_new_model.DataGeneratorEmbeddingEvaluation import DataGeneratorEmbeddingEvaluation -from ms2deepscore.train_new_model import InchikeyPairGenerator +from ms2deepscore.train_new_model import SpectrumPairGenerator from tests.create_test_spectra import create_test_spectra @@ -63,7 +63,7 @@ def test_tensorize_spectra(): @pytest.fixture() def dummy_data_generator(): spectrums = create_test_spectra(4, 3) - selected_pairs = InchikeyPairGenerator([('CCCCCCCCCCCCCC', 'DDDDDDDDDDDDDD', 0.25), + selected_pairs = SpectrumPairGenerator([('CCCCCCCCCCCCCC', 'DDDDDDDDDDDDDD', 0.25), ('BBBBBBBBBBBBBB', 'DDDDDDDDDDDDDD', 0.6666667), ('AAAAAAAAAAAAAA', 'CCCCCCCCCCCCCC', 1.0), ('AAAAAAAAAAAAAA', 'BBBBBBBBBBBBBB', 0.33333334)], diff --git a/tests/test_inchikey_pair_selection.py b/tests/test_inchikey_pair_selection.py index d6afd22f..e0dcac7b 100644 --- a/tests/test_inchikey_pair_selection.py +++ b/tests/test_inchikey_pair_selection.py @@ -8,7 +8,7 @@ from ms2deepscore import SettingsMS2Deepscore from ms2deepscore.train_new_model.inchikey_pair_selection import ( compute_jaccard_similarity_per_bin, select_inchi_for_unique_inchikeys, select_compound_pairs_wrapper, compute_fingerprints_for_training) -from ms2deepscore.train_new_model import InchikeyPairGenerator +from ms2deepscore.train_new_model import SpectrumPairGenerator from tests.create_test_spectra import create_test_spectra @@ -63,7 +63,7 @@ def dummy_inchikey_pair_generator(): ("Inchikey0", "Inchikey2", 0.6), ("Inchikey2", "Inchikey1", 0.3), ("Inchikey2", "Inchikey2", 1.0)] - 
return InchikeyPairGenerator(spectrum_pairs, [ + return SpectrumPairGenerator(spectrum_pairs, [ Spectrum(mz=np.array([90.]), intensities=np.array([0.4]), metadata={"inchikey": "Inchikey0"}), Spectrum(mz=np.array([90.]), intensities=np.array([0.4]), metadata={"inchikey": "Inchikey1"}), Spectrum(mz=np.array([90.]), intensities=np.array([0.4]), metadata={"inchikey": "Inchikey2"}), @@ -190,7 +190,7 @@ def test_select_compound_pairs_wrapper_no_resampling(): batch_size=8, max_pair_resampling=max_pair_resampling) selected_inchikey_pairs = select_compound_pairs_wrapper(spectrums, settings) - inchikey_pair_generator = InchikeyPairGenerator(selected_inchikey_pairs, spectrums) + inchikey_pair_generator = SpectrumPairGenerator(selected_inchikey_pairs, spectrums) check_balanced_scores_selecting_inchikey_pairs(inchikey_pair_generator, bins) check_correct_oversampling(inchikey_pair_generator, max_pair_resampling) @@ -211,7 +211,7 @@ def test_select_compound_pairs_wrapper_with_resampling(): batch_size=8, max_pair_resampling=max_pair_resampling) selected_inchikey_pairs = select_compound_pairs_wrapper(spectrums, settings) - inchikey_pair_generator = InchikeyPairGenerator(selected_inchikey_pairs, spectrums) + inchikey_pair_generator = SpectrumPairGenerator(selected_inchikey_pairs, spectrums) check_balanced_scores_selecting_inchikey_pairs(inchikey_pair_generator, bins) check_correct_oversampling(inchikey_pair_generator, max_pair_resampling) @@ -234,13 +234,13 @@ def test_select_compound_pairs_wrapper_maximum_inchikey_count(): max_inchikey_sampling=max_inchikey_sampling ) selected_inchikey_pairs = select_compound_pairs_wrapper(spectrums, settings) - inchikey_pair_generator = InchikeyPairGenerator(selected_inchikey_pairs, spectrums) + inchikey_pair_generator = SpectrumPairGenerator(selected_inchikey_pairs, spectrums) highest_inchikey_count = max(inchikey_pair_generator.get_inchikey_counts().values()) assert highest_inchikey_count <= max_inchikey_sampling + 1 # +1 because there is a 
chance that the last added inchikey is a pair to itself... -def check_correct_oversampling(selected_inchikey_pairs: InchikeyPairGenerator, max_resampling: int): +def check_correct_oversampling(selected_inchikey_pairs: SpectrumPairGenerator, max_resampling: int): pair_counts = Counter(selected_inchikey_pairs.selected_inchikey_pairs) for count in pair_counts.values(): assert count <= max_resampling, "the resampling was done too frequently" @@ -265,7 +265,7 @@ def get_available_score_distribution(settings, spectra): return score_distribution_per_inchikey -def print_balanced_bins_per_inchikey(selected_inchikey_pairs: InchikeyPairGenerator, settings, spectra): +def print_balanced_bins_per_inchikey(selected_inchikey_pairs: SpectrumPairGenerator, settings, spectra): """Prints the available distribution and the balanced distribution Currently doesn't do any checks, because it is hard to check if the wanted behaviour is achieved, @@ -288,9 +288,9 @@ def print_balanced_bins_per_inchikey(selected_inchikey_pairs: InchikeyPairGenera # assert minimum_available_distribution*settings.max_pair_resampling == min(balanced_distribution) -def check_balanced_scores_selecting_inchikey_pairs(selected_inchikey_pairs: InchikeyPairGenerator, +def check_balanced_scores_selecting_inchikey_pairs(selected_inchikey_pairs: SpectrumPairGenerator, score_bins): - """Test if InchikeyPairGenerator has an equal inchikey distribution + """Test if SpectrumPairGenerator has an equal inchikey distribution """ scores = selected_inchikey_pairs.get_scores() # converting to float32 is required, since the scores are float32, otherwise equal numbers are seen as not equal diff --git a/tests/test_siamese_spectra_model.py b/tests/test_siamese_spectra_model.py index fc1f4a83..2578b880 100644 --- a/tests/test_siamese_spectra_model.py +++ b/tests/test_siamese_spectra_model.py @@ -6,7 +6,7 @@ from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore from ms2deepscore.tensorize_spectra import tensorize_spectra 
from ms2deepscore.train_new_model.TrainingBatchGenerator import TrainingBatchGenerator -from ms2deepscore.train_new_model import InchikeyPairGenerator +from ms2deepscore.train_new_model import SpectrumPairGenerator from ms2deepscore.train_new_model.inchikey_pair_selection import \ select_compound_pairs_wrapper from ms2deepscore.validation_loss_calculation.ValidationLossCalculator import \ @@ -132,7 +132,7 @@ def test_model_training(simple_training_spectra): num_turns=20, ) scp_train = select_compound_pairs_wrapper(simple_training_spectra, settings) - inchikey_pair_generator = InchikeyPairGenerator(scp_train, simple_training_spectra) + inchikey_pair_generator = SpectrumPairGenerator(scp_train, simple_training_spectra) # Create generators train_generator_simple = TrainingBatchGenerator(selected_compound_pairs=inchikey_pair_generator, settings=settings) From 417e890eba41afc486933847c1fa0bd27ec57562 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Fri, 22 Aug 2025 13:44:09 +0200 Subject: [PATCH 23/48] Make SpectrumPairGenerator a real generator --- .../train_new_model/SpectrumPairGenerator.py | 33 ++++++++------ .../train_new_model/TrainingBatchGenerator.py | 7 +-- tests/test_data_generators.py | 2 +- tests/test_inchikey_pair_selection.py | 43 +++++++++---------- 4 files changed, 46 insertions(+), 39 deletions(-) diff --git a/ms2deepscore/train_new_model/SpectrumPairGenerator.py b/ms2deepscore/train_new_model/SpectrumPairGenerator.py index 3539505a..4246233f 100644 --- a/ms2deepscore/train_new_model/SpectrumPairGenerator.py +++ b/ms2deepscore/train_new_model/SpectrumPairGenerator.py @@ -7,7 +7,8 @@ class SpectrumPairGenerator: - def __init__(self, selected_inchikey_pairs: List[Tuple[str, str, float]], spectra): + def __init__(self, selected_inchikey_pairs: List[Tuple[str, str, float]], spectra, + shuffle: bool = True, random_seed: int = 0): """ Parameters ---------- @@ -17,19 +18,27 @@ def __init__(self, selected_inchikey_pairs: List[Tuple[str, str, float]], spectr 
self.selected_inchikey_pairs = selected_inchikey_pairs self.spectra = spectra self.spectrum_inchikeys = np.array([s.get("inchikey")[:14] for s in self.spectra]) + self.shuffle = shuffle + self.random_nr_generator = np.random.default_rng(random_seed) + self._idx = 0 + if self.shuffle: + self.random_nr_generator.shuffle(self.selected_inchikey_pairs) - def generator(self, shuffle: bool, random_nr_generator): - """Infinite generator to loop through all inchikeys. - After looping through all inchikeys the order is shuffled. - """ - while True: - if shuffle: - random_nr_generator.shuffle(self.selected_inchikey_pairs) + def __iter__(self): + return self + + def __next__(self): + # reshuffle when we've gone through everything + if self._idx >= len(self.selected_inchikey_pairs): + self._idx = 0 + if self.shuffle: + self.random_nr_generator.shuffle(self.selected_inchikey_pairs) - for inchikey1, inchikey2, tanimoto_score in self.selected_inchikey_pairs: - spectrum1 = self._get_spectrum_with_inchikey(inchikey1, random_nr_generator) - spectrum2 = self._get_spectrum_with_inchikey(inchikey2, random_nr_generator) - yield spectrum1, spectrum2, tanimoto_score + inchikey1, inchikey2, tanimoto_score = self.selected_inchikey_pairs[self._idx] + spectrum1 = self._get_spectrum_with_inchikey(inchikey1, self.random_nr_generator) + spectrum2 = self._get_spectrum_with_inchikey(inchikey2, self.random_nr_generator) + self._idx += 1 + return spectrum1, spectrum2, tanimoto_score def __len__(self): return len(self.selected_inchikey_pairs) diff --git a/ms2deepscore/train_new_model/TrainingBatchGenerator.py b/ms2deepscore/train_new_model/TrainingBatchGenerator.py index 6ee8a347..93aee9b8 100644 --- a/ms2deepscore/train_new_model/TrainingBatchGenerator.py +++ b/ms2deepscore/train_new_model/TrainingBatchGenerator.py @@ -50,7 +50,7 @@ def __init__(self, if self.model_settings.random_seed is None: self.model_settings.random_seed = 0 self.rng = np.random.default_rng(self.model_settings.random_seed) - 
self.inchikey_pair_generator = selected_compound_pairs.generator(self.model_settings.shuffle, self.rng) + self.inchikey_pair_generator = selected_compound_pairs unique_inchikeys = np.unique(selected_compound_pairs.spectrum_inchikeys) if len(unique_inchikeys) < self.model_settings.batch_size: raise ValueError("The number of unique inchikeys must be larger than the batch size.") @@ -120,13 +120,14 @@ def _tensorize_all(self, spectrum_pairs): def create_data_generator(training_spectra, - settings, + settings: SettingsMS2Deepscore, json_save_file=None) -> TrainingBatchGenerator: # todo actually create, both between and across ionmodes. # pos_spectra, neg_spectra = split_by_ionmode(training_spectra) selected_compound_pairs_training = select_compound_pairs_wrapper(training_spectra, settings=settings) - inchikey_pair_generator = SpectrumPairGenerator(selected_compound_pairs_training, training_spectra) + inchikey_pair_generator = SpectrumPairGenerator(selected_compound_pairs_training, training_spectra, + settings.shuffle, settings.random_seed) if json_save_file is not None: inchikey_pair_generator.save_as_json(json_save_file) diff --git a/tests/test_data_generators.py b/tests/test_data_generators.py index beeeb013..192378c5 100644 --- a/tests/test_data_generators.py +++ b/tests/test_data_generators.py @@ -67,7 +67,7 @@ def dummy_data_generator(): ('BBBBBBBBBBBBBB', 'DDDDDDDDDDDDDD', 0.6666667), ('AAAAAAAAAAAAAA', 'CCCCCCCCCCCCCC', 1.0), ('AAAAAAAAAAAAAA', 'BBBBBBBBBBBBBB', 0.33333334)], - spectrums) + spectrums, True, 0) batch_size = 2 settings = SettingsMS2Deepscore(min_mz=10, max_mz=1000, diff --git a/tests/test_inchikey_pair_selection.py b/tests/test_inchikey_pair_selection.py index e0dcac7b..cb1bd944 100644 --- a/tests/test_inchikey_pair_selection.py +++ b/tests/test_inchikey_pair_selection.py @@ -57,19 +57,6 @@ def test_spectra(): return [spectrum_1, spectrum_2, spectrum_3, spectrum_4] -@pytest.fixture -def dummy_inchikey_pair_generator(): - spectrum_pairs = 
[("Inchikey0", "Inchikey1", 0.8), - ("Inchikey0", "Inchikey2", 0.6), - ("Inchikey2", "Inchikey1", 0.3), - ("Inchikey2", "Inchikey2", 1.0)] - return SpectrumPairGenerator(spectrum_pairs, [ - Spectrum(mz=np.array([90.]), intensities=np.array([0.4]), metadata={"inchikey": "Inchikey0"}), - Spectrum(mz=np.array([90.]), intensities=np.array([0.4]), metadata={"inchikey": "Inchikey1"}), - Spectrum(mz=np.array([90.]), intensities=np.array([0.4]), metadata={"inchikey": "Inchikey2"}), - ]) - - def test_compute_jaccard_similarity_per_bin(simple_fingerprints): max_pairs_per_bin = 5 nr_of_bins = 10 @@ -150,14 +137,18 @@ def test_select_inchi_for_unique_inchikeys_two_inchikeys(test_spectra): assert [s.get("inchi")[:15] for s in spectrums_selected] == ['InChI=1/C6H8O6/', 'InChI=1S/C8H10N'] -def test_SelectedInchikeyPairs_generator_with_shuffle(dummy_inchikey_pair_generator): - rng = np.random.default_rng(0) - gen = dummy_inchikey_pair_generator.generator(True, rng) - +def test_SelectedInchikeyPairs_generator_with_shuffle(): + dummy_inchikey_pair_generator = SpectrumPairGenerator( [ + ("Inchikey0", "Inchikey1", 0.8), ("Inchikey0", "Inchikey2", 0.6), + ("Inchikey2", "Inchikey1", 0.3), ("Inchikey2", "Inchikey2", 1.0)], [ + Spectrum(mz=np.array([90.]), intensities=np.array([0.4]), metadata={"inchikey": "Inchikey0"}), + Spectrum(mz=np.array([90.]), intensities=np.array([0.4]), metadata={"inchikey": "Inchikey1"}), + Spectrum(mz=np.array([90.]), intensities=np.array([0.4]), metadata={"inchikey": "Inchikey2"}),], + True, 0) found_pairs = [] # do one complete loop for i in range(len(dummy_inchikey_pair_generator)): - spectrum_1, spectrum_2, score = next(gen) + spectrum_1, spectrum_2, score = next(dummy_inchikey_pair_generator) found_pairs.append((spectrum_1.get("inchikey"), spectrum_2.get("inchikey"), score)) assert len(found_pairs) == len(dummy_inchikey_pair_generator.selected_inchikey_pairs) @@ -166,18 +157,24 @@ def 
test_SelectedInchikeyPairs_generator_with_shuffle(dummy_inchikey_pair_genera found_pairs = [] # do one complete loop for i in range(len(dummy_inchikey_pair_generator)): - spectrum_1, spectrum_2, score = next(gen) + spectrum_1, spectrum_2, score = next(dummy_inchikey_pair_generator) found_pairs.append((spectrum_1.get("inchikey"), spectrum_2.get("inchikey"), score)) assert len(found_pairs) == len(dummy_inchikey_pair_generator.selected_inchikey_pairs) assert sorted(found_pairs) == sorted(dummy_inchikey_pair_generator.selected_inchikey_pairs) -def test_SelectedInchikeyPairs_generator_without_shuffle(dummy_inchikey_pair_generator): - gen = dummy_inchikey_pair_generator.generator(False, np.random.default_rng(0)) +def test_SelectedInchikeyPairs_generator_without_shuffle(): + dummy_inchikey_pair_generator = SpectrumPairGenerator( [ + ("Inchikey0", "Inchikey1", 0.8), ("Inchikey0", "Inchikey2", 0.6), + ("Inchikey2", "Inchikey1", 0.3), ("Inchikey2", "Inchikey2", 1.0)], [ + Spectrum(mz=np.array([90.]), intensities=np.array([0.4]), metadata={"inchikey": "Inchikey0"}), + Spectrum(mz=np.array([90.]), intensities=np.array([0.4]), metadata={"inchikey": "Inchikey1"}), + Spectrum(mz=np.array([90.]), intensities=np.array([0.4]), metadata={"inchikey": "Inchikey2"}),], + True, 0) for _, expected_pair in enumerate(dummy_inchikey_pair_generator.selected_inchikey_pairs): - spectrum_1, spectrum_2, score = next(gen) + spectrum_1, spectrum_2, score = next(dummy_inchikey_pair_generator) assert expected_pair == (spectrum_1.get("inchikey"), spectrum_2.get("inchikey"), score) @@ -190,7 +187,7 @@ def test_select_compound_pairs_wrapper_no_resampling(): batch_size=8, max_pair_resampling=max_pair_resampling) selected_inchikey_pairs = select_compound_pairs_wrapper(spectrums, settings) - inchikey_pair_generator = SpectrumPairGenerator(selected_inchikey_pairs, spectrums) + inchikey_pair_generator = SpectrumPairGenerator(selected_inchikey_pairs, spectrums, True, 0) 
check_balanced_scores_selecting_inchikey_pairs(inchikey_pair_generator, bins) check_correct_oversampling(inchikey_pair_generator, max_pair_resampling) From 1eaae2383e13901a502f8995e9ff0a5856e24e1f Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Fri, 22 Aug 2025 13:45:26 +0200 Subject: [PATCH 24/48] Rename self.spectrum_pair_generator in TrainingBatchGenerator --- .../train_new_model/TrainingBatchGenerator.py | 13 ++++++------- tests/test_siamese_spectra_model.py | 3 +-- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/ms2deepscore/train_new_model/TrainingBatchGenerator.py b/ms2deepscore/train_new_model/TrainingBatchGenerator.py index 93aee9b8..fc8e518a 100644 --- a/ms2deepscore/train_new_model/TrainingBatchGenerator.py +++ b/ms2deepscore/train_new_model/TrainingBatchGenerator.py @@ -25,13 +25,13 @@ class TrainingBatchGenerator: """ def __init__(self, - selected_compound_pairs: SpectrumPairGenerator, + spectrum_pair_generator: SpectrumPairGenerator, settings: SettingsMS2Deepscore): """Generates data for training a siamese Pytorch model. Parameters ---------- - selected_compound_pairs + spectrum_pair_generator SelectedCompoundPairs object which contains selected compounds pairs and the respective similarity scores. 
settings @@ -50,8 +50,8 @@ def __init__(self, if self.model_settings.random_seed is None: self.model_settings.random_seed = 0 self.rng = np.random.default_rng(self.model_settings.random_seed) - self.inchikey_pair_generator = selected_compound_pairs - unique_inchikeys = np.unique(selected_compound_pairs.spectrum_inchikeys) + self.spectrum_pair_generator = spectrum_pair_generator + unique_inchikeys = np.unique(spectrum_pair_generator.spectrum_inchikeys) if len(unique_inchikeys) < self.model_settings.batch_size: raise ValueError("The number of unique inchikeys must be larger than the batch size.") self.fixed_set = {} @@ -77,7 +77,7 @@ def _spectrum_pair_generator(self): """Use the provided SelectedCompoundPairs object to pick pairs.""" for _ in range(self.model_settings.batch_size): try: - spectrum1, spectrum2, score = next(self.inchikey_pair_generator) + spectrum1, spectrum2, score = next(self.spectrum_pair_generator) yield spectrum1, spectrum2, score except StopIteration as exc: raise RuntimeError("The inchikey pair generator is not expected to end, " @@ -134,6 +134,5 @@ def create_data_generator(training_spectra, # todo possibly create a single TrainingBatchGenerator which takes in 3 generators and pos and neg spectra to iteratively select each one. # Create generators # todo also make sure that the TrainingBatchGenerator can work across ionmodes. 
- train_generator = TrainingBatchGenerator(selected_compound_pairs=inchikey_pair_generator, - settings=settings) + train_generator = TrainingBatchGenerator(spectrum_pair_generator=inchikey_pair_generator, settings=settings) return train_generator diff --git a/tests/test_siamese_spectra_model.py b/tests/test_siamese_spectra_model.py index 2578b880..2036b193 100644 --- a/tests/test_siamese_spectra_model.py +++ b/tests/test_siamese_spectra_model.py @@ -134,8 +134,7 @@ def test_model_training(simple_training_spectra): scp_train = select_compound_pairs_wrapper(simple_training_spectra, settings) inchikey_pair_generator = SpectrumPairGenerator(scp_train, simple_training_spectra) # Create generators - train_generator_simple = TrainingBatchGenerator(selected_compound_pairs=inchikey_pair_generator, - settings=settings) + train_generator_simple = TrainingBatchGenerator(spectrum_pair_generator=inchikey_pair_generator, settings=settings) settings.same_prob_bins = np.array([(-0.01, 1.0)]) validation_loss_calculator = ValidationLossCalculator( simple_training_spectra, From b514b1154232e980066b45af6a28bd5dfe05bce7 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Fri, 22 Aug 2025 13:57:34 +0200 Subject: [PATCH 25/48] Move create data generator to train_ms2deepscore.py --- .../train_new_model/TrainingBatchGenerator.py | 21 ----------------- .../train_new_model/train_ms2deepscore.py | 23 ++++++++++++++++++- .../training_wrapper_functions.py | 3 +-- tests/test_data_generators.py | 4 ++-- 4 files changed, 25 insertions(+), 26 deletions(-) diff --git a/ms2deepscore/train_new_model/TrainingBatchGenerator.py b/ms2deepscore/train_new_model/TrainingBatchGenerator.py index fc8e518a..8df11390 100644 --- a/ms2deepscore/train_new_model/TrainingBatchGenerator.py +++ b/ms2deepscore/train_new_model/TrainingBatchGenerator.py @@ -6,8 +6,6 @@ from ms2deepscore.tensorize_spectra import tensorize_spectra from ms2deepscore.train_new_model.SpectrumPairGenerator import SpectrumPairGenerator from 
ms2deepscore.train_new_model.data_augmentation import data_augmentation -from ms2deepscore.train_new_model.inchikey_pair_selection import ( - select_compound_pairs_wrapper) class TrainingBatchGenerator: @@ -117,22 +115,3 @@ def _tensorize_all(self, spectrum_pairs): binned_spectra_1, metadata_1 = tensorize_spectra(spectra_1, self.model_settings) binned_spectra_2, metadata_2 = tensorize_spectra(spectra_2, self.model_settings) return binned_spectra_1, binned_spectra_2, metadata_1, metadata_2, torch.tensor(targets, dtype=torch.float32) - - -def create_data_generator(training_spectra, - settings: SettingsMS2Deepscore, - json_save_file=None) -> TrainingBatchGenerator: - # todo actually create, both between and across ionmodes. - # pos_spectra, neg_spectra = split_by_ionmode(training_spectra) - - selected_compound_pairs_training = select_compound_pairs_wrapper(training_spectra, settings=settings) - inchikey_pair_generator = SpectrumPairGenerator(selected_compound_pairs_training, training_spectra, - settings.shuffle, settings.random_seed) - - if json_save_file is not None: - inchikey_pair_generator.save_as_json(json_save_file) - # todo possibly create a single TrainingBatchGenerator which takes in 3 generators and pos and neg spectra to iteratively select each one. - # Create generators - # todo also make sure that the TrainingBatchGenerator can work across ionmodes. 
- train_generator = TrainingBatchGenerator(spectrum_pair_generator=inchikey_pair_generator, settings=settings) - return train_generator diff --git a/ms2deepscore/train_new_model/train_ms2deepscore.py b/ms2deepscore/train_new_model/train_ms2deepscore.py index 2f03896f..fbf43473 100644 --- a/ms2deepscore/train_new_model/train_ms2deepscore.py +++ b/ms2deepscore/train_new_model/train_ms2deepscore.py @@ -7,10 +7,12 @@ import numpy as np from matplotlib import pyplot as plt + +from ms2deepscore import SettingsMS2Deepscore from ms2deepscore.models.SiameseSpectralModel import (SiameseSpectralModel, train) from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore -from ms2deepscore.train_new_model.TrainingBatchGenerator import create_data_generator +from ms2deepscore.train_new_model import TrainingBatchGenerator, select_compound_pairs_wrapper, SpectrumPairGenerator from ms2deepscore.validation_loss_calculation.ValidationLossCalculator import \ ValidationLossCalculator @@ -53,6 +55,25 @@ def train_ms2ds_model( return model, history +def create_data_generator(training_spectra, + settings: SettingsMS2Deepscore, + json_save_file=None) -> TrainingBatchGenerator: + # todo actually create, both between and across ionmodes. + # pos_spectra, neg_spectra = split_by_ionmode(training_spectra) + + selected_compound_pairs_training = select_compound_pairs_wrapper(training_spectra, settings=settings) + inchikey_pair_generator = SpectrumPairGenerator(selected_compound_pairs_training, training_spectra, + settings.shuffle, settings.random_seed) + + if json_save_file is not None: + inchikey_pair_generator.save_as_json(json_save_file) + # todo possibly create a single TrainingBatchGenerator which takes in 3 generators and pos and neg spectra to iteratively select each one. + # Create generators + # todo also make sure that the TrainingBatchGenerator can work across ionmodes. 
+ train_generator = TrainingBatchGenerator(spectrum_pair_generator=inchikey_pair_generator, settings=settings) + return train_generator + + def plot_history(losses, val_losses, file_name: Optional[str] = None): plt.plot(losses) plt.plot(val_losses) diff --git a/ms2deepscore/wrapper_functions/training_wrapper_functions.py b/ms2deepscore/wrapper_functions/training_wrapper_functions.py index f6efa957..4f57b60a 100644 --- a/ms2deepscore/wrapper_functions/training_wrapper_functions.py +++ b/ms2deepscore/wrapper_functions/training_wrapper_functions.py @@ -14,9 +14,8 @@ train) from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore from ms2deepscore.validation_loss_calculation.ValidationLossCalculator import ValidationLossCalculator -from ms2deepscore.train_new_model.TrainingBatchGenerator import create_data_generator from ms2deepscore.train_new_model.train_ms2deepscore import \ - train_ms2ds_model, plot_history, save_history + train_ms2ds_model, plot_history, save_history, create_data_generator from ms2deepscore.train_new_model.validation_and_test_split import \ split_spectra_in_random_inchikey_sets from ms2deepscore.utils import load_spectra_as_list diff --git a/tests/test_data_generators.py b/tests/test_data_generators.py index 192378c5..faf540ee 100644 --- a/tests/test_data_generators.py +++ b/tests/test_data_generators.py @@ -5,8 +5,8 @@ from matchms import Spectrum from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore, SettingsEmbeddingEvaluator from ms2deepscore.tensorize_spectra import tensorize_spectra -from ms2deepscore.train_new_model.TrainingBatchGenerator import TrainingBatchGenerator, \ - create_data_generator +from ms2deepscore.train_new_model.TrainingBatchGenerator import TrainingBatchGenerator +from ms2deepscore.train_new_model.train_ms2deepscore import create_data_generator from ms2deepscore.train_new_model.DataGeneratorEmbeddingEvaluation import DataGeneratorEmbeddingEvaluation from ms2deepscore.train_new_model import 
SpectrumPairGenerator from tests.create_test_spectra import create_test_spectra From ad9425df83520849b6a206517f04510d80ab927b Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Fri, 22 Aug 2025 14:29:55 +0200 Subject: [PATCH 26/48] Directly return a SpectrumPairGenerator instead of list of pairs from the inchikey_pair_selection wrapper --- .../inchikey_pair_selection.py | 6 ++-- .../train_new_model/train_ms2deepscore.py | 32 ++++++++++++------- tests/test_inchikey_pair_selection.py | 9 ++---- tests/test_siamese_spectra_model.py | 3 +- 4 files changed, 28 insertions(+), 22 deletions(-) diff --git a/ms2deepscore/train_new_model/inchikey_pair_selection.py b/ms2deepscore/train_new_model/inchikey_pair_selection.py index 68d95fc8..cc153b86 100644 --- a/ms2deepscore/train_new_model/inchikey_pair_selection.py +++ b/ms2deepscore/train_new_model/inchikey_pair_selection.py @@ -8,12 +8,13 @@ from numba import jit, prange from tqdm import tqdm from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore +from ms2deepscore.train_new_model import SpectrumPairGenerator def select_compound_pairs_wrapper( spectra: List[Spectrum], settings: SettingsMS2Deepscore, -) -> List[Tuple[str, str, float]]: +) -> SpectrumPairGenerator: """Returns a SpectrumPairGenerator object containing equally balanced pairs over the different bins spectra: @@ -53,7 +54,8 @@ def select_compound_pairs_wrapper( pair_frequency_matrixes, available_pairs_per_bin_matrix, available_scores_per_bin_matrix, inchikeys14_unique) - return [pair for pairs in selected_pairs_per_bin for pair in pairs] + return SpectrumPairGenerator([pair for pairs in selected_pairs_per_bin for pair in pairs], + spectra, settings.shuffle, settings.random_seed) def compute_fingerprints_for_training( diff --git a/ms2deepscore/train_new_model/train_ms2deepscore.py b/ms2deepscore/train_new_model/train_ms2deepscore.py index fbf43473..8107a9e8 100644 --- a/ms2deepscore/train_new_model/train_ms2deepscore.py +++ 
b/ms2deepscore/train_new_model/train_ms2deepscore.py @@ -8,7 +8,6 @@ import numpy as np from matplotlib import pyplot as plt -from ms2deepscore import SettingsMS2Deepscore from ms2deepscore.models.SiameseSpectralModel import (SiameseSpectralModel, train) from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore @@ -54,23 +53,32 @@ def train_ms2ds_model( checkpoint_filename=output_model_file_name, lambda_l1=0, lambda_l2=0) return model, history +# def create_data_generator_across_ionmodes(training_spectra, +# settings: SettingsMS2Deepscore, +# json_save_file=None) -> TrainingBatchGenerator: +# # todo actually create, both between and across ionmodes. +# pos_spectra, neg_spectra = split_by_ionmode(training_spectra) +# +# pos_spectrum_pair_generator = select_compound_pairs_wrapper(pos_spectra, settings=settings) +# neg_spectrum_pair_generator = select_compound_pairs_wrapper(neg_spectra, settings=settings) +# pos_neg_spectrum_pair_generator = select_compound_pairs_wrapper_across_ionmode(pos_spectra, neg_spectra, settings) +# +# if json_save_file is not None: +# inchikey_pair_generator.save_as_json(json_save_file) +# # todo possibly create a single TrainingBatchGenerator which takes in 3 generators and pos and neg spectra to iteratively select each one. +# # Create generators +# # todo also make sure that the TrainingBatchGenerator can work across ionmodes. +# train_generator = TrainingBatchGenerator(spectrum_pair_generator=inchikey_pair_generator, settings=settings) +# return train_generator def create_data_generator(training_spectra, settings: SettingsMS2Deepscore, json_save_file=None) -> TrainingBatchGenerator: - # todo actually create, both between and across ionmodes. 
- # pos_spectra, neg_spectra = split_by_ionmode(training_spectra) - - selected_compound_pairs_training = select_compound_pairs_wrapper(training_spectra, settings=settings) - inchikey_pair_generator = SpectrumPairGenerator(selected_compound_pairs_training, training_spectra, - settings.shuffle, settings.random_seed) - + spectrum_pair_generator = select_compound_pairs_wrapper(training_spectra, settings=settings) if json_save_file is not None: - inchikey_pair_generator.save_as_json(json_save_file) - # todo possibly create a single TrainingBatchGenerator which takes in 3 generators and pos and neg spectra to iteratively select each one. + spectrum_pair_generator.save_as_json(json_save_file) # Create generators - # todo also make sure that the TrainingBatchGenerator can work across ionmodes. - train_generator = TrainingBatchGenerator(spectrum_pair_generator=inchikey_pair_generator, settings=settings) + train_generator = TrainingBatchGenerator(spectrum_pair_generator=spectrum_pair_generator, settings=settings) return train_generator diff --git a/tests/test_inchikey_pair_selection.py b/tests/test_inchikey_pair_selection.py index cb1bd944..e23c2343 100644 --- a/tests/test_inchikey_pair_selection.py +++ b/tests/test_inchikey_pair_selection.py @@ -186,8 +186,7 @@ def test_select_compound_pairs_wrapper_no_resampling(): average_inchikey_sampling_count=10, batch_size=8, max_pair_resampling=max_pair_resampling) - selected_inchikey_pairs = select_compound_pairs_wrapper(spectrums, settings) - inchikey_pair_generator = SpectrumPairGenerator(selected_inchikey_pairs, spectrums, True, 0) + inchikey_pair_generator = select_compound_pairs_wrapper(spectrums, settings) check_balanced_scores_selecting_inchikey_pairs(inchikey_pair_generator, bins) check_correct_oversampling(inchikey_pair_generator, max_pair_resampling) @@ -207,8 +206,7 @@ def test_select_compound_pairs_wrapper_with_resampling(): average_inchikey_sampling_count=10, batch_size=8, max_pair_resampling=max_pair_resampling) - 
selected_inchikey_pairs = select_compound_pairs_wrapper(spectrums, settings) - inchikey_pair_generator = SpectrumPairGenerator(selected_inchikey_pairs, spectrums) + inchikey_pair_generator = select_compound_pairs_wrapper(spectrums, settings) check_balanced_scores_selecting_inchikey_pairs(inchikey_pair_generator, bins) check_correct_oversampling(inchikey_pair_generator, max_pair_resampling) @@ -230,8 +228,7 @@ def test_select_compound_pairs_wrapper_maximum_inchikey_count(): max_pair_resampling=max_pair_resampling, max_inchikey_sampling=max_inchikey_sampling ) - selected_inchikey_pairs = select_compound_pairs_wrapper(spectrums, settings) - inchikey_pair_generator = SpectrumPairGenerator(selected_inchikey_pairs, spectrums) + inchikey_pair_generator = select_compound_pairs_wrapper(spectrums, settings) highest_inchikey_count = max(inchikey_pair_generator.get_inchikey_counts().values()) assert highest_inchikey_count <= max_inchikey_sampling + 1 # +1 because there is a chance that the last added inchikey is a pair to itself... 
diff --git a/tests/test_siamese_spectra_model.py b/tests/test_siamese_spectra_model.py index 2036b193..d69b56f8 100644 --- a/tests/test_siamese_spectra_model.py +++ b/tests/test_siamese_spectra_model.py @@ -131,8 +131,7 @@ def test_model_training(simple_training_spectra): batch_size=2, num_turns=20, ) - scp_train = select_compound_pairs_wrapper(simple_training_spectra, settings) - inchikey_pair_generator = SpectrumPairGenerator(scp_train, simple_training_spectra) + inchikey_pair_generator = select_compound_pairs_wrapper(simple_training_spectra, settings) # Create generators train_generator_simple = TrainingBatchGenerator(spectrum_pair_generator=inchikey_pair_generator, settings=settings) settings.same_prob_bins = np.array([(-0.01, 1.0)]) From b158ecfc6cc4e2308ae6cce6d9e61977bc7b06cd Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Fri, 22 Aug 2025 14:31:48 +0200 Subject: [PATCH 27/48] Remove option for saving the inchikey pairs when training model --- ms2deepscore/train_new_model/train_ms2deepscore.py | 1 - ms2deepscore/wrapper_functions/training_wrapper_functions.py | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/ms2deepscore/train_new_model/train_ms2deepscore.py b/ms2deepscore/train_new_model/train_ms2deepscore.py index 8107a9e8..20c48c9a 100644 --- a/ms2deepscore/train_new_model/train_ms2deepscore.py +++ b/ms2deepscore/train_new_model/train_ms2deepscore.py @@ -21,7 +21,6 @@ def train_ms2ds_model( validation_spectra, results_folder, settings: SettingsMS2Deepscore, - inchikey_pairs_file: str = None, ): """Full workflow to train a MS2DeepScore model. 
""" diff --git a/ms2deepscore/wrapper_functions/training_wrapper_functions.py b/ms2deepscore/wrapper_functions/training_wrapper_functions.py index 4f57b60a..01d88865 100644 --- a/ms2deepscore/wrapper_functions/training_wrapper_functions.py +++ b/ms2deepscore/wrapper_functions/training_wrapper_functions.py @@ -42,10 +42,7 @@ def train_ms2deepscore_wrapper(settings: SettingsMS2Deepscore, validation_spectra = load_spectra_in_ionmode(settings.validation_spectra_file_name, settings.ionisation_mode) # Train model - _, history = train_ms2ds_model( - training_spectra, validation_spectra, settings.model_directory_name, - settings, - ) + _, history = train_ms2ds_model(training_spectra, validation_spectra, settings.model_directory_name, settings) ms2ds_history_plot_file_name = os.path.join(settings.model_directory_name, settings.history_plot_file_name) plot_history(history["losses"], history["val_losses"], ms2ds_history_plot_file_name) From 606144191616d694e88c42da60d3550131162178 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Fri, 22 Aug 2025 14:39:35 +0200 Subject: [PATCH 28/48] Remove create_data_generator function --- .../train_new_model/train_ms2deepscore.py | 17 +++----------- .../training_wrapper_functions.py | 6 +++-- tests/test_data_generators.py | 22 +++++++++---------- 3 files changed, 17 insertions(+), 28 deletions(-) diff --git a/ms2deepscore/train_new_model/train_ms2deepscore.py b/ms2deepscore/train_new_model/train_ms2deepscore.py index 20c48c9a..6d85753c 100644 --- a/ms2deepscore/train_new_model/train_ms2deepscore.py +++ b/ms2deepscore/train_new_model/train_ms2deepscore.py @@ -29,11 +29,9 @@ def train_ms2ds_model( settings.save_to_file(os.path.join(results_folder, "settings.json")) # Create a training generator - if inchikey_pairs_file is None: - train_generator = create_data_generator(training_spectra, settings, None) - else: - train_generator = create_data_generator(training_spectra, settings, - os.path.join(results_folder, inchikey_pairs_file)) + 
spectrum_pair_generator = select_compound_pairs_wrapper(training_spectra, settings=settings) + train_generator = TrainingBatchGenerator(spectrum_pair_generator=spectrum_pair_generator, settings=settings) + # Create a validation loss calculator validation_loss_calculator = ValidationLossCalculator(validation_spectra, settings=settings) @@ -70,15 +68,6 @@ def train_ms2ds_model( # train_generator = TrainingBatchGenerator(spectrum_pair_generator=inchikey_pair_generator, settings=settings) # return train_generator -def create_data_generator(training_spectra, - settings: SettingsMS2Deepscore, - json_save_file=None) -> TrainingBatchGenerator: - spectrum_pair_generator = select_compound_pairs_wrapper(training_spectra, settings=settings) - if json_save_file is not None: - spectrum_pair_generator.save_as_json(json_save_file) - # Create generators - train_generator = TrainingBatchGenerator(spectrum_pair_generator=spectrum_pair_generator, settings=settings) - return train_generator def plot_history(losses, val_losses, file_name: Optional[str] = None): diff --git a/ms2deepscore/wrapper_functions/training_wrapper_functions.py b/ms2deepscore/wrapper_functions/training_wrapper_functions.py index 01d88865..06151226 100644 --- a/ms2deepscore/wrapper_functions/training_wrapper_functions.py +++ b/ms2deepscore/wrapper_functions/training_wrapper_functions.py @@ -13,9 +13,10 @@ from ms2deepscore.models.SiameseSpectralModel import (SiameseSpectralModel, train) from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore +from ms2deepscore.train_new_model import TrainingBatchGenerator, select_compound_pairs_wrapper from ms2deepscore.validation_loss_calculation.ValidationLossCalculator import ValidationLossCalculator from ms2deepscore.train_new_model.train_ms2deepscore import \ - train_ms2ds_model, plot_history, save_history, create_data_generator + train_ms2ds_model, plot_history, save_history from ms2deepscore.train_new_model.validation_and_test_split import \ 
split_spectra_in_random_inchikey_sets from ms2deepscore.utils import load_spectra_as_list @@ -127,7 +128,8 @@ def parameter_search( os.makedirs(settings.model_directory_name, exist_ok=True) settings.save_to_file(os.path.join(settings.model_directory_name, "settings.json")) # Create a training generator - train_generator = create_data_generator(training_spectra, settings) + spectrum_pair_generator = select_compound_pairs_wrapper(training_spectra, settings=settings) + train_generator = TrainingBatchGenerator(spectrum_pair_generator=spectrum_pair_generator, settings=settings) # Create a validation loss calculator validation_loss_calculator = ValidationLossCalculator(validation_spectra, settings=settings) diff --git a/tests/test_data_generators.py b/tests/test_data_generators.py index faf540ee..51f7e099 100644 --- a/tests/test_data_generators.py +++ b/tests/test_data_generators.py @@ -6,9 +6,8 @@ from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore, SettingsEmbeddingEvaluator from ms2deepscore.tensorize_spectra import tensorize_spectra from ms2deepscore.train_new_model.TrainingBatchGenerator import TrainingBatchGenerator -from ms2deepscore.train_new_model.train_ms2deepscore import create_data_generator from ms2deepscore.train_new_model.DataGeneratorEmbeddingEvaluation import DataGeneratorEmbeddingEvaluation -from ms2deepscore.train_new_model import SpectrumPairGenerator +from ms2deepscore.train_new_model import SpectrumPairGenerator, select_compound_pairs_wrapper from tests.create_test_spectra import create_test_spectra @@ -162,21 +161,20 @@ def test_create_data_generator(): """tests if a the function create_data_generator creates a datagenerator that samples all input spectra correct distributions of inchikeys and scores are tested in other tests""" test_spectra = create_test_spectra(8, 3) - data_generator = create_data_generator(training_spectra=test_spectra, - settings=SettingsMS2Deepscore( - min_mz=10, - max_mz=1000, - mz_bin_width=0.1, - 
intensity_scaling=0.5, - additional_metadata=[], - same_prob_bins=np.array([(-0.000001, 0.25), (0.25, 0.5), (0.5, 0.75), + settings = SettingsMS2Deepscore(min_mz=10, max_mz=1000, + mz_bin_width=0.1, + intensity_scaling=0.5, + additional_metadata=[], + same_prob_bins=np.array([(-0.000001, 0.25), (0.25, 0.5), (0.5, 0.75), (0.75, 1)]), - batch_size=2, + batch_size=2, num_turns=4, augment_removal_max=0.0, augment_removal_intensity=0.0, augment_intensity=0.0, - augment_noise_max=0)) + augment_noise_max=0) + spectrum_pair_generator = select_compound_pairs_wrapper(test_spectra, settings=settings) + data_generator = TrainingBatchGenerator(spectrum_pair_generator=spectrum_pair_generator, settings=settings) tensorized_spectra = [] epochs = 20 for _ in range(epochs): From 7cb6d3120c710e14709d24f1ca2535ed09de1817 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Fri, 22 Aug 2025 14:52:57 +0200 Subject: [PATCH 29/48] Remove cross ionmode function from train_ms2ds_model --- .../train_new_model/train_ms2deepscore.py | 21 +------------------ 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/ms2deepscore/train_new_model/train_ms2deepscore.py b/ms2deepscore/train_new_model/train_ms2deepscore.py index 6d85753c..e5ae982f 100644 --- a/ms2deepscore/train_new_model/train_ms2deepscore.py +++ b/ms2deepscore/train_new_model/train_ms2deepscore.py @@ -11,7 +11,7 @@ from ms2deepscore.models.SiameseSpectralModel import (SiameseSpectralModel, train) from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore -from ms2deepscore.train_new_model import TrainingBatchGenerator, select_compound_pairs_wrapper, SpectrumPairGenerator +from ms2deepscore.train_new_model import TrainingBatchGenerator, select_compound_pairs_wrapper from ms2deepscore.validation_loss_calculation.ValidationLossCalculator import \ ValidationLossCalculator @@ -50,25 +50,6 @@ def train_ms2ds_model( checkpoint_filename=output_model_file_name, lambda_l1=0, lambda_l2=0) return model, history -# def 
create_data_generator_across_ionmodes(training_spectra, -# settings: SettingsMS2Deepscore, -# json_save_file=None) -> TrainingBatchGenerator: -# # todo actually create, both between and across ionmodes. -# pos_spectra, neg_spectra = split_by_ionmode(training_spectra) -# -# pos_spectrum_pair_generator = select_compound_pairs_wrapper(pos_spectra, settings=settings) -# neg_spectrum_pair_generator = select_compound_pairs_wrapper(neg_spectra, settings=settings) -# pos_neg_spectrum_pair_generator = select_compound_pairs_wrapper_across_ionmode(pos_spectra, neg_spectra, settings) -# -# if json_save_file is not None: -# inchikey_pair_generator.save_as_json(json_save_file) -# # todo possibly create a single TrainingBatchGenerator which takes in 3 generators and pos and neg spectra to iteratively select each one. -# # Create generators -# # todo also make sure that the TrainingBatchGenerator can work across ionmodes. -# train_generator = TrainingBatchGenerator(spectrum_pair_generator=inchikey_pair_generator, settings=settings) -# return train_generator - - def plot_history(losses, val_losses, file_name: Optional[str] = None): plt.plot(losses) From 9df6593951d240893d1439b9bf4bfe379723873d Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Fri, 22 Aug 2025 15:02:28 +0200 Subject: [PATCH 30/48] Derive nr_of_unique inchikeys from the nr of pairs --- ms2deepscore/train_new_model/TrainingBatchGenerator.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ms2deepscore/train_new_model/TrainingBatchGenerator.py b/ms2deepscore/train_new_model/TrainingBatchGenerator.py index 8df11390..34412855 100644 --- a/ms2deepscore/train_new_model/TrainingBatchGenerator.py +++ b/ms2deepscore/train_new_model/TrainingBatchGenerator.py @@ -49,12 +49,14 @@ def __init__(self, self.model_settings.random_seed = 0 self.rng = np.random.default_rng(self.model_settings.random_seed) self.spectrum_pair_generator = spectrum_pair_generator - unique_inchikeys = 
np.unique(spectrum_pair_generator.spectrum_inchikeys) - if len(unique_inchikeys) < self.model_settings.batch_size: + # The number of unique inchikeys derived from the number of spectrum pairs. + nr_of_unique_inchikeys = int(len(spectrum_pair_generator) / settings.average_inchikey_sampling_count * 2) + # The length of unique inchikeys is len(selected_inchikeys_pairs) / average number of pairs + if nr_of_unique_inchikeys < self.model_settings.batch_size: raise ValueError("The number of unique inchikeys must be larger than the batch size.") self.fixed_set = {} - self.nr_of_batches = int(self.model_settings.num_turns) * int(np.ceil(len(unique_inchikeys) / + self.nr_of_batches = int(self.model_settings.num_turns) * int(np.ceil(nr_of_unique_inchikeys / self.model_settings.batch_size)) def __len__(self): From 2cfbe1ae74284ea8986a7264317b290506ca7b2a Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Fri, 22 Aug 2025 15:11:14 +0200 Subject: [PATCH 31/48] Fix test to calculate unique number of inchikeys correctly again --- tests/test_data_generators.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_data_generators.py b/tests/test_data_generators.py index 51f7e099..f3f002c1 100644 --- a/tests/test_data_generators.py +++ b/tests/test_data_generators.py @@ -79,7 +79,8 @@ def dummy_data_generator(): augment_removal_max=0.0, augment_removal_intensity=0.0, augment_intensity=0.0, - augment_noise_max=0) + augment_noise_max=0, + average_inchikey_sampling_count=2) return TrainingBatchGenerator(selected_pairs, settings) From aeea0af91f58f52184f852ff45c92083dfb0384b Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Fri, 22 Aug 2025 15:11:29 +0200 Subject: [PATCH 32/48] Add cross ionization mode generators --- .../inchikey_pair_selection_cross_ionmode.py | 139 +++++++++++++++++- 1 file changed, 135 insertions(+), 4 deletions(-) diff --git a/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py 
b/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py index 287c5a9c..6a2b8264 100644 --- a/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py +++ b/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py @@ -1,17 +1,21 @@ +import json from typing import List, Tuple +from collections import Counter import numpy as np from matchms import Spectrum from numba import jit, prange from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore +from ms2deepscore.train_new_model import TrainingBatchGenerator, SpectrumPairGenerator from ms2deepscore.train_new_model.inchikey_pair_selection import compute_fingerprints_for_training, \ - balanced_selection_of_pairs_per_bin, convert_to_selected_pairs_list, tanimoto_scores_row - + balanced_selection_of_pairs_per_bin, convert_to_selected_pairs_list, tanimoto_scores_row, \ + select_compound_pairs_wrapper +from ms2deepscore.utils import split_by_ionmode def select_compound_pairs_wrapper_across_ionmode( spectra_1: List[Spectrum], spectra_2: List[Spectrum], settings: SettingsMS2Deepscore, -) -> List[Tuple[str, str, float]]: +) -> "SpectrumPairGeneratorAcrossIonmodes": """Returns a SpectrumPairGenerator object containing equally balanced pairs over the different bins spectra: @@ -51,7 +55,8 @@ def select_compound_pairs_wrapper_across_ionmode( selected_pairs_per_bin = convert_to_selected_pairs_list( pair_frequency_matrixes, available_pairs_per_bin_matrix, available_scores_per_bin_matrix, inchikeys14_unique_1 + inchikeys14_unique_2) - return [pair for pairs in selected_pairs_per_bin for pair in pairs] + return SpectrumPairGeneratorAcrossIonmodes([pair for pairs in selected_pairs_per_bin for pair in pairs], + spectra_1, spectra_2, settings.shuffle, settings.random_seed) @jit(nopython=True, parallel=True) @@ -114,3 +119,129 @@ def compute_jaccard_similarity_per_bin_across_ionmodes( selected_scores_per_bin[bin_number, idx_fingerprint_corrected, :num_indices] = tanimoto_scores[indices] 
return selected_pairs_per_bin, selected_scores_per_bin + + +class SpectrumPairGeneratorAcrossIonmodes: + def __init__(self, selected_inchikey_pairs: List[Tuple[str, str, float]], + spectra_pos: List[Spectrum], spectra_neg: List[Spectrum], + shuffle: bool = True, random_seed: int = 0): + """ + Parameters + ---------- + selected_inchikey_pairs: + A list with tuples encoding inchikey pairs like: (inchikey1, inchikey2, tanimoto_score) + """ + self.selected_inchikey_pairs = selected_inchikey_pairs + self.spectra_pos = spectra_pos + self.spectra_neg = spectra_neg + + self.pos_inchikeys = np.array([s.get("inchikey")[:14] for s in self.spectra_pos]) + self.neg_inchikeys= np.array([s.get("inchikey")[:14] for s in self.spectra_neg]) + + self.shuffle = shuffle + self.random_nr_generator = np.random.default_rng(random_seed) + self._idx = 0 + if self.shuffle: + self.random_nr_generator.shuffle(self.selected_inchikey_pairs) + + def __iter__(self): + return self + + def __next__(self): + # reshuffle when we've gone through everything + if self._idx >= len(self.selected_inchikey_pairs): + self._idx = 0 + if self.shuffle: + self.random_nr_generator.shuffle(self.selected_inchikey_pairs) + + inchikey1, inchikey2, tanimoto_score = self.selected_inchikey_pairs[self._idx] + spectrum1 = self._get_pos_spectrum_with_inchikey(inchikey1, self.random_nr_generator) + spectrum2 = self._get_neg_spectrum_with_inchikey(inchikey2, self.random_nr_generator) + self._idx += 1 + return spectrum1, spectrum2, tanimoto_score + + def __len__(self): + return len(self.selected_inchikey_pairs) + + def __str__(self): + return f"SpectrumPairGenerator with {len(self.selected_inchikey_pairs)} pairs available" + + def get_scores(self): + return [score for _, _, score in self.selected_inchikey_pairs] + + def get_inchikey_counts(self) -> Counter: + """returns the frequency each inchikey occurs""" + inchikeys = Counter() + for inchikey_1, inchikey_2, _ in self.selected_inchikey_pairs: + inchikeys[inchikey_1] += 1 + 
inchikeys[inchikey_2] += 1 + return inchikeys + + def get_scores_per_inchikey(self): + inchikey_scores = {} + for inchikey_1, inchikey_2, score in self.selected_inchikey_pairs: + if inchikey_1 in inchikey_scores: + inchikey_scores[inchikey_1].append(score) + else: + inchikey_scores[inchikey_1] = [] + if inchikey_2 in inchikey_scores: + inchikey_scores[inchikey_2].append(score) + else: + inchikey_scores[inchikey_2] = [] + return inchikey_scores + + def save_as_json(self, file_name): + data_for_json = [(item[0], item[1], float(item[2])) for item in self.selected_inchikey_pairs] + + with open(file_name, "w", encoding="utf-8") as f: + json.dump(data_for_json, f) + + def _get_pos_spectrum_with_inchikey(self, inchikey: str, random_number_generator) -> Spectrum: + matching_spectrum_id = np.where(self.pos_inchikeys == inchikey)[0] + if len(matching_spectrum_id) <= 0: + raise ValueError("No matching inchikey found (note: expected first 14 characters), " + "likely switched pos and neg in entry") + return self.spectra_pos[random_number_generator.choice(matching_spectrum_id)] + + def _get_neg_spectrum_with_inchikey(self, inchikey: str, random_number_generator) -> Spectrum: + matching_spectrum_id = np.where(self.neg_inchikeys == inchikey)[0] + if len(matching_spectrum_id) <= 0: + raise ValueError("No matching inchikey found (note: expected first 14 characters), " + "likely switched pos and neg in entry") + return self.spectra_neg[random_number_generator.choice(matching_spectrum_id)] + + +def create_data_generator_across_ionmodes(training_spectra, + settings: SettingsMS2Deepscore) -> TrainingBatchGenerator: + pos_spectra, neg_spectra = split_by_ionmode(training_spectra) + + pos_spectrum_pair_generator = select_compound_pairs_wrapper(pos_spectra, settings=settings) + neg_spectrum_pair_generator = select_compound_pairs_wrapper(neg_spectra, settings=settings) + pos_neg_spectrum_pair_generator = select_compound_pairs_wrapper_across_ionmode(pos_spectra, neg_spectra, settings) + + 
spectrum_pair_generator = CombinedSpectrumGenerator([pos_spectrum_pair_generator, neg_spectrum_pair_generator, pos_neg_spectrum_pair_generator]) + + train_generator = TrainingBatchGenerator(spectrum_pair_generator=spectrum_pair_generator, settings=settings) + return train_generator + + +class CombinedSpectrumGenerator: + """Combines multiple SpectrumPairGenerators into a single generator + + This is used to combine different iterators for each ionmode pair""" + def __init__(self, spectrum_pair_generators: List[SpectrumPairGenerator]): + self.generators = spectrum_pair_generators + self._idx = 0 + + def __iter__(self): + return self + + def __next__(self): + if not self.generators: + raise StopIteration + current_generator = self.generators[self._idx % len(self.generators)] + self._idx += 1 + return next(current_generator) + + def __len__(self): + return sum([len(generator) for generator in self.generators]) \ No newline at end of file From 555f0d61be06489bb165068624e6b6413ef27ea5 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Fri, 22 Aug 2025 18:26:55 +0200 Subject: [PATCH 33/48] Fix the order of pairs in convert_to_selected_pairs_list, so pos is always first and neg always second --- .../train_new_model/inchikey_pair_selection.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/ms2deepscore/train_new_model/inchikey_pair_selection.py b/ms2deepscore/train_new_model/inchikey_pair_selection.py index cc153b86..9a07786f 100644 --- a/ms2deepscore/train_new_model/inchikey_pair_selection.py +++ b/ms2deepscore/train_new_model/inchikey_pair_selection.py @@ -242,16 +242,22 @@ def convert_to_selected_pairs_list(pair_frequency_matrixes: np.ndarray, for bin_id, bin_pair_frequency_matrix in enumerate(tqdm(pair_frequency_matrixes)): selected_pairs = [] for inchikey1_index, pair_frequency_row in enumerate(bin_pair_frequency_matrix): - for inchikey2_index, pair_frequency in enumerate(pair_frequency_row): + for column_index, pair_frequency in 
enumerate(pair_frequency_row): if pair_frequency > 0: - inchikey2 = available_pairs_per_bin_matrix[bin_id][inchikey1_index][inchikey2_index] + inchikey2_index = available_pairs_per_bin_matrix[bin_id][inchikey1_index][column_index] score = scores_matrix[bin_id][inchikey1_index][inchikey2_index] - selected_pairs.extend( - [(inchikeys14_unique[inchikey1_index], inchikeys14_unique[inchikey2], score)] * pair_frequency) + # This ensures that the order is the same. + # This is important for the cross ionization mode selection. + if inchikey1_index < inchikey2_index: + selected_pairs.extend( + [(inchikeys14_unique[inchikey1_index], inchikeys14_unique[inchikey2_index], score)] * pair_frequency) + else: + selected_pairs.extend( + [(inchikeys14_unique[inchikey2_index], inchikeys14_unique[inchikey1_index], score)] * pair_frequency) # remove duplicate pairs position_of_first_inchikey_in_matrix = available_pairs_per_bin_matrix[bin_id][ - inchikey2] == inchikey1_index - bin_pair_frequency_matrix[inchikey2][position_of_first_inchikey_in_matrix] = 0 + inchikey2_index] == inchikey1_index + bin_pair_frequency_matrix[inchikey2_index][position_of_first_inchikey_in_matrix] = 0 selected_pairs_per_bin.append(selected_pairs) return selected_pairs_per_bin From df5015539322ce8d405b4c45a593351837d7adc8 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Fri, 22 Aug 2025 18:27:18 +0200 Subject: [PATCH 34/48] Change test training wrapper function to both ionization modes --- tests/test_training_wrapper_function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_training_wrapper_function.py b/tests/test_training_wrapper_function.py index 9fc94d31..d1b870f2 100644 --- a/tests/test_training_wrapper_function.py +++ b/tests/test_training_wrapper_function.py @@ -21,7 +21,7 @@ def test_train_wrapper_ms2ds_model(tmp_path): settings = SettingsMS2Deepscore(**{ "spectrum_file_path": spectra_file_name, "epochs": 2, # to speed up tests --> usually many more - "ionisation_mode": 
"negative", + "ionisation_mode": "both", "base_dims": (200, 200), # to speed up tests --> usually larger "embedding_dim": 100, # to speed up tests --> usually larger "same_prob_bins": np.array([(-0.01, 0.2), (0.2, 1.0)]), From 7fc9c24f9aa5cfdf145d87dc8a6c44ef71cff2d5 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Fri, 22 Aug 2025 18:27:44 +0200 Subject: [PATCH 35/48] Make train ms2deepscore handle both and single ion mode model training --- ms2deepscore/train_new_model/train_ms2deepscore.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ms2deepscore/train_new_model/train_ms2deepscore.py b/ms2deepscore/train_new_model/train_ms2deepscore.py index e5ae982f..a9cdaa3f 100644 --- a/ms2deepscore/train_new_model/train_ms2deepscore.py +++ b/ms2deepscore/train_new_model/train_ms2deepscore.py @@ -12,6 +12,7 @@ train) from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore from ms2deepscore.train_new_model import TrainingBatchGenerator, select_compound_pairs_wrapper +from ms2deepscore.train_new_model.inchikey_pair_selection_cross_ionmode import create_data_generator_across_ionmodes from ms2deepscore.validation_loss_calculation.ValidationLossCalculator import \ ValidationLossCalculator @@ -27,11 +28,11 @@ def train_ms2ds_model( # Make folder and save settings os.makedirs(results_folder, exist_ok=True) settings.save_to_file(os.path.join(results_folder, "settings.json")) - - # Create a training generator - spectrum_pair_generator = select_compound_pairs_wrapper(training_spectra, settings=settings) - train_generator = TrainingBatchGenerator(spectrum_pair_generator=spectrum_pair_generator, settings=settings) - + if settings.ionisation_mode == "both": + train_generator = create_data_generator_across_ionmodes(training_spectra, settings=settings) + else: + spectrum_pair_generator = select_compound_pairs_wrapper(training_spectra, settings=settings) + train_generator = TrainingBatchGenerator(spectrum_pair_generator=spectrum_pair_generator, 
settings=settings) # Create a validation loss calculator validation_loss_calculator = ValidationLossCalculator(validation_spectra, settings=settings) From 0cc1cb8de89eaf2b1e1d998b5ea53bf0c1823234 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Mon, 25 Aug 2025 10:22:05 +0200 Subject: [PATCH 36/48] Fix bug in variable naming --- ms2deepscore/train_new_model/inchikey_pair_selection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ms2deepscore/train_new_model/inchikey_pair_selection.py b/ms2deepscore/train_new_model/inchikey_pair_selection.py index 9a07786f..5de3241d 100644 --- a/ms2deepscore/train_new_model/inchikey_pair_selection.py +++ b/ms2deepscore/train_new_model/inchikey_pair_selection.py @@ -245,7 +245,7 @@ def convert_to_selected_pairs_list(pair_frequency_matrixes: np.ndarray, for column_index, pair_frequency in enumerate(pair_frequency_row): if pair_frequency > 0: inchikey2_index = available_pairs_per_bin_matrix[bin_id][inchikey1_index][column_index] - score = scores_matrix[bin_id][inchikey1_index][inchikey2_index] + score = scores_matrix[bin_id][inchikey1_index][column_index] # This ensures that the order is the same. # This is important for the cross ionization mode selection. if inchikey1_index < inchikey2_index: From 301bdba3824fad51ea35f0d361f02232aa7381b8 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Wed, 27 Aug 2025 10:14:38 +0200 Subject: [PATCH 37/48] added basic tests for spectrum pair generation across ionmodes. 
--- tests/test_data_generators.py | 54 +++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/test_data_generators.py b/tests/test_data_generators.py index f3f002c1..a05038b2 100644 --- a/tests/test_data_generators.py +++ b/tests/test_data_generators.py @@ -8,6 +8,8 @@ from ms2deepscore.train_new_model.TrainingBatchGenerator import TrainingBatchGenerator from ms2deepscore.train_new_model.DataGeneratorEmbeddingEvaluation import DataGeneratorEmbeddingEvaluation from ms2deepscore.train_new_model import SpectrumPairGenerator, select_compound_pairs_wrapper +from ms2deepscore.train_new_model.inchikey_pair_selection_cross_ionmode import create_data_generator_across_ionmodes, \ + select_compound_pairs_wrapper_across_ionmode from tests.create_test_spectra import create_test_spectra @@ -235,3 +237,55 @@ def test_epoch_end_functionality(data_generator_embedding_evaluation): assert counter == 10 assert not np.array_equal(data_generator_embedding_evaluation.indexes, initial_indexes), "Indexes not shuffled after epoch end" + +def test_create_data_generator_across_ionmodes(): + """Just a test that is runs, not a test if it is actually well balanced""" + test_spectra = create_test_spectra(20, 2) + pos_spectra = [] + for spectrum in test_spectra[:20]: + spectrum.set("ionmode", "positive") + pos_spectra.append(spectrum) + neg_spectra = [] + for spectrum in test_spectra[20:]: + spectrum.set("ionmode", "negative") + neg_spectra.append(spectrum) + + settings = SettingsMS2Deepscore(min_mz=10, max_mz=1000, + mz_bin_width=0.1, + intensity_scaling=0.5, + additional_metadata=[], + same_prob_bins=np.array([(-0.000001, 0.25), (0.25, 0.5), (0.5, 0.75), + (0.75, 1)]), + batch_size=2, + num_turns=4,) + data_generator = create_data_generator_across_ionmodes(pos_spectra + neg_spectra, settings) + for _ in range(len(data_generator)): + spectra_1, spectra_2, meta_1, meta_2, targets = data_generator.__next__() + +def 
test_select_compound_pairs_wrapper_across_ionmode(): + test_spectra = create_test_spectra(20, 2) + pos_spectra = [] + for spectrum in test_spectra[:20]: + spectrum.set("ionmode", "positive") + pos_spectra.append(spectrum) + neg_spectra = [] + for spectrum in test_spectra[20:]: + spectrum.set("ionmode", "negative") + neg_spectra.append(spectrum) + settings = SettingsMS2Deepscore(min_mz=10, max_mz=1000, + mz_bin_width=0.1, + intensity_scaling=0.5, + additional_metadata=[], + same_prob_bins=np.array([(-0.000001, 0.25), (0.25, 0.5), (0.5, 0.75), + (0.75, 1)]), + batch_size=2, + num_turns=4, ) + spectrum_pair_generator = select_compound_pairs_wrapper_across_ionmode(pos_spectra, neg_spectra, settings) + + for _ in range(len(spectrum_pair_generator)): + spectrum_1, spectrum_2, score = spectrum_pair_generator.__next__() + assert spectrum_1.get("ionmode") == "positive" + assert spectrum_2.get("ionmode") == "negative" + # it should be an infinite generator, so it should continue after a loop + spectrum_pair_generator.__next__() + From 4cd43efb8d2de865f4d9b9b14ff4fbc303c82243 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Wed, 27 Aug 2025 10:18:24 +0200 Subject: [PATCH 38/48] Add SpectrumPairGenerator to init --- ms2deepscore/train_new_model/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ms2deepscore/train_new_model/__init__.py b/ms2deepscore/train_new_model/__init__.py index f31333e0..0dcaca5f 100644 --- a/ms2deepscore/train_new_model/__init__.py +++ b/ms2deepscore/train_new_model/__init__.py @@ -5,5 +5,6 @@ __all__ = [ "TrainingBatchGenerator", - "select_compound_pairs_wrapper" + "select_compound_pairs_wrapper", + "SpectrumPairGenerator" ] From b17f82b9bf0bdd38056a68144dc307525d8e9725 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Wed, 27 Aug 2025 10:25:49 +0200 Subject: [PATCH 39/48] Remove unused import --- tests/test_siamese_spectra_model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_siamese_spectra_model.py 
b/tests/test_siamese_spectra_model.py index d69b56f8..28811375 100644 --- a/tests/test_siamese_spectra_model.py +++ b/tests/test_siamese_spectra_model.py @@ -6,7 +6,6 @@ from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore from ms2deepscore.tensorize_spectra import tensorize_spectra from ms2deepscore.train_new_model.TrainingBatchGenerator import TrainingBatchGenerator -from ms2deepscore.train_new_model import SpectrumPairGenerator from ms2deepscore.train_new_model.inchikey_pair_selection import \ select_compound_pairs_wrapper from ms2deepscore.validation_loss_calculation.ValidationLossCalculator import \ From ded5a1f6a6fcdc4ad8ed1c8584d1fff12cc26222 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Wed, 27 Aug 2025 10:28:00 +0200 Subject: [PATCH 40/48] Remove duplicated test --- tests/test_inchikey_pair_selection.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/tests/test_inchikey_pair_selection.py b/tests/test_inchikey_pair_selection.py index e23c2343..a6fecd68 100644 --- a/tests/test_inchikey_pair_selection.py +++ b/tests/test_inchikey_pair_selection.py @@ -297,18 +297,3 @@ def check_balanced_scores_selecting_inchikey_pairs(selected_inchikey_pairs: Spec score_bin_counts[(min_bound, max_bound)] += 1 # Check that the number of pairs per bin is equal for all bins assert len(set(score_bin_counts.values())) == 1 - -from ms2deepscore.train_new_model.inchikey_pair_selection_cross_ionmode import select_compound_pairs_wrapper_across_ionmode -def test_select_compound_pairs_wrapper_with_resampling_across_ionmodes(): - spectrums_1 = create_test_spectra(num_of_unique_inchikeys=26, num_of_spectra_per_inchikey=1) - spectrums_2 = create_test_spectra(num_of_unique_inchikeys=25, num_of_spectra_per_inchikey=2) - for spectrum in spectrums_1: - spectrum.set("inchikey", "a" + spectrum.get("inchikey")) - bins = [(0.8, 0.9), (0.7, 0.8), (0.9, 1.0), (0.6, 0.7), (0.5, 0.6), - (0.4, 0.5), (0.3, 0.4), (0.2, 0.3), (0.1, 0.2), (-0.01, 0.1)] - 
max_pair_resampling = 10 - settings = SettingsMS2Deepscore(same_prob_bins=np.array(bins, dtype="float32"), - average_inchikey_sampling_count=10, - batch_size=8, - max_pair_resampling=max_pair_resampling) - selected_inchikey_pairs = select_compound_pairs_wrapper_across_ionmode(spectrums_1, spectrums_2, settings) From 608f292bc89f95a5cc16af8713935caf1c8eae41 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Wed, 27 Aug 2025 10:28:08 +0200 Subject: [PATCH 41/48] Linting --- tests/test_data_generators.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_data_generators.py b/tests/test_data_generators.py index a05038b2..fd2978f6 100644 --- a/tests/test_data_generators.py +++ b/tests/test_data_generators.py @@ -288,4 +288,3 @@ def test_select_compound_pairs_wrapper_across_ionmode(): assert spectrum_2.get("ionmode") == "negative" # it should be an infinite generator, so it should continue after a loop spectrum_pair_generator.__next__() - From 38a85a3fb0a16f106fb566a2188cd1679ad81e66 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Wed, 27 Aug 2025 10:44:21 +0200 Subject: [PATCH 42/48] Move create_data_generator_across_ionmodes to top of file --- .../inchikey_pair_selection_cross_ionmode.py | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py b/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py index 6a2b8264..96c17e55 100644 --- a/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py +++ b/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py @@ -5,12 +5,27 @@ from matchms import Spectrum from numba import jit, prange from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore -from ms2deepscore.train_new_model import TrainingBatchGenerator, SpectrumPairGenerator +from ms2deepscore.train_new_model.TrainingBatchGenerator import TrainingBatchGenerator +from 
ms2deepscore.train_new_model.SpectrumPairGenerator import SpectrumPairGenerator from ms2deepscore.train_new_model.inchikey_pair_selection import compute_fingerprints_for_training, \ balanced_selection_of_pairs_per_bin, convert_to_selected_pairs_list, tanimoto_scores_row, \ select_compound_pairs_wrapper from ms2deepscore.utils import split_by_ionmode +def create_data_generator_across_ionmodes(training_spectra, + settings: SettingsMS2Deepscore) -> TrainingBatchGenerator: + pos_spectra, neg_spectra = split_by_ionmode(training_spectra) + + pos_spectrum_pair_generator = select_compound_pairs_wrapper(pos_spectra, settings=settings) + neg_spectrum_pair_generator = select_compound_pairs_wrapper(neg_spectra, settings=settings) + pos_neg_spectrum_pair_generator = select_compound_pairs_wrapper_across_ionmode(pos_spectra, neg_spectra, settings) + + spectrum_pair_generator = CombinedSpectrumGenerator([pos_spectrum_pair_generator, neg_spectrum_pair_generator, pos_neg_spectrum_pair_generator]) + + train_generator = TrainingBatchGenerator(spectrum_pair_generator=spectrum_pair_generator, settings=settings) + return train_generator + + def select_compound_pairs_wrapper_across_ionmode( spectra_1: List[Spectrum], spectra_2: List[Spectrum], @@ -211,20 +226,6 @@ def _get_neg_spectrum_with_inchikey(self, inchikey: str, random_number_generator return self.spectra_neg[random_number_generator.choice(matching_spectrum_id)] -def create_data_generator_across_ionmodes(training_spectra, - settings: SettingsMS2Deepscore) -> TrainingBatchGenerator: - pos_spectra, neg_spectra = split_by_ionmode(training_spectra) - - pos_spectrum_pair_generator = select_compound_pairs_wrapper(pos_spectra, settings=settings) - neg_spectrum_pair_generator = select_compound_pairs_wrapper(neg_spectra, settings=settings) - pos_neg_spectrum_pair_generator = select_compound_pairs_wrapper_across_ionmode(pos_spectra, neg_spectra, settings) - - spectrum_pair_generator = CombinedSpectrumGenerator([pos_spectrum_pair_generator, 
neg_spectrum_pair_generator, pos_neg_spectrum_pair_generator]) - - train_generator = TrainingBatchGenerator(spectrum_pair_generator=spectrum_pair_generator, settings=settings) - return train_generator - - class CombinedSpectrumGenerator: """Combines multiple SpectrumPairGenerators into a single generator From 92d0470fbac167d1c1f2d6358240f7f042dc462c Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Wed, 27 Aug 2025 10:55:25 +0200 Subject: [PATCH 43/48] Update CHANGELOG.md --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15aa4f9a..5fe22184 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,11 +6,20 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added +- The training pair sampling for both ionmodes is now balanced over the different ionmode pairs. + ### Fixed - Datasplit of test, train and val, is not done sepparately for ionmodes anymore. ### Changed - Settings include file name of spectra now. This makes tracking of runs more easily and more flexibility for results folder. +- Split the different datagenerators to different files, before they were all in data_generators.py +- Renamed SpectrumPairGenerator -> TrainingBatchGenerator, this better captures what the class does. +- Moved the data augmentation to a separate file out of the TrainingBatchGenerator. +- Refactored the data augmentation to make it a bit more modular and testable (also added extra tests) +- Moved the Spectrum picking from TraininBatchGenerator into InchikeyPairGenerator and renamed InchikeyPairGenerator to SpectrumPairGenerator. +- Turned the new SpectrumPairGenerator (InchikeyPairGenerator before) into a real generator, before we had a generator method returning a generator. 
## [2.5.2] - 2025-05-26 ### Changed From c281a7d6c51be78ab0731ae686ea6fc26fecd006 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Wed, 27 Aug 2025 11:52:28 +0200 Subject: [PATCH 44/48] Update link to zenodo for model to always point to the latest version --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7ead7320..f542e02a 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ If you are not familiar with `matchms` yet, then we also recommand our [tutorial ## 1) Compute spectral similarities We provide a model which was trained on > 500,000 MS/MS combined spectra from [GNPS](https://gnps.ucsd.edu/), [Mona](https://mona.fiehnlab.ucdavis.edu/), MassBank and MSnLib. -This model can be downloaded from [from zenodo here](https://zenodo.org/records/13897744). Only the ms2deepscore_model.pt is needed. +This model can be downloaded from [from zenodo here](https://zenodo.org/records/10814306). The model works for spectra in both positive and negative ionization modes and even predictions across ionization modes can be made by this model. To compute the similarities between spectra of your choice you can run the code below. From e99a31ebffbbf08f4cd2df0e5eb3c1783921c6ab Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Tue, 27 Jan 2026 10:37:25 +0100 Subject: [PATCH 45/48] Change select_compound_pairs_wrapper to create_spectrum_pair_generator to better reflect functionality. 
--- ms2deepscore/train_new_model/__init__.py | 4 ++-- ms2deepscore/train_new_model/inchikey_pair_selection.py | 2 +- .../inchikey_pair_selection_cross_ionmode.py | 6 +++--- ms2deepscore/train_new_model/train_ms2deepscore.py | 4 ++-- .../wrapper_functions/training_wrapper_functions.py | 4 ++-- tests/test_data_generators.py | 4 ++-- tests/test_inchikey_pair_selection.py | 8 ++++---- tests/test_siamese_spectra_model.py | 4 ++-- 8 files changed, 18 insertions(+), 18 deletions(-) diff --git a/ms2deepscore/train_new_model/__init__.py b/ms2deepscore/train_new_model/__init__.py index 0dcaca5f..c9ce05da 100644 --- a/ms2deepscore/train_new_model/__init__.py +++ b/ms2deepscore/train_new_model/__init__.py @@ -1,10 +1,10 @@ from .TrainingBatchGenerator import TrainingBatchGenerator from .SpectrumPairGenerator import SpectrumPairGenerator -from .inchikey_pair_selection import (select_compound_pairs_wrapper) +from .inchikey_pair_selection import (create_spectrum_pair_generator) __all__ = [ "TrainingBatchGenerator", - "select_compound_pairs_wrapper", + "create_spectrum_pair_generator", "SpectrumPairGenerator" ] diff --git a/ms2deepscore/train_new_model/inchikey_pair_selection.py b/ms2deepscore/train_new_model/inchikey_pair_selection.py index 5de3241d..0e2ca8ac 100644 --- a/ms2deepscore/train_new_model/inchikey_pair_selection.py +++ b/ms2deepscore/train_new_model/inchikey_pair_selection.py @@ -11,7 +11,7 @@ from ms2deepscore.train_new_model import SpectrumPairGenerator -def select_compound_pairs_wrapper( +def create_spectrum_pair_generator( spectra: List[Spectrum], settings: SettingsMS2Deepscore, ) -> SpectrumPairGenerator: diff --git a/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py b/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py index 96c17e55..d12ada34 100644 --- a/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py +++ b/ms2deepscore/train_new_model/inchikey_pair_selection_cross_ionmode.py @@ -9,15 +9,15 @@ from 
ms2deepscore.train_new_model.SpectrumPairGenerator import SpectrumPairGenerator from ms2deepscore.train_new_model.inchikey_pair_selection import compute_fingerprints_for_training, \ balanced_selection_of_pairs_per_bin, convert_to_selected_pairs_list, tanimoto_scores_row, \ - select_compound_pairs_wrapper + create_spectrum_pair_generator from ms2deepscore.utils import split_by_ionmode def create_data_generator_across_ionmodes(training_spectra, settings: SettingsMS2Deepscore) -> TrainingBatchGenerator: pos_spectra, neg_spectra = split_by_ionmode(training_spectra) - pos_spectrum_pair_generator = select_compound_pairs_wrapper(pos_spectra, settings=settings) - neg_spectrum_pair_generator = select_compound_pairs_wrapper(neg_spectra, settings=settings) + pos_spectrum_pair_generator = create_spectrum_pair_generator(pos_spectra, settings=settings) + neg_spectrum_pair_generator = create_spectrum_pair_generator(neg_spectra, settings=settings) pos_neg_spectrum_pair_generator = select_compound_pairs_wrapper_across_ionmode(pos_spectra, neg_spectra, settings) spectrum_pair_generator = CombinedSpectrumGenerator([pos_spectrum_pair_generator, neg_spectrum_pair_generator, pos_neg_spectrum_pair_generator]) diff --git a/ms2deepscore/train_new_model/train_ms2deepscore.py b/ms2deepscore/train_new_model/train_ms2deepscore.py index a9cdaa3f..60b11e3c 100644 --- a/ms2deepscore/train_new_model/train_ms2deepscore.py +++ b/ms2deepscore/train_new_model/train_ms2deepscore.py @@ -11,7 +11,7 @@ from ms2deepscore.models.SiameseSpectralModel import (SiameseSpectralModel, train) from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore -from ms2deepscore.train_new_model import TrainingBatchGenerator, select_compound_pairs_wrapper +from ms2deepscore.train_new_model import TrainingBatchGenerator, create_spectrum_pair_generator from ms2deepscore.train_new_model.inchikey_pair_selection_cross_ionmode import create_data_generator_across_ionmodes from 
ms2deepscore.validation_loss_calculation.ValidationLossCalculator import \ ValidationLossCalculator @@ -31,7 +31,7 @@ def train_ms2ds_model( if settings.ionisation_mode == "both": train_generator = create_data_generator_across_ionmodes(training_spectra, settings=settings) else: - spectrum_pair_generator = select_compound_pairs_wrapper(training_spectra, settings=settings) + spectrum_pair_generator = create_spectrum_pair_generator(training_spectra, settings=settings) train_generator = TrainingBatchGenerator(spectrum_pair_generator=spectrum_pair_generator, settings=settings) # Create a validation loss calculator validation_loss_calculator = ValidationLossCalculator(validation_spectra, diff --git a/ms2deepscore/wrapper_functions/training_wrapper_functions.py b/ms2deepscore/wrapper_functions/training_wrapper_functions.py index 06151226..8a00b5b8 100644 --- a/ms2deepscore/wrapper_functions/training_wrapper_functions.py +++ b/ms2deepscore/wrapper_functions/training_wrapper_functions.py @@ -13,7 +13,7 @@ from ms2deepscore.models.SiameseSpectralModel import (SiameseSpectralModel, train) from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore -from ms2deepscore.train_new_model import TrainingBatchGenerator, select_compound_pairs_wrapper +from ms2deepscore.train_new_model import TrainingBatchGenerator, create_spectrum_pair_generator from ms2deepscore.validation_loss_calculation.ValidationLossCalculator import ValidationLossCalculator from ms2deepscore.train_new_model.train_ms2deepscore import \ train_ms2ds_model, plot_history, save_history @@ -128,7 +128,7 @@ def parameter_search( os.makedirs(settings.model_directory_name, exist_ok=True) settings.save_to_file(os.path.join(settings.model_directory_name, "settings.json")) # Create a training generator - spectrum_pair_generator = select_compound_pairs_wrapper(training_spectra, settings=settings) + spectrum_pair_generator = create_spectrum_pair_generator(training_spectra, settings=settings) train_generator = 
TrainingBatchGenerator(spectrum_pair_generator=spectrum_pair_generator, settings=settings) # Create a validation loss calculator validation_loss_calculator = ValidationLossCalculator(validation_spectra, diff --git a/tests/test_data_generators.py b/tests/test_data_generators.py index fd2978f6..a3cabbc4 100644 --- a/tests/test_data_generators.py +++ b/tests/test_data_generators.py @@ -7,7 +7,7 @@ from ms2deepscore.tensorize_spectra import tensorize_spectra from ms2deepscore.train_new_model.TrainingBatchGenerator import TrainingBatchGenerator from ms2deepscore.train_new_model.DataGeneratorEmbeddingEvaluation import DataGeneratorEmbeddingEvaluation -from ms2deepscore.train_new_model import SpectrumPairGenerator, select_compound_pairs_wrapper +from ms2deepscore.train_new_model import SpectrumPairGenerator, create_spectrum_pair_generator from ms2deepscore.train_new_model.inchikey_pair_selection_cross_ionmode import create_data_generator_across_ionmodes, \ select_compound_pairs_wrapper_across_ionmode from tests.create_test_spectra import create_test_spectra @@ -176,7 +176,7 @@ def test_create_data_generator(): augment_removal_intensity=0.0, augment_intensity=0.0, augment_noise_max=0) - spectrum_pair_generator = select_compound_pairs_wrapper(test_spectra, settings=settings) + spectrum_pair_generator = create_spectrum_pair_generator(test_spectra, settings=settings) data_generator = TrainingBatchGenerator(spectrum_pair_generator=spectrum_pair_generator, settings=settings) tensorized_spectra = [] epochs = 20 diff --git a/tests/test_inchikey_pair_selection.py b/tests/test_inchikey_pair_selection.py index a6fecd68..2c451312 100644 --- a/tests/test_inchikey_pair_selection.py +++ b/tests/test_inchikey_pair_selection.py @@ -7,7 +7,7 @@ from ms2deepscore import SettingsMS2Deepscore from ms2deepscore.train_new_model.inchikey_pair_selection import ( - compute_jaccard_similarity_per_bin, select_inchi_for_unique_inchikeys, select_compound_pairs_wrapper, 
compute_fingerprints_for_training) + compute_jaccard_similarity_per_bin, select_inchi_for_unique_inchikeys, create_spectrum_pair_generator, compute_fingerprints_for_training) from ms2deepscore.train_new_model import SpectrumPairGenerator from tests.create_test_spectra import create_test_spectra @@ -186,7 +186,7 @@ def test_select_compound_pairs_wrapper_no_resampling(): average_inchikey_sampling_count=10, batch_size=8, max_pair_resampling=max_pair_resampling) - inchikey_pair_generator = select_compound_pairs_wrapper(spectrums, settings) + inchikey_pair_generator = create_spectrum_pair_generator(spectrums, settings) check_balanced_scores_selecting_inchikey_pairs(inchikey_pair_generator, bins) check_correct_oversampling(inchikey_pair_generator, max_pair_resampling) @@ -206,7 +206,7 @@ def test_select_compound_pairs_wrapper_with_resampling(): average_inchikey_sampling_count=10, batch_size=8, max_pair_resampling=max_pair_resampling) - inchikey_pair_generator = select_compound_pairs_wrapper(spectrums, settings) + inchikey_pair_generator = create_spectrum_pair_generator(spectrums, settings) check_balanced_scores_selecting_inchikey_pairs(inchikey_pair_generator, bins) check_correct_oversampling(inchikey_pair_generator, max_pair_resampling) @@ -228,7 +228,7 @@ def test_select_compound_pairs_wrapper_maximum_inchikey_count(): max_pair_resampling=max_pair_resampling, max_inchikey_sampling=max_inchikey_sampling ) - inchikey_pair_generator = select_compound_pairs_wrapper(spectrums, settings) + inchikey_pair_generator = create_spectrum_pair_generator(spectrums, settings) highest_inchikey_count = max(inchikey_pair_generator.get_inchikey_counts().values()) assert highest_inchikey_count <= max_inchikey_sampling + 1 # +1 because there is a chance that the last added inchikey is a pair to itself... 
diff --git a/tests/test_siamese_spectra_model.py b/tests/test_siamese_spectra_model.py index 28811375..88b225da 100644 --- a/tests/test_siamese_spectra_model.py +++ b/tests/test_siamese_spectra_model.py @@ -7,7 +7,7 @@ from ms2deepscore.tensorize_spectra import tensorize_spectra from ms2deepscore.train_new_model.TrainingBatchGenerator import TrainingBatchGenerator from ms2deepscore.train_new_model.inchikey_pair_selection import \ - select_compound_pairs_wrapper + create_spectrum_pair_generator from ms2deepscore.validation_loss_calculation.ValidationLossCalculator import \ ValidationLossCalculator @@ -130,7 +130,7 @@ def test_model_training(simple_training_spectra): batch_size=2, num_turns=20, ) - inchikey_pair_generator = select_compound_pairs_wrapper(simple_training_spectra, settings) + inchikey_pair_generator = create_spectrum_pair_generator(simple_training_spectra, settings) # Create generators train_generator_simple = TrainingBatchGenerator(spectrum_pair_generator=inchikey_pair_generator, settings=settings) settings.same_prob_bins = np.array([(-0.01, 1.0)]) From 4417ee8f1bdcc5a91a7937059615d8fca2a96374 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Tue, 27 Jan 2026 11:29:32 +0100 Subject: [PATCH 46/48] Add balanced_sampling_across_ionmodes setting, to have the default use the standard sampling algorithm. --- ms2deepscore/SettingsMS2Deepscore.py | 9 +++++++++ ms2deepscore/train_new_model/train_ms2deepscore.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ms2deepscore/SettingsMS2Deepscore.py b/ms2deepscore/SettingsMS2Deepscore.py index 2d392262..ab9011ec 100644 --- a/ms2deepscore/SettingsMS2Deepscore.py +++ b/ms2deepscore/SettingsMS2Deepscore.py @@ -87,6 +87,12 @@ class SettingsMS2Deepscore: The in between layers to be used. Default = (2000, 2000, 2000) embedding_dim: The dimension of the final embedding. Default = 400 + ionisation_mode: + The ionisation mode that is used for training the model. 
+ balanced_sampling_across_ionmodes: + If True the model will do separate pair sampling for training for each ionmode. + This gives better balance over the ionmodes. Initial results showed a decrease in pos-pos prediction + accuracy. Which you can find in the notebook model_benchmarking/Compare balanced cross ion moe sampling.ipynb additional_metadata: Additional metadata that should be used in training the model. e.g. precursor_mz dropout_rate: @@ -184,6 +190,7 @@ def __init__(self, validate_settings=True, **settings): self.embedding_dim = 500 self.ionisation_mode = "positive" self.activation_function = "relu" + self.balanced_sampling_across_ionmodes = False # additional model structure options self.train_binning_layer: bool = False @@ -295,6 +302,8 @@ def validate_settings(self): if self.loss_function.lower() not in LOSS_FUNCTIONS: raise ValueError(f"Unknown loss function. Must be one of: {LOSS_FUNCTIONS.keys()}") validate_bin_order(self.same_prob_bins) + if self.balanced_sampling_across_ionmodes and self.ionisation_mode != "both": + raise ValueError("Balanced sampling across ionmodes only works if you train on both ionmodes") def create_model_directory_name(self): """Creates a directory name using metadata, it will contain the metadata, the binned spectra and final model""" diff --git a/ms2deepscore/train_new_model/train_ms2deepscore.py b/ms2deepscore/train_new_model/train_ms2deepscore.py index 60b11e3c..38f3bfa9 100644 --- a/ms2deepscore/train_new_model/train_ms2deepscore.py +++ b/ms2deepscore/train_new_model/train_ms2deepscore.py @@ -28,7 +28,7 @@ def train_ms2ds_model( # Make folder and save settings os.makedirs(results_folder, exist_ok=True) settings.save_to_file(os.path.join(results_folder, "settings.json")) - if settings.ionisation_mode == "both": + if settings.balanced_sampling_across_ionmodes: train_generator = create_data_generator_across_ionmodes(training_spectra, settings=settings) else: spectrum_pair_generator = 
create_spectrum_pair_generator(training_spectra, settings=settings) From a77c84df78f6108e1d515f78496886b2d86675d0 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Tue, 27 Jan 2026 11:41:23 +0100 Subject: [PATCH 47/48] Update pair sampling tutorial to match changes made to the sampling algorithm function names --- .../tutorials/pair_sampling_tutorial.ipynb | 189 ++++++++++++------ 1 file changed, 124 insertions(+), 65 deletions(-) diff --git a/notebooks/tutorials/pair_sampling_tutorial.ipynb b/notebooks/tutorials/pair_sampling_tutorial.ipynb index 4ce7daef..d5bdcc1c 100644 --- a/notebooks/tutorials/pair_sampling_tutorial.ipynb +++ b/notebooks/tutorials/pair_sampling_tutorial.ipynb @@ -20,10 +20,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "7250244e-194b-48ca-85e8-011ba79bb5b5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The file validation_spectra.mgf already exists, the file won't be downloaded\n" + ] + } + ], "source": [ "import requests\n", "import os\n", @@ -48,10 +56,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "67e78338-f404-4112-a184-fb1d9471478e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25412it [00:23, 1095.42it/s]\n" + ] + } + ], "source": [ "from matchms.importing.load_spectra import load_spectra\n", "from tqdm import tqdm\n", @@ -61,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "c49289c0-b66c-40b8-b639-4d61e3a75c0f", "metadata": {}, "outputs": [], @@ -71,30 +87,68 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "2f9872ee-643b-4b82-ab07-3860a5ab334e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1831/1831 [00:05<00:00, 327.18it/s]\n", + "Calculating fingerprints: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1831/1831 [00:06<00:00, 269.27it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n" + ] + } + ], "source": [ "tanimoto_scores = calculate_tanimoto_scores_unique_inchikey(spectra, spectra)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "55541454-0880-4315-a017-50104609b649", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.22762467" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "tanimoto_scores.mean().mean()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "257f5cd3-7e03-421b-917e-e2764bf6951b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "from matplotlib import pyplot as plt\n", "plt.hist(tanimoto_scores.to_numpy().ravel())\n", @@ -113,7 +167,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 8, "id": "df9e4707-9dc8-4d64-a411-53e6ce8faee5", "metadata": {}, "outputs": [ @@ -128,45 +182,46 @@ "name": "stderr", "output_type": "stream", "text": [ - "Calculating fingerprints: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1831/1831 [00:00<00:00, 11063.43it/s]\n", - "Balanced sampling of inchikey pairs (per bin): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 915/915 [00:00<00:00, 13671.38it/s]\n", - "Balanced sampling of inchikey pairs (per bin): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 915/915 [00:00<00:00, 13254.73it/s]\n", - "Balanced sampling of inchikey pairs (per bin): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 915/915 [00:00<00:00, 15341.70it/s]\n", - "Balanced sampling of inchikey pairs (per bin): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 915/915 [00:00<00:00, 12985.29it/s]\n", - "Balanced sampling of inchikey pairs (per bin): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 915/915 [00:00<00:00, 14196.94it/s]\n", - "Balanced sampling of inchikey pairs (per bin): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 915/915 [00:00<00:00, 14742.81it/s]\n", - "Balanced sampling of inchikey pairs (per bin): 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 915/915 [00:00<00:00, 14620.09it/s]\n", - "Balanced sampling of inchikey pairs (per bin): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 915/915 [00:00<00:00, 14370.67it/s]\n", - "Balanced sampling of inchikey pairs (per bin): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 915/915 [00:00<00:00, 14187.13it/s]\n", - "Balanced sampling of inchikey pairs (per bin): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 915/915 [00:00<00:00, 16721.88it/s]\n", - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 14.69it/s]\n" + "Calculating fingerprints: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1831/1831 [00:00<00:00, 8830.61it/s]\n", + "Balanced sampling of inchikey pairs (per bin): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9155/9155 [00:00<00:00, 10997.12it/s]\n", + "Balanced sampling of inchikey pairs (per bin): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9155/9155 [00:00<00:00, 14929.39it/s]\n", + "Balanced sampling of inchikey pairs (per bin): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9155/9155 [00:00<00:00, 15755.28it/s]\n", + "Balanced sampling of inchikey pairs (per bin): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 
9155/9155 [00:00<00:00, 14699.81it/s]\n", + "Balanced sampling of inchikey pairs (per bin): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9155/9155 [00:00<00:00, 16051.44it/s]\n", + "Balanced sampling of inchikey pairs (per bin): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9155/9155 [00:00<00:00, 16805.94it/s]\n", + "Balanced sampling of inchikey pairs (per bin): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9155/9155 [00:00<00:00, 18401.84it/s]\n", + "Balanced sampling of inchikey pairs (per bin): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9155/9155 [00:00<00:00, 17945.20it/s]\n", + "Balanced sampling of inchikey pairs (per bin): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9155/9155 [00:00<00:00, 17741.57it/s]\n", + "Balanced sampling of inchikey pairs (per bin): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9155/9155 [00:00<00:00, 12120.94it/s]\n", + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 10.29it/s]\n" ] } ], "source": [ - "from ms2deepscore.train_new_model.inchikey_pair_selection import select_compound_pairs_wrapper\n", + "from ms2deepscore.train_new_model.inchikey_pair_selection import create_spectrum_pair_generator\n", "from ms2deepscore import SettingsMS2Deepscore\n", - "selected_compound_pairs = select_compound_pairs_wrapper(spectra, SettingsMS2Deepscore(average_inchikey_sampling_count=10, max_inchikey_sampling=13))" + "spectrum_pair_generator = 
create_spectrum_pair_generator(spectra, SettingsMS2Deepscore())" ] }, { "cell_type": "code", - "execution_count": null, - "id": "11a14265-50d0-477c-adc6-a381b6d2fd0c", - "metadata": {}, - "outputs": [], - "source": [ - "scores = [x[2] for x in selected_compound_pairs]" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "6eb910a7-02f8-4cdd-8537-157d24603618", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "plt.hist(scores)\n", + "plt.hist(spectrum_pair_generator.get_scores())\n", "plt.xlabel(\"tanimoto score\")\n", "plt.show()" ] @@ -196,18 +251,6 @@ "Below we show an example of a badly distributed scores (because we only use the validation spectra, which contain 1800 unique molecules). " ] }, - { - "cell_type": "code", - "execution_count": 62, - "id": "04549827-e21b-46cb-a7b5-712234bc6b50", - "metadata": {}, - "outputs": [], - "source": [ - "from ms2deepscore.train_new_model.data_generators import InchikeyPairGenerator\n", - "\n", - "inchikey_pair_generator = InchikeyPairGenerator(selected_compound_pairs)" - ] - }, { "cell_type": "markdown", "id": "66368132-1c0a-4da7-8d9a-428eedea9421", @@ -218,7 +261,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "a60085fb-3ac7-4a61-b22d-bd53d733ef28", "metadata": {}, "outputs": [], @@ -277,13 +320,13 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 12, "id": "544bfe70-11b9-468f-9418-5788189a6796", "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA90AAAMGCAYAAADvN+dqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAADnq0lEQVR4nOzdeVgVdf//8ReIHFBZRGMrRFMTLbfUEC0zJdHMNOkul4zKtAztVlqUMtcK8670a1nelTfYYnbbrZZr7nqnuGFmqeGemoIlAaLJIvP7w5/n9iQqR8+cA/h8XNe5LmbmMzPvzxzgfd5nZj7jZhiGIQAAAAAA4HDurg4AAAAAAICKiqIbAAAAAACTUHQDAAAAAGASim4AAAAAAExC0Q0AAAAAgEkougEAAAAAMAlFNwAAAAAAJqHoBgAAAADAJBTdAAAAAACYhKIbAAAAAACTUHQDAIBrtnbtWnXr1k2hoaFyc3PTvHnzbJY//vjjcnNzs3l17tzZpk1WVpb69u0rX19f+fv7q3///srLy3NiLwAAcDwPVwdQFhQXF+vo0aPy8fGRm5ubq8MBAFRghmHo5MmTCg0Nlbt7xfnu+9SpU2ratKmefPJJ9ezZs8Q2nTt3VnJysnXaYrHYLO/bt6+OHTumZcuWqbCwUE888YQGDhyomTNnljoOcjoAwFlKm9MpuiUdPXpUYWFhrg4DAHAdOXz4sG666SZXh+EwXbp0UZcuXS7bxmKxKDg4uMRlu3bt0pIlS7R582a1bNlSkvTuu+/qvvvu01tvvaXQ0NBSxUFOBwA425VyOkW3JB8fH0nnDpavr6+LowEAVGS5ubkKCwuz5p7ryerVqxUYGKjq1aurQ4cOeu2111SjRg1JUmpqqvz9/a0FtyRFR0fL3d1dGzdu1IMPPljiNvPz85Wfn2+dNgxDEjkdAGC+0uZ0im7JevmZr68vCRoA4BTX26XPnTt3Vs+ePVWnTh3t27dPL7/8srp06aLU1FRVqlRJGRkZCgwMtFnHw8NDAQEBysjIuOR2k5KSNHbs2Ivmk9MBAM5ypZxO0Q0AAEzXq1cv68+NGzdWkyZNVLduXa1evVodO3a86u0mJiYqISHBOn3+rAMAAGVFxRnBBQAAlBs333yzatasqb1790qSgoODdfz4cZs2RUVFysrKuuR94NK5+8TPn9Xm7DYAoCyi6AYAAE535MgRnThxQiEhIZKkqKgoZWdnKy0tzdpm5cqVKi4uVmRkpKvCBADgmnF5OQAAuGZ5eXnWs9aSdODAAW3btk0BAQEKCAjQ2LFjFRsbq+DgYO3bt08vvfSS6tWrp5iYGElSw4YN1blzZw0YMEDTpk1TYWGhBg8erF69epV65HIAAMoiznQDAIBrtmXLFjVv3lzNmzeXJCUkJKh58+YaNWqUKlWqpO3bt+uBBx7QLbfcov79+6tFixb673//a/Os7s8//1wRERHq2LGj7rvvPt1555368MMPXdUlAAAcws04/2yN61hubq78/PyUk5PDvWAAAFORc8zF8QUAOEtpcw6XlztY7RELTd/HwQldTd8HAADA9ebCz3F83gLgKFxeDgAAAACASSi6AQAAAAAwCUU3AAAAAAAmoegGAAAAAMAkFN0AAAAAAJiEohsAAAAAAJNQdAMAAAAAYBKKbgAAAAAATELRDQAAAACASSi6AQAAAAAwCUU3AAAAAAAmoegGAAAAAMAkFN0AAAAAAJiEohsAAAAAAJNQdAMAAAAAYBKKbgAAAAAATELRDQAAAACASSi6AQAAAAAwicuL7l9//VWPPvqoatSoIW9vbzVu3FhbtmyxLjcMQ6NGjVJISIi8vb0VHR2tPXv22GwjKytLffv2la+vr/z9/dW/f3/l5eU5uysAAAAAANhwadH9xx9/qG3btqpcubIWL16snTt36u2331b16tWtbSZOnKgpU6Zo2rRp2rhxo6pWraqYmBidOXPG2qZv377asWOHli1bpgULFmjt2rUaOHC
gK7oEAAAAAICVhyt3/uabbyosLEzJycnWeXXq1LH+bBiGJk+erJEjR6p79+6SpE8++URBQUGaN2+eevXqpV27dmnJkiXavHmzWrZsKUl69913dd999+mtt95SaGjoRfvNz89Xfn6+dTo3N9esLgIAAAAArmMuPdP9zTffqGXLlvrb3/6mwMBANW/eXB999JF1+YEDB5SRkaHo6GjrPD8/P0VGRio1NVWSlJqaKn9/f2vBLUnR0dFyd3fXxo0bS9xvUlKS/Pz8rK+wsDCTeggAwPVh7dq16tatm0JDQ+Xm5qZ58+ZZlxUWFmr48OFq3LixqlatqtDQUD322GM6evSozTZq164tNzc3m9eECROc3BMAABzLpUX3/v379cEHH6h+/fr69ttvNWjQID333HOaMWOGJCkjI0OSFBQUZLNeUFCQdVlGRoYCAwNtlnt4eCggIMDa5q8SExOVk5NjfR0+fNjRXQMA4Lpy6tQpNW3aVFOnTr1o2enTp7V161a9+uqr2rp1q+bMmaP09HQ98MADF7UdN26cjh07Zn0NGTLEGeEDAGAal15eXlxcrJYtW+qNN96QJDVv3lw//fSTpk2bpri4ONP2a7FYZLFYTNs+AADXmy5duqhLly4lLvPz89OyZcts5r333nu64447dOjQIdWqVcs638fHR8HBwaXeL7eMAQDKOpee6Q4JCVGjRo1s5jVs2FCHDh2SJGvSzczMtGmTmZlpXRYcHKzjx4/bLC8qKlJWVpZdSRsAADhPTk6O3Nzc5O/vbzN/woQJqlGjhpo3b65//OMfKioquux2uGUMAFDWubTobtu2rdLT023m7d69W+Hh4ZLODaoWHBysFStWWJfn5uZq48aNioqKkiRFRUUpOztbaWlp1jYrV65UcXGxIiMjndALAABgjzNnzmj48OHq3bu3fH19rfOfe+45zZo1S6tWrdLTTz+tN954Qy+99NJlt8UtYwCAss6ll5cPGzZMbdq00RtvvKGHH35YmzZt0ocffqgPP/xQkuTm5qahQ4fqtddeU/369VWnTh29+uqrCg0NVY8ePSSdOzPeuXNnDRgwQNOmTVNhYaEGDx6sXr16lThyOQAAcJ3CwkI9/PDDMgxDH3zwgc2yhIQE689NmjSRp6ennn76aSUlJV3ytjBuGQMAlHUuLbpbtWqluXPnKjExUePGjVOdOnU0efJk9e3b19rmpZde0qlTpzRw4EBlZ2frzjvv1JIlS+Tl5WVt8/nnn2vw4MHq2LGj3N3dFRsbqylTpriiSwAA4BLOF9y//PKLVq5caXOWuySRkZEqKirSwYMH1aBBAydFCQCAY7m06Jak+++/X/fff/8ll7u5uWncuHEaN27cJdsEBARo5syZZoQHAAAc4HzBvWfPHq1atUo1atS44jrbtm2Tu7v7RU8pAQCgPHF50Q0AAMq/vLw87d271zp94MABbdu2TQEBAQoJCdFDDz2krVu3asGCBTp79qz1sZ4BAQHy9PRUamqqNm7cqHvuuUc+Pj5KTU3VsGHD9Oijj6p69equ6hYAANeMohsAAFyzLVu26J577rFOn78/Oy4uTmPGjNE333wjSWrWrJnNeqtWrVL79u1lsVg0a9YsjRkzRvn5+apTp46GDRtmc583AADlEUU3AAC4Zu3bt5dhGJdcfrllknT77bdrw4YNjg4LAACXc+kjwwAAAAAAqMgougEAAAAAMAlFNwAAAAAAJqHoBgAAAADAJBTdAAAAAACYhKIbAAAAAACTUHQDAAAAAGAShxTd2dnZjtgMAABwMXI6AACOZXfR/eabb+rLL7+0Tj/88MOqUaOGbrzxRv3www8ODQ4AAJiHnA4AgPnsLrqnTZumsLAwSdKyZcu0bNkyLV68WF26dNGLL77o8AABAIA5yOkAAJjPw94VMjIyrAl6wYIFevjhh9WpUyfVrl1bkZGRDg8QAACYg5wOAID57D7TXb16dR0+fFiStGTJEkVHR0uSDMPQ2bNnHRs
dAAAwDTkdAADz2X2mu2fPnurTp4/q16+vEydOqEuXLpKk77//XvXq1XN4gAAAwBzkdAAAzGd30T1p0iTVrl1bhw8f1sSJE1WtWjVJ0rFjx/Tss886PEAAAGAOcjoAAOazu+guKCjQCy+8cNH8YcOGOSQgAADgHOR0AADMZ/c93UFBQXryySf13XffmREPAABwEnI6AADms7vo/uyzz5SVlaUOHTrolltu0YQJE3T06FEzYgMAACYipwMAYD67i+4ePXpo3rx5+vXXX/XMM89o5syZCg8P1/333685c+aoqKjIjDgBAICDkdMBADCf3UX3eTfccIMSEhK0fft2vfPOO1q+fLkeeughhYaGatSoUTp9+rQj4wQAACYhpwMAYB67B1I7LzMzUzNmzFBKSop++eUXPfTQQ+rfv7+OHDmiN998Uxs2bNDSpUsdGSsAADABOR0AAPPYXXTPmTNHycnJ+vbbb9WoUSM9++yzevTRR+Xv729t06ZNGzVs2NCRcQIAAAcjpwMAYD67Ly9/4oknFBoaqnXr1mnbtm0aPHiwTXKWpNDQUL3yyiuOihEAAJjAkTl97dq16tatm0JDQ+Xm5qZ58+bZLDcMQ6NGjVJISIi8vb0VHR2tPXv22LTJyspS37595evrK39/f/Xv3195eXnX2k0AAFzK7jPdx44dU5UqVS7bxtvbW6NHj77qoAAAgPkcmdNPnTqlpk2b6sknn1TPnj0vWj5x4kRNmTJFM2bMUJ06dfTqq68qJiZGO3fulJeXlySpb9++OnbsmJYtW6bCwkI98cQTGjhwoGbOnHl1HQQAoAyw+0x3lSpVtG/fPo0cOVK9e/fW8ePHJUmLFy/Wjh07HB4gAAAwhyNzepcuXfTaa6/pwQcfvGiZYRiaPHmyRo4cqe7du6tJkyb65JNPdPToUesZ8V27dmnJkiX6+OOPFRkZqTvvvFPvvvuuZs2axWPMAADlmt1F95o1a9S4cWNt3LhRc+bMsV729cMPP3B2GwCAcsRZOf3AgQPKyMhQdHS0dZ6fn58iIyOVmpoqSUpNTZW/v79atmxpbRMdHS13d3dt3LjxktvOz89Xbm6uzQsAgLLE7qJ7xIgReu2117Rs2TJ5enpa53fo0EEbNmxwaHAAAMA8zsrpGRkZkqSgoCCb+UFBQdZlGRkZCgwMtFnu4eGhgIAAa5uSJCUlyc/Pz/oKCwtzWNwAADiC3UX3jz/+WOKlY4GBgfr9998dEhQAADBfRcjpiYmJysnJsb4OHz7s6pAAALBhd9Ht7++vY8eOXTT/+++/14033uiQoAAAgPmcldODg4MlnXse+IUyMzOty4KDg633lJ9XVFSkrKwsa5uSWCwW+fr62rwAAChL7C66e/XqpeHDhysjI0Nubm4qLi7WunXr9MILL+ixxx4zI0YAAGACZ+X0OnXqKDg4WCtWrLDOy83N1caNGxUVFSVJioqKUnZ2ttLS0qxtVq5cqeLiYkVGRjosFgAAnM3uR4a98cYbio+PV1hYmM6ePatGjRrp7Nmz6tOnj0aOHGlGjAAAwASOzOl5eXnau3evdfrAgQPatm2bAgICVKtWLQ0dOlSvvfaa6tevb31kWGhoqHr06CFJatiwoTp37qwBAwZo2rRpKiws1ODBg9WrVy+FhoY6stsAADiV3UW3p6enPvroI40aNUo//vij8vLy1Lx5c9WvX19//vmnvL29zYgTAAA4mCNz+pYtW3TPPfdYpxMSEiRJcXFxSklJ0UsvvaRTp05p4MCBys7O1p133qklS5ZYn9EtSZ9//rkGDx6sjh07yt3dXbGxsZoyZYrjOgwAgAu4GYZh2LPCc889V2ICPHXqlO6//36tWrXKYcE5S25urvz8/JSTk3PN94LVHrHQQVFd2sEJXU3fBwDAHI7MOdeKnA7YuvBzHJ+3AFxJaXOO3fd0L1y48KJnd546dUqdO3dWUVGR/ZECAACXIKcDAGA+uy8vX7p0qe666y5Vr15dQ4c
O1cmTJxUTEyMPDw8tXrzYjBgBAIAJyOkAAJjP7qK7bt26WrJkie655x65u7vriy++kMVi0cKFC1W1alUzYgQAACYgpwMAYD67i25JatKkiRYsWKB7771XkZGRWrBgAQOoAQBQDpHTAQAwV6mK7ubNm8vNze2i+RaLRUePHlXbtm2t87Zu3eq46AAAgEOR0wEAcK5SFd3nn6EJAADKN3I6AADOVaqi+68jmwIAgPKJnA4AgHPZ/cgwAAAAAABQOqU60x0QEKDdu3erZs2aql69eon3gp2XlZXlsOAAAIBjkdMBAHCuUhXdkyZNko+PjyRp8uTJpgQyYcIEJSYm6u9//7t1H2fOnNHzzz+vWbNmKT8/XzExMXr//fcVFBRkXe/QoUMaNGiQVq1apWrVqikuLk5JSUny8LiqgdkBAKjQnJHTAQDA/5SqMo2LiyvxZ0fZvHmz/vnPf6pJkyY284cNG6aFCxdq9uzZ8vPz0+DBg9WzZ0+tW7dOknT27Fl17dpVwcHBWr9+vY4dO6bHHntMlStX1htvvOHwOAEAKO/MzukAAMDWVZ0OLi4u1t69e3X8+HEVFxfbLGvXrp1d28rLy1Pfvn310Ucf6bXXXrPOz8nJ0fTp0zVz5kx16NBBkpScnKyGDRtqw4YNat26tZYuXaqdO3dq+fLlCgoKUrNmzTR+/HgNHz5cY8aMkaen59V0DwCA64YjczoAALiY3UX3hg0b1KdPH/3yyy8yDMNmmZubm86ePWvX9uLj49W1a1dFR0fbFN1paWkqLCxUdHS0dV5ERIRq1aql1NRUtW7dWqmpqWrcuLHN5eYxMTEaNGiQduzYoebNm5e4z/z8fOXn51unc3Nz7YoZAICKwNE5HQAAXMzuovuZZ55Ry5YttXDhQoWEhFx2AJYrmTVrlrZu3arNmzdftCwjI0Oenp7y9/e3mR8UFKSMjAxrmwsL7vPLzy+7lKSkJI0dO/aq4wYAoCJwZE4HAAAls7vo3rNnj7766ivVq1fvmnZ8+PBh/f3vf9eyZcvk5eV1TduyV2JiohISEqzTubm5CgsLc2oMAAC4mqNyOlBe1R6x0NUhALgO2P2c7sjISO3du/ead5yWlqbjx4/r9ttvl4eHhzw8PLRmzRpNmTJFHh4eCgoKUkFBgbKzs23Wy8zMVHBwsCQpODhYmZmZFy0/v+xSLBaLfH19bV4AAFxvHJXTAQDApZXqTPf27dutPw8ZMkTPP/+8MjIy1LhxY1WuXNmm7V9HIL+Ujh076scff7SZ98QTTygiIkLDhw9XWFiYKleurBUrVig2NlaSlJ6erkOHDikqKkqSFBUVpddff13Hjx9XYGCgJGnZsmXy9fVVo0aNShUHAADXEzNyOgAAuLRSFd3NmjWTm5ubzSArTz75pPXn88vsGXTFx8dHt912m828qlWrqkaNGtb5/fv3V0JCggICAuTr66shQ4YoKipKrVu3liR16tRJjRo1Ur9+/TRx4kRlZGRo5MiRio+Pl8ViKVUcAABcT8zI6QAA4NJKVXQfOHDA7DhKNGnSJLm7uys2Nlb5+fmKiYnR+++/b11eqVIlLViwQIMGDVJUVJSqVq2quLg4jRs3ziXxAgBQ1rkqpwMAcL0qVdEdHh5udhySpNWrV9tMe3l5aerUqZo6deol1wkPD9eiRYtMjgwAgIrBWTkdAACcY/dAaklJSfrXv/510fx//etfevPNNx0SFAAAMB85HQAA89lddP/zn/9URETERfNvvfVWTZs2zSFBAQAA85HTAQAwn91Fd0ZGhkJCQi6af8MNN+jYsWMOCQoAAJjP2Tm9du3acnNzu+gVHx8vSWrfvv1Fy5555hmHxwEAgDOV6p7uC4WFhWndunWqU6eOzfx169YpNDTUYYEBAABzOTunb9682WZE9J9++kn33nuv/va3v1nnDRgwwGZA1CpVqjg8DgAAnMnuonvAgAEaOnSoCgsL1aFDB0nSihUr9NJLL+n55593eIAAAMA
czs7pN9xwg830hAkTVLduXd19993WeVWqVFFwcHCpt5mfn6/8/HzrdG5u7rUHCgCAA9lddL/44os6ceKEnn32WRUUFEg6N8r48OHDlZiY6PAAAQCAOVyZ0wsKCvTZZ58pISFBbm5u1vmff/65PvvsMwUHB6tbt2569dVXL3u2OykpSWPHjjU1VgAAroWbYRjG1ayYl5enXbt2ydvbW/Xr15fFYnF0bE6Tm5srPz8/5eTkyNfX95q2VXvEQgdFdWkHJ3Q1fR8AAHM4Muc4iity+r///W/16dNHhw4dsl7K/uGHHyo8PFyhoaHavn27hg8frjvuuENz5sy55HZKOtMdFhZWpo4vyq7LfW7j8xaAKyltTrf7TPd51apVU6tWra52dQAAUEa4IqdPnz5dXbp0sbl3fODAgdafGzdurJCQEHXs2FH79u1T3bp1S9yOxWIp11/8AwAqPruL7lOnTmnChAlasWKFjh8/ruLiYpvl+/fvd1hwAADAPK7K6b/88ouWL19+2TPYkhQZGSlJ2rt37yWLbgAAyjq7i+6nnnpKa9asUb9+/RQSEmJzHxYAACg/XJXTk5OTFRgYqK5dL3/57rZt2ySpxMeaAQBQXthddC9evFgLFy5U27ZtzYgHAAA4iStyenFxsZKTkxUXFycPj/99DNm3b59mzpyp++67TzVq1ND27ds1bNgwtWvXTk2aNHFafAAAOJrdRXf16tUVEBBgRiwAAMCJXJHTly9frkOHDunJJ5+0me/p6anly5dr8uTJOnXqlMLCwhQbG6uRI0c6NT4AABzN7qJ7/PjxGjVqlGbMmHHZR3gAAICyzRU5vVOnTirpwSlhYWFas2aNU2IAAMCZ7C663377be3bt09BQUGqXbu2KleubLN869atDgsOAACYh5wOAID57C66e/ToYUIYAADA2cjpwKX99RnePLcbwNWyu+gePXq0GXEAAAAnI6cDAGA+d1cHAAAAAABARVWqM90BAQHavXu3atasqerVq1/2OZ5ZWVkOCw4AADgWOR0wH5emA7hQqYruSZMmycfHR5I0efJkM+MBAAAmIqcDAOBcpSq64+LiSvwZAACUL+R0AACci3u6AQAAAAAwid2jlwMAAAAoPe7xBq5vnOkGAAAAAMAkpTrTvX37dt12221yd6dGBwCUPX89i2SGinJmipwOAIBzlSrjNm/eXL///rsk6eabb9aJEydMDQoAAJiDnA4AgHOVquj29/fXgQMHJEkHDx5UcXGxqUEBAABzkNMBAHCuUl1eHhsbq7vvvlshISFyc3NTy5YtValSpRLb7t+/36EBAgAAxyGn43rmjFtRAOCvSlV0f/jhh+rZs6f27t2r5557TgMGDJCPj4/ZsQEAAAcjpwMA4FylfmRY586dJUlpaWn6+9//ToIGAKCcIqcDAOA8dj+nOzk52frzkSNHJEk33XST4yICAABOQU4HAMB8dj8vpLi4WOPGjZOfn5/Cw8MVHh4uf39/jR8/nsFYAAAoR8jpAACYz+4z3a+88oqmT5+uCRMmqG3btpKk7777TmPGjNGZM2f0+uuvOzxIADyHGIDjkdMBADCf3UX3jBkz9PHHH+uBBx6wzmvSpIluvPFGPfvssyRoAADKCXI6AADms7vozsrKUkRExEXzIyIilJWV5ZCgAACA+cjpqOh4RBiAssDue7qbNm2q995776L57733npo2beqQoAAAgPmcndPHjBkjNzc3m9eFRf+ZM2cUHx+vGjVqqFq1aoqNjVVmZqbD4wAAwJnsPtM9ceJEde3aVcuXL1dUVJQkKTU1VYcPH9aiRYscHiAAADCHK3L6rbfequXLl1unPTz+91Fk2LBhWrhwoWbPni0/Pz8NHjxYPXv21Lp160yJBQAAZ7D7TPfdd9+t3bt368EHH1R2drays7PVs2dPpaen66677jIjRgAAYAJX5HQPDw8FBwdbXzVr1pQk5eTkaPr06XrnnXfUoUMHtWjRQsnJyVq/fr0
2bNhgSiwAADiD3We6JSk0NJTBVQAAqACcndP37Nmj0NBQeXl5KSoqSklJSapVq5bS0tJUWFio6Ohoa9uIiAjVqlVLqampat26dYnby8/PV35+vnU6NzfX9D4AAGAPu890AwAAXI3IyEilpKRoyZIl+uCDD3TgwAHdddddOnnypDIyMuTp6Sl/f3+bdYKCgpSRkXHJbSYlJcnPz8/6CgsLM7kXAADY56rOdAMAANirS5cu1p+bNGmiyMhIhYeH69///re8vb2vapuJiYlKSEiwTufm5lJ4AwDKFM50AwAAl/D399ctt9yivXv3Kjg4WAUFBcrOzrZpk5mZqeDg4Etuw2KxyNfX1+YFAEBZYlfRbRiGDh06pDNnzpgVDwAAcIKykNPz8vK0b98+hYSEqEWLFqpcubJWrFhhXZ6enq5Dhw5ZR1YHAKA8srvorlevng4fPmxWPAAAwAlckdNfeOEFrVmzRgcPHtT69ev14IMPqlKlSurdu7f8/PzUv39/JSQkaNWqVUpLS9MTTzyhqKioSw6iBgBAeWDXPd3u7u6qX7++Tpw4ofr165sVEwAAMJkrcvqRI0fUu3dvnThxQjfccIPuvPNObdiwQTfccIMkadKkSXJ3d1dsbKzy8/MVExOj999/3ymxAQBgFrvv6Z4wYYJefPFF/fTTT9e886SkJLVq1Uo+Pj4KDAxUjx49lJ6ebtPmzJkzio+PV40aNVStWjXFxsYqMzPTps2hQ4fUtWtXValSRYGBgXrxxRdVVFR0zfEBAFCROTKnl8asWbN09OhR5efn68iRI5o1a5bq1q1rXe7l5aWpU6cqKytLp06d0pw5cy57PzcAAOWB3aOXP/bYYzp9+rSaNm0qT0/Pi0YbzcrKKvW21qxZo/j4eLVq1UpFRUV6+eWX1alTJ+3cuVNVq1aVJA0bNkwLFy7U7Nmz5efnp8GDB6tnz55at26dJOns2bPq2rWrgoODtX79eh07dkyPPfaYKleurDfeeMPe7gEAcN1wZE4HAAAls7vonjx5ssN2vmTJEpvplJQUBQYGKi0tTe3atVNOTo6mT5+umTNnqkOHDpKk5ORkNWzYUBs2bFDr1q21dOlS7dy5U8uXL1dQUJCaNWum8ePHa/jw4RozZow8PT0dFi8AABWJI3M6ALha7RELbaYPTujqokgAW3YX3XFxcWbEIUnKycmRJAUEBEiS0tLSVFhYqOjoaGubiIgI1apVS6mpqWrdurVSU1PVuHFjBQUFWdvExMRo0KBB2rFjh5o3b37RfvLz85Wfn2+dzs3NNatLuE789Z88AJQHZuZ0AABwTpl5TndxcbGGDh2qtm3b6rbbbpMkZWRkyNPTU/7+/jZtg4KClJGRYW1zYcF9fvn5ZSVJSkqSn5+f9RUWFubg3gAAAAAAYEfR7e7urkqVKl325eFh94lzq/j4eP3000+aNWvWVW+jtBITE5WTk2N98Qg0AMD1xOycDgAA/qfUGXXu3LmXXJaamqopU6aouLj4qoIYPHiwFixYoLVr1+qmm26yzg8ODlZBQYGys7NtznZnZmZaRzMNDg7Wpk2bbLZ3fnTzS414arFYZLFYripWAADKOzNzOgAAsFXqort79+4XzUtPT9eIESM0f/589e3bV+PGjbNr54ZhaMiQIZo7d65Wr16tOnXq2Cxv0aKFKleurBUrVig2Nta6z0OHDikqKkqSFBUVpddff13Hjx9XYGCgJGnZsmXy9fVVo0aN7IoHAIDrgRk5HQAAlOyqrh07evSoRo8erRkzZigmJkbbtm2z3odtj/j4eM2cOVNff/21fHx8rPdg+/n5ydvbW35+furfv78SEhIUEBAgX19fDRkyRFFRUWrdurUkqVOnTmrUqJH69euniRMnKiMjQyNHjlR8fDxnswEAuAJH5XTgesegquXPhe8ZI53DTHYNpJaTk6Phw4erXr162rFjh1asWKH58+dfdXL+4IMPlJOTo/bt2yskJMT6+vLLL61tJk2
apPvvv1+xsbFq166dgoODNWfOHOvySpUqacGCBapUqZKioqL06KOP6rHHHuMbegAALsPROR0AAJSs1Ge6J06cqDfffFPBwcH64osvSrw0zV6GYVyxjZeXl6ZOnaqpU6desk14eLgWLVp0zfEAAHA9MCOnA4Az8CxulEelLrpHjBghb29v1atXTzNmzNCMGTNKbHfhWWgAAFD2kNMB16JwdA1uAYCrlLrofuyxx+Tm5mZmLAAAwAnI6QAAOE+pi+6UlBQTwwAAAM5CTgcAwHnsGkgNAAAAAACU3lU9MgwAAAAoi7hvF0BZQ9ENAAAAXId4TjXgHBTdAAAAgAvZU/wy8rmty13ZwFUPKCu4pxsAAAAAAJNwphsAAACwE2dRL42z8YAtiu5yyFn/5PkHCQAAUHHYcyn2Xz8HUkgDV4+iGwAAALgCzmzbYhA2oPS4pxsAAAAAAJNwphsAADhFUlKS5syZo59//lne3t5q06aN3nzzTTVo0MDapn379lqzZo3Nek8//bSmTZvm7HBRTnAG+n/K4yXg5THmK6mIfcK1oegGAABOsWbNGsXHx6tVq1YqKirSyy+/rE6dOmnnzp2qWrWqtd2AAQM0btw463SVKlVcES6AMoAvVVARUHQDAACnWLJkic10SkqKAgMDlZaWpnbt2lnnV6lSRcHBwc4OD0AZ4Koim4HkYCbu6QYAAC6Rk5MjSQoICLCZ//nnn6tmzZq67bbblJiYqNOnT19yG/n5+crNzbV5AQBQlnCmGwAAOF1xcbGGDh2qtm3b6rbbbrPO79Onj8LDwxUaGqrt27dr+PDhSk9P15w5c0rcTlJSksaOHeussIHrhiPPOHOWGNc7im4AAOB08fHx+umnn/Tdd9/ZzB84cKD158aNGyskJEQdO3bUvn37VLdu3Yu2k5iYqISEBOt0bm6uwsLCzAscAAA7UXQDAACnGjx4sBYsWKC1a9fqpptuumzbyMhISdLevXtLLLotFossFospcQLlAQONmYPjCkei6AYAAE5hGIaGDBmiuXPnavXq1apTp84V19m2bZskKSQkxOToAKD0KMphD4puAADgFPHx8Zo5c6a+/vpr+fj4KCMjQ5Lk5+cnb29v7du3TzNnztR9992nGjVqaPv27Ro2bJjatWunJk2auDh6AACuDkU3AABwig8++ECS1L59e5v5ycnJevzxx+Xp6anly5dr8uTJOnXqlMLCwhQbG6uRI0e6IFrANRh0DKh4KLoBAIBTGIZx2eVhYWFas2aNk6IBKj5nXQLNpdbA5VF0AwAAAGVURSxoK2KfLudarl7gyoeKwd3VAQAAAAAAUFFRdAMAAAAAYBKKbgAAAAAATMI93QAAAADgJBfep/3Xe7Svt/vdrxec6QYAAAAAwCQU3QAAAAAAmISiGwAAAAAAk3BPNwAAAMoN7nnF9cye+8F5pnfZwZluAAAAAABMQtENAAAAAIBJuLwcAAAAAFzAmbdLXO7SdJiLM90AAAAAAJiEM924JGd888a3bAAA4HIYOA1wPAZdcy7OdAMAAAAAYBLOdMOl+PYaAAAAcDw+Z5cdnOkGAAAAAMAknOkGAACAqa50xo37SQH7cSa7/OBMNwAAAAAAJuFMNwArRqwHAAAAHIuiGwAAAACuY/Y8QozHjdmvwlxePnXqVNWuXVteXl6KjIzUpk2bXB0SAAC4CuT060/tEQttXgBci79Jx6oQZ7q//PJLJSQkaNq0aYqMjNTkyZMVExOj9PR0BQYGujo8AE7GZfJA+UVOh8QAUUBZc7m/Sc58X5mbYRiGq4O4VpGRkWrVqpXee+89SVJxcbHCwsI0ZMgQjRgx4orr5+bmys/PTzk5OfL19b2mWEgSAGDLGcm3PH3R4sicUxGVpZwOx+HzEXD9uJ6K7tLmnHJ/prugoEBpaWlKTEy0znN3d1d0dLRSU1NLXCc/P1/5+fnW6ZycHEnnDtq1Ks4/fc3bAICKpNaw2a4OwSEckSMu3E4F+M7
b4cpaTq+Ibhv9rc30T2NjnLYvANeHv+Z9M//PuFppc3q5L7p///13nT17VkFBQTbzg4KC9PPPP5e4TlJSksaOHXvR/LCwMFNiBACUf36THbu9kydPys/Pz7EbLefI6c7n6N9rAPir6+H/zJVyerkvuq9GYmKiEhISrNPFxcXKyspSjRo15Obm5sLIriw3N1dhYWE6fPhwub9srqL0hX6ULfSjbKEfFzMMQydPnlRoaKiDoru+mZnTK8rvrzNwrEqPY1V6HKvS41iVnityerkvumvWrKlKlSopMzPTZn5mZqaCg4NLXMdischisdjM8/f3NytEU/j6+laYP6iK0hf6UbbQj7KFftjiDHfJympOryi/v87AsSo9jlXpcaxKj2NVes7M6eX+kWGenp5q0aKFVqxYYZ1XXFysFStWKCoqyoWRAQAAe5DTAQAVUbk/0y1JCQkJiouLU8uWLXXHHXdo8uTJOnXqlJ544glXhwYAAOxATgcAVDQVouh+5JFH9Ntvv2nUqFHKyMhQs2bNtGTJkosGYqkILBaLRo8efdGldOVRRekL/Shb6EfZQj9gr7KU03nfS49jVXocq9LjWJUex6r0XHGsKsRzugEAAAAAKIvK/T3dAAAAAACUVRTdAAAAAACYhKIbAAAAAACTUHQDAAAAAGASim4AAAAAAExC0V1GnTx5UkOHDlV4eLi8vb3Vpk0bbd682abNrl279MADD8jPz09Vq1ZVq1atdOjQIRdFfGlX6kteXp4GDx6sm266Sd7e3mrUqJGmTZvmwoiltWvXqlu3bgoNDZWbm5vmzZtns9wwDI0aNUohISHy9vZWdHS09uzZY9MmKytLffv2la+vr/z9/dW/f3/l5eU5sRfX3o+DBw+qf//+qlOnjry9vVW3bl2NHj1aBQUF5aofF8rPz1ezZs3k5uambdu2mR/8BRzVj4ULFyoyMlLe3t6qXr26evTo4ZwO/H+O6Mfu3bvVvXt31axZU76+vrrzzju1atUqJ/biyv2YM2eOOnXqpBo1alzy9+XMmTOKj49XjRo1VK1aNcXGxiozM9M5HYBDTJ06VbVr15aXl5ciIyO1adOmy7afPXu2IiIi5OXlpcaNG2vRokVOitT17DlWH330ke666y5Vr15d1atXV3R09BWPbUVi7+/VebNmzZKbm5vT/6+7kr3HKjs7W/Hx8QoJCZHFYtEtt9xy3fwd2nusJk+erAYNGsjb21thYWEaNmyYzpw546RoXedK+b0kq1ev1u233y6LxaJ69eopJSXFoTFRdJdRTz31lJYtW6ZPP/1UP/74ozp16qTo6Gj9+uuvkqR9+/bpzjvvVEREhFavXq3t27fr1VdflZeXl4sjv9iV+pKQkKAlS5bos88+065duzR06FANHjxY33zzjctiPnXqlJo2baqpU6eWuHzixImaMmWKpk2bpo0bN6pq1aqKiYmx+UfWt29f7dixQ8uWLdOCBQu0du1aDRw40FldkHTt/fj5559VXFysf/7zn9qxY4cmTZqkadOm6eWXX3ZmNxzyfpz30ksvKTQ01OyQS+SIfvznP/9Rv3799MQTT+iHH37QunXr1KdPH2d1QZJj+nH//ferqKhIK1euVFpampo2bar7779fGRkZzurGFftx6tQp3XnnnXrzzTcvuY1hw4Zp/vz5mj17ttasWaOjR4+qZ8+eZoUMB/vyyy+VkJCg0aNHa+vWrWratKliYmJ0/PjxEtuvX79evXv3Vv/+/fX999+rR48e6tGjh3766ScnR+589h6r1atXq3fv3lq1apVSU1MVFhamTp06WXN/RWbvsTrv4MGDeuGFF3TXXXc5KVLXs/dYFRQU6N5779XBgwf11VdfKT09XR999JFuvPFGJ0fufPYeq5kzZ2rEiBEaPXq0du3apenTp+vLL790+mc4V7hSfv+rAwcOqGvXrrrnnnu0bds2DR06VE899ZS+/fZbxwVloMw5ffq0UalSJWPBggU282+//Xb
jlVdeMQzDMB555BHj0UcfdUV4dilNX2699VZj3Lhxl1zuapKMuXPnWqeLi4uN4OBg4x//+Id1XnZ2tmGxWIwvvvjCMAzD2LlzpyHJ2Lx5s7XN4sWLDTc3N+PXX391WuwXupp+lGTixIlGnTp1zAz1sq6lH4sWLTIiIiKMHTt2GJKM77//3klRX+xq+lFYWGjceOONxscff+zscC/pavrx22+/GZKMtWvXWtvk5uYakoxly5Y5LfYL/bUfFzpw4ECJvy/Z2dlG5cqVjdmzZ1vn7dq1y5BkpKammhgtHOWOO+4w4uPjrdNnz541QkNDjaSkpBLbP/zww0bXrl1t5kVGRhpPP/20qXGWBfYeq78qKioyfHx8jBkzZpgVYplxNceqqKjIaNOmjfHxxx8bcXFxRvfu3Z0QqevZe6w++OAD4+abbzYKCgqcFWKZYe+xio+PNzp06GAzLyEhwWjbtq2pcZY1l8vv57300kvGrbfeajPvkUceMWJiYhwWB2e6y6CioiKdPXv2orPW3t7e+u6771RcXKyFCxfqlltuUUxMjAIDAxUZGVmqSyec7Up9kaQ2bdrom2++0a+//irDMLRq1Srt3r1bnTp1ckXIV3TgwAFlZGQoOjraOs/Pz0+RkZFKTU2VJKWmpsrf318tW7a0tomOjpa7u7s2btzo9JhLUpp+lCQnJ0cBAQHOCLFUStuPzMxMDRgwQJ9++qmqVKniilAvqzT92Lp1q3799Ve5u7urefPmCgkJUZcuXcrUWbbS9KNGjRpq0KCBPvnkE506dUpFRUX65z//qcDAQLVo0cJVodstLS1NhYWFNn2NiIhQrVq1Lvs3hLKhoKBAaWlpNu+fu7u7oqOjL/n+paam2rSXpJiYmAr/fl/Nsfqr06dPq7CwsEzlDzNc7bEaN26cAgMD1b9/f2eEWSZczbH65ptvFBUVpfj4eAUFBem2227TG2+8obNnzzorbJe4mmPVpk0bpaWlWS9B379/vxYtWqT77rvPKTGXJ874307RXQb5+PgoKipK48eP19GjR3X27Fl99tlnSk1N1bFjx3T8+HHl5eVpwoQJ6ty5s5YuXaoHH3xQPXv21Jo1a1wdvo0r9UWS3n33XTVq1Eg33XSTPD091blzZ02dOlXt2rVzcfQlO3/5a1BQkM38oKAg67KMjAwFBgbaLPfw8FBAQIBTL5+9nNL046/27t2rd999V08//bTp8ZVWafphGIYef/xxPfPMMzZfhJQlpenH/v37JUljxozRyJEjtWDBAlWvXl3t27dXVlaWcwO+hNL0w83NTcuXL9f3338vHx8feXl56Z133tGSJUtUvXp1p8d8tTIyMuTp6Sl/f3+b+Zf7G0LZ8fvvv+vs2bN2/Q/MyMiwq31FcTXH6q+GDx+u0NDQiz7YVjRXc6y+++47TZ8+XR999JEzQiwzruZY7d+/X1999ZXOnj2rRYsW6dVXX9Xbb7+t1157zRkhu8zVHKs+ffpo3LhxuvPOO1W5cmXVrVtX7du3vy4uL7fXpf635+bm6s8//3TIPii6y6hPP/1UhmHoxhtvlMVi0ZQpU9S7d2+5u7uruLhYktS9e3cNGzZMzZo104gRI3T//fe7fACyklyuL9K5onvDhg365ptvlJaWprffflvx8fFavny5iyPHhX799Vd17txZf/vb3zRgwABXh2OXd999VydPnlRiYqKrQ7km5//2X3nlFcXGxqpFixZKTk6Wm5ubZs+e7eLoSs8wDMXHxyswMFD//e9/tWnTJvXo0UPdunWzfhkHoOKYMGGCZs2apblz55bJsWdc6eTJk+rXr58++ugj1axZ09XhlHnFxcUKDAzUhx9+qBYtWuiRRx7RK6+8UiY//7ra6tWr9cYbb+j999/X1q1bNWfOHC1cuFDjx493dWjXJYruMqpu3bpas2aN8vLydPjwYW3atEmFhYW6+eabVbNmTXl4eKhRo0Y26zRs2LBMjl5+ub78+eefevnll/XOO++
oW7duatKkiQYPHqxHHnlEb731lqtDL1FwcLAkXTRCcWZmpnVZcHDwRQNbFBUVKSsry9rG1UrTj/OOHj2qe+65R23atNGHH37otBhLozT9WLlypVJTU2WxWOTh4aF69epJklq2bKm4uDjnBnwJpelHSEiIJNn87VssFt18881l5m+/tO/HggULNGvWLLVt21a333673n//fXl7e2vGjBlOj/lqBQcHq6CgQNnZ2TbzS/obQtlTs2ZNVapUqVT/A88LDg62q31FcTXH6ry33npLEyZM0NKlS9WkSRMzwywT7D1W+/bt08GDB9WtWzd5eHjIw8NDn3zyib755ht5eHho3759zgrd6a7m9yokJES33HKLKlWqZJ3XsGFDZWRkOP3JKs50Ncfq1VdfVb9+/fTUU0+pcePGevDBB/XGG28oKSnJ+iU+zrnU/3ZfX195e3s7ZB8U3WVc1apVFRISoj/++EPffvutunfvLk9PT7Vq1Urp6ek2bXfv3q3w8HAXRXplJfWlsLBQhYWF1rPe51WqVKnM/kOoU6eOgoODtWLFCuu83Nxcbdy4UVFRUZKkqKgoZWdnKy0tzdpm5cqVKi4uVmRkpNNjLklp+iGdO8Pdvn1761nVv75XrlaafkyZMkU//PCDtm3bpm3btlkfLfLll1/q9ddfd0ncf1WafrRo0UIWi8Xmb7+wsFAHDx4sM3/7penH6dOnJemi36ULr+QpD1q0aKHKlSvb9DU9PV2HDh2y+RtC2eTp6akWLVrYvH/FxcVasWLFJd+/qKgom/aStGzZsgr/fl/NsZLOPclg/PjxWrJkSZm9tcfR7D1WERER+vHHH635adu2bXrggQesoyiHhYU5M3ynuprfq7Zt22rv3r02uWL37t0KCQmRp6en6TG7ytUcq9OnT5f4+Vo6d8UZ/scp/9sdNiQbHGrJkiXG4sWLjf379xtLly41mjZtakRGRlpHa5wzZ45RuXJl48MPPzT27NljvPvuu0alSpWM//73vy6O/GJX6svdd99t3HrrrcaqVauM/fv3G8nJyYaXl5fx/vvvuyzmkydPGt9//73x/fffG5KMd955x/j++++NX375xTAMw5gwYYLh7+9vfP3118b27duN7t27G3Xq1DH+/PNP6zY6d+5sNG/e3Ni4caPx3XffGfXr1zd69+5drvpx5MgRo169ekbHjh2NI0eOGMeOHbO+ylM//upSo1GbzRH9+Pvf/27ceOONxrfffmv8/PPPRv/+/Y3AwEAjKyur3PTjt99+M2rUqGH07NnT2LZtm5Genm688MILRuXKlY1t27aVmX6cOHHC+P77742FCxcakoxZs2YZ33//vc3v/zPPPGPUqlXLWLlypbFlyxYjKirKiIqKclofcG1mzZplWCwWIyUlxdi5c6cxcOBAw9/f38jIyDAMwzD69etnjBgxwtp+3bp1hoeHh/HWW28Zu3btMkaPHm1UrlzZ+PHHH13VBaex91hNmDDB8PT0NL766iub3HHy5ElXdcFp7D1Wf3U9jV5u77E6dOiQ4ePjYwwePNhIT083FixYYAQGBhqvvfaaq7rgNPYeq9GjRxs+Pj7GF198Yf0MXrduXePhhx92VRec5kr5fcSIEUa/fv2s7ffv329UqVLFePHFF41du3YZU6dONSpVqmQsWbLEYTFRdJdRX375pXHzzTcbnp6eRnBwsBEfH29kZ2fbtJk+fbpRr149w8vLy2jatKkxb948F0V7eVfqy7Fjx4zHH3/cCA0NNby8vIwGDRoYb7/9tlFcXOyymFetWmVIuugVFxdnGMa5xyK9+uqrRlBQkGGxWIyOHTsa6enpNts4ceKE0bt3b6NatWqGr6+v8cQTTzj9w8a19iM5ObnE9Z39fZ0j3o8LuarodkQ/CgoKjOeff94IDAw0fHx8jOjoaOOnn34qd/3YvHmz0alTJyMgIMDw8fExWrdubSxatKhM9eNSv/+jR4+2buPPP/80nn32WaN69epGlSp
VjAcffNDpX0rh2rz77rtGrVq1DE9PT+OOO+4wNmzYYF129913W38fzvv3v/9t3HLLLYanp6dx6623GgsXLnRyxK5jz7EKDw+/4t9PRWbv79WFrqei2zDsP1br1683IiMjDYvFYtx8883G66+/bhQVFTk5atew51gVFhYaY8aMMerWrWt4eXkZYWFhxrPPPmv88ccfzg/cya6U3+Pi4oy77777onWaNWtmeHp6GjfffLORnJzs0JjcDIPrCwAAAAAAMEPZujkTAAAAAIAKhKIbAAAAAACTUHQDAAAAAGASim4AAAAAAExC0Q0AAAAAgEkougEAAAAAMAlFNwAAAAAAJqHoBq4Dbm5umjdv3jVtY926dWrcuLEqV66sHj16OCQuAAAqutWrV8vNzU3Z2dmuDgXXoHbt2po8ebLDtvf4449f8fPUX/fpiM9zcA2KbsABfvvtNw0aNEi1atWSxWJRcHCwYmJitG7dOleH5jAJCQlq1qyZDhw4oJSUFFeHAwAwWWpqqipVqqSuXbu6OhTTHTx4UG5ubtq2bZvDt92mTRsdO3ZMfn5+Dt/2hVJSUuTv72/qPq5nmzdv1sCBAyv8PmEOD1cHAFQEsbGxKigo0IwZM3TzzTcrMzNTK1as0IkTJ1wdmsPs27dPzzzzjG666aYSlxuGobNnz8rDg38rAFARTJ8+XUOGDNH06dN19OhRhYaGmravipxDPD09FRwc7OowyqyCggJ5enq6OgyrwsJCVa5c+aL5N9xwg9NjccU+YQ7OdAPXKDs7W//973/15ptv6p577lF4eLjuuOMOJSYm6oEHHrC2e+edd9S4cWNVrVpVYWFhevbZZ5WXl2ddfv4b6gULFqhBgwaqUqWKHnroIZ0+fVozZsxQ7dq1Vb16dT333HM6e/asdb3atWtr/Pjx6t27t6pWraobb7xRU6dOvWzMhw8f1sMPPyx/f38FBASoe/fuOnjwYIltz3/7f+LECT355JNyc3NTSkqK9XK5xYsXq0WLFrJYLPruu+9UXFyspKQk1alTR97e3mratKm++uorm20uWrRIt9xyi7y9vXXPPfcoJSXF5tK7MWPGqFmzZjbrTJ48WbVr17aZ9/HHH6thw4by8vJSRESE3n///YvinjNnju655x5VqVJFTZs2VWpqqs021q1bp/bt26tKlSqqXr26YmJi9Mcff+iTTz5RjRo1lJ+fb9O+R48e6tev32WPLwCUd3l5efryyy81aNAgde3a1eYKpz59+uiRRx6xaV9YWKiaNWvqk08+kaQr5oJL5ZB9+/ape/fuCgoKUrVq1dSqVSstX77cZl/Hjh1T165d5e3trTp16mjmzJkXXYabnZ2tp556SjfccIN8fX3VoUMH/fDDD5fsb506dSRJzZs3l5ubm9q3by/p3JnGe++9VzVr1pSfn5/uvvtubd261WZdNzc3ffzxx3rwwQdVpUoV1a9fX998881FfT2f46423//xxx967LHHVL16dVWpUkVdunTRnj17rPt44oknlJOTIzc3N7m5uWnMmDFXXK8khmFozJgx1qv3QkND9dxzz1mX5+fna/jw4QoLC5PFYlG9evU0ffp06/I1a9bojjvukMViUUhIiEaMGKGioiLr8vbt22vw4MEaOnSoatasqZiYGEnSTz/9pC5duqhatWoKCgpSv3799Pvvv18yzvPHcd68eapfv768vLwUExOjw4cP27T7+uuvdfvtt8vLy0s333yzxo4daxOPm5ubPvjgAz3wwAOqWrWqXn/99RL3V9Kl3pd73yVpx44duv/+++Xr6ysfHx/ddddd2rdvn02bt956SyEhIapRo4bi4+NVWFh4yX3+1ejRoxUSEqLt27dLkr777jvddddd8vb2VlhYmJ577jmdOnVKkjRu3DjddtttF22jWbNmevXVVy+5DziIAeCaFBYWGtWqVTOGDh1qnDlz5pLtJk2aZKxcudI4cOCAsWLFCqNBgwbGoEGDrMuTk5ONypU
rG/fee6+xdetWY82aNUaNGjWMTp06GQ8//LCxY8cOY/78+Yanp6cxa9Ys63rh4eGGj4+PkZSUZKSnpxtTpkwxKlWqZCxdutTaRpIxd+5cwzAMo6CgwGjYsKHx5JNPGtu3bzd27txp9OnTx2jQoIGRn59/UdxFRUXGsWPHDF9fX2Py5MnGsWPHjNOnTxurVq0yJBlNmjQxli5dauzdu9c4ceKE8dprrxkRERHGkiVLjH379hnJycmGxWIxVq9ebRiGYRw6dMiwWCxGQkKC8fPPPxufffaZERQUZEgy/vjjD8MwDGP06NFG06ZNLzp+4eHh1unPPvvMCAkJMf7zn/8Y+/fvN/7zn/8YAQEBRkpKimEYhnHgwAFDkhEREWEsWLDASE9PNx566CEjPDzcKCwsNAzDML7//nvDYrEYgwYNMrZt22b89NNPxrvvvmv89ttvxunTpw0/Pz/j3//+t3WfmZmZhoeHh7Fy5crL/EYAQPk3ffp0o2XLloZhGMb8+fONunXrGsXFxYZhGMaCBQsMb29v4+TJk9b28+fPN7y9vY3c3FzDMIwr5oJL5ZBt27YZ06ZNM3788Udj9+7dxsiRIw0vLy/jl19+se4rOjraaNasmbFhwwYjLS3NuPvuuw1vb29j0qRJNm26detmbN682di9e7fx/PPPGzVq1DBOnDhRYn83bdpkSDKWL19uHDt2zNpuxYoVxqeffmrs2rXL2Llzp9G/f38jKCjI2k/DOJdjb7rpJmPmzJnGnj17jOeee86oVq2adRvn+3o+x11tvn/ggQeMhg0bGmvXrjW2bdtmxMTEGPXq1TMKCgqM/Px8Y/LkyYavr69x7Ngx49ixY9b353LrlWT27NmGr6+vsWjRIuOXX34xNm7caHz44YfW5Q8//LARFhZmzJkzx9i3b5+xfPlya5xHjhwxqlSpYjz77LPGrl27jLlz5xo1a9Y0Ro8ebV3/7rvvNqpVq2a8+OKLxs8//2z8/PPPxh9//GHccMMNRmJiorFr1y5j69atxr333mvcc889JcZ44XFs2bKlsX79emPLli3GHXfcYbRp08baZu3atYavr6+RkpJi7Nu3z1i6dKlRu3ZtY8yYMTbvX2BgoPGvf/3L2Ldvn83v2oXCw8Ntfseu9L4fOXLECAgIMHr27Gls3rzZSE9PN/71r38ZP//8s2EYhhEXF2f4+voazzzzjLFr1y5j/vz5RpUqVWyOdUn7nDt3rlFcXGwMHjzYqF27trFnzx7DMAxj7969RtWqVY1JkyYZu3fvNtatW2c0b97cePzxxw3DMIzDhw8b7u7uxqZNm6zb27p1q+Hm5mbs27fvkscZjkHRDTjAV199ZVSvXt3w8vIy2rRpYyQmJho//PDDZdeZPXu2UaNGDet0cnKyIcnYu3evdd7TTz9tVKlSxeaDTUxMjPH0009bp8PDw43OnTvbbPuRRx4xunTpYp2+sOj+9NNPjQYNGlg/PBmGYeTn5xve3t7Gt99+e8l4/fz8jOTkZOv0+Q8R8+bNs847c+aMUaVKFWP9+vU26/bv39/o3bu3YRiGkZiYaDRq1Mhm+fDhw+0uuuvWrWvMnDnTps348eONqKgowzD+V3R//PHH1uU7duwwJBm7du0yDMMwevfubbRt2/aSfR40aJDNcXz77beNm2++2ebYAUBF1KZNG2Py5MmGYZz7crlmzZrGqlWrbKY/+eQTa/vevXsbjzzyiGEYpcsFJeWQS7n11luNd9991zAMw9i1a5chydi8ebN1+Z49ewxJ1uLkv//9r+Hr63vRF+F169Y1/vnPf5a4j/M54/vvv79sLGfPnjV8fHyM+fPnW+dJMkaOHGmdzsvLMyQZixcvtunrhUW3vfl+9+7dhiRj3bp11uW///674e3tbf1yODk52fDz87OJtzTr/dXbb79t3HLLLSUW5enp6YYkY9myZSWu+/LLL1/0GWPq1KlGtWrVjLNnzxqGca7obt68uc1648ePNzp16mQ
z7/Dhw4YkIz09vcR9nT+OGzZssM47//uxceNGwzAMo2PHjsYbb7xhs96nn35qhISEWKclGUOHDi1xHxcqqQC+3PuemJho1KlT55JfbsTFxRnh4eFGUVGRdd7f/vY369/RpfY5e/Zso0+fPkbDhg2NI0eOWJf179/fGDhwoM0+/vvf/xru7u7Gn3/+aRiGYXTp0sXmhM+QIUOM9u3bX7HvuHZcXg44QGxsrI4ePapvvvlGnTt31urVq3X77bfbXI63fPlydezYUTfeeKN8fHzUr18/nThxQqdPn7a2qVKliurWrWudDgoKUu3atVWtWjWbecePH7fZf1RU1EXTu3btKjHWH374QXv37pWPj4+qVaumatWqKSAgQGfOnLnokqfSaNmypfXnvXv36vTp07r33nut265WrZo++eQT67Z37dqlyMjIy8Z/JadOndK+ffvUv39/m/289tprF/WhSZMm1p9DQkIkyXr8tm3bpo4dO15yPwMGDNDSpUv166+/Sjp3Kdvjjz8uNzc3u+IFgPIkPT1dmzZtUu/evSVJHh4eeuSRR6yXEHt4eOjhhx/W559/Lunc/+Svv/5affv2lVS6XHDehTlEOndZ+wsvvKCGDRvK399f1apV065du3To0CFrbB4eHrr99tut69SrV0/Vq1e3Tv/www/Ky8tTjRo1bPZ/4MABu/NcZmamBgwYoPr168vPz0++vr7Ky8uzxnPehbmmatWq8vX1vShXX8jefL9r1y55eHjY5M8aNWqoQYMGl8z3V7ve3/72N/3555+6+eabNWDAAM2dO9d6Ofa2bdtUqVIl3X333ZfcX1RUlE2ebNu2rfLy8nTkyBHrvBYtWtis98MPP2jVqlU271dERIQkXfY98/DwUKtWrazTERER8vf3t/bthx9+0Lhx42y2O2DAAB07dszm89dffw9L63Lv+7Zt23TXXXeVeH/4ebfeeqsqVapknQ4JCbns740kDRs2TBs3btTatWt14403Wuf/8MMPSklJselrTEyMiouLdeDAAUnnPtd88cUXOnPmjAoKCjRz5kw9+eSTV9V32KfijVYBuIiXl5fuvfde3XvvvXr11Vf11FNPafTo0Xr88cd18OBB3X///Ro0aJBef/11BQQE6LvvvlP//v1VUFCgKlWqSNJF/5jd3NxKnFdcXHzVcebl5alFixbWD0sXupoBO6pWrWqzbUlauHChTSKQJIvFUupturu7yzAMm3kX3uN0fj8fffTRRQX8hclLsj2m5z8EnD9+3t7el42jefPmatq0qT755BN16tRJO3bs0MKFC0vdDwAoj6ZPn66ioiKbgdMMw5DFYtF7770nPz8/9e3bV3fffbeOHz+uZcuWydvbW507d5ZkXy64MIdI0gsvvKBly5bprbfeUr169eTt7a2HHnpIBQUFpY4/Ly9PISEhWr169UXL7B3dOy4uTidOnND//d//KTw8XBaLRVFRURfFY2+udka+v1phYWFKT0/X8uXLtWzZMj377LP6xz/+oTVr1lwxb5bWX9/3vLw8devWTW+++eZFbc9/YX418vLyNHbsWPXs2fOiZV5eXpeMp7Qu956V5lhdzXt+77336osvvtC3335r/aJLOtfXp59+2ub++/Nq1aolSerWrZssFovmzp0rT09PFRYW6qGHHrpinLh2FN2ASRo1amR9lmJaWpqKi4v19ttvy9393AUm//73vx22rw0bNlw03bBhwxLb3n777fryyy8VGBgoX19fh8UgneuzxWLRoUOHLvkteMOGDS8aaOSv8d9www3KyMiQYRjWQvnCx7gEBQUpNDRU+/fvt0k49mrSpIlWrFihsWPHXrLNU089pcmTJ+vXX39VdHS0wsLCrnp/AFDWFRUV6ZNPPtHbb7+tTp062Szr0aOHvvjiCz3zzDNq06aNwsLC9OWXX2rx4sX629/+Zi0gSpMLLmXdunV6/PHH9eCDD0o6V0hcONBngwYNVFRUpO+//956tnTv3r3
6448/rG1uv/12ZWRkyMPD46IBOC/l/OjZFw5cdj6e999/X/fdd5+kcwORXm5wL7M0bNhQRUVF2rhxo9q0aSNJOnHihNLT09WoUSNJ5/rw1/hLs15JvL291a1bN3Xr1k3x8fGKiIjQjz/+qMaNG6u4uFhr1qxRdHR0iXH+5z//scnf69atk4+PzyWffiKde8/+85//qHbt2naNYF9UVKQtW7bojjvukHTuSojs7GzrZ6Dbb79d6enpqlevXqm36ShNmjTRjBkzLjka+tV64IEH1K1bN/Xp00eVKlVSr169JJ3r686dOy/bVw8PD8XFxSk5OVmenp7q1auXw75IweVxeTlwjU6cOKEOHTros88+0/bt23XgwAHNnj1bEydOVPfu3SWdu/StsLBQ7777rvbv369PP/1U06ZNc1gM69at08SJE7V7925NnTpVs2fP1t///vcS2/bt21c1a9ZU9+7d9d///lcHDhzQ6tWr9dxzz9lc+nU1fHx89MILL2jYsGGaMWOG9u3bp61bt+rdd9/VjBkzJEnPPPOM9uzZoxdffFHp6emaOXPmRc/9bt++vX777TdNnDhR+/bt09SpU7V48WKbNmPHjlVSUpKmTJmi3bt368cff1RycrLeeeedUsebmJiozZs369lnn9X27dv1888/64MPPrD5QNWnTx8dOXJEH330EZdgAajwFixYoD/++EP9+/fXbbfdZvOKjY21GaW6T58+mjZtmpYtW2bzBWhpcsGl1K9fX3PmzNG2bdv0ww8/qE+fPjZn/iIiIhQdHa2BAwdq06ZN+v777zVw4EB5e3tbi7zo6GhFRUWpR48eWrp0qQ4ePKj169frlVde0ZYtW0rcb2BgoLy9vbVkyRJlZmYqJyfHGs+nn36qXbt2aePGjerbt69LipT69eure/fuGjBggL777jv98MMPevTRR3XjjTdaP2vUrl1beXl5WrFihX7//XedPn26VOv9VUpKiqZPn66ffvpJ+/fv12effSZvb2+Fh4erdu3aiouL05NPPql58+ZZP0OcP5Hw7LPP6vDhwxoyZIh+/vlnff311xo9erQSEhKsJx1KEh8fr6ysLPXu3VubN2/Wvn379O233+qJJ5646IuEC1WuXFlDhgzRxo0blZaWpscff1ytW7e2FuGjRo3SJ598orFjx2rHjh3atWuXZs2apZEjR17tW1FqgwcPVm5urnr16qUtW7Zoz549+vTTT5Wenn7N237wwQf16aef6oknnrA+FWD48OFav369Bg8erG3btmnPnj36+uuvNXjwYJt1n3rqKa1cuVJLlizhc40TUXQD16hatWqKjIzUpEmT1K5dO91222169dVXNWDAAL333nuSpKZNm+qdd97Rm2++qdtuu02ff/65kpKSHBbD888/ry1btqh58+Z67bXX9M4771gfwfFXVapU0dq1a1WrVi317NlTDRs2VP/+/XXmzBmHnPkeP368Xn31VSUlJalhw4bq3LmzFi5caH0cS61atfSf//xH8+bNU9OmTTVt2jS98cYbNtto2LCh3n//fU2dOlVNmzbVpk2b9MILL9i0eeqpp/Txxx8rOTlZjRs31t13362UlBTrfkrjlltu0dKlS/XDDz/ojjvuUFRUlL7++mubb9n9/PwUGxuratWqqUePHld/YACgHJg+fbqio6Pl5+d30bLY2Fht2bLF+niivn37aufOnbrxxhvVtm1bm7ZXygWX8s4776h69epq06aNunXrppiYGJv7tyXpk08+UVBQkNq1a6cHH3xQAwYMkI+Pj/VyYTc3Ny1atEjt2rXTE088oVtuuUW9evXSL7/8oqCgoBL36+HhoSlTpuif//ynQkNDrQXp9OnT9ccff+j2229Xv3799NxzzykwMLB0B9PBkpOT1aJFC91///2KioqSYRhatGiR9SxqmzZt9Mwzz+iRRx7RDTfcoIkTJ5Zqvb/y9/fXRx99pLZt26pJkyZavny55s+frxo1akiSPvj
gAz300EN69tlnFRERoQEDBlgfS3XjjTdq0aJF2rRpk5o2bapnnnlG/fv3v2KRGxoaqnXr1uns2bPq1KmTGjdurKFDh8rf3/+yxXqVKlU0fPhw9enTR23btlW1atX05ZdfWpfHxMRowYIFWrp0qVq1aqXWrVtr0qRJCg8PL/2Bv0o1atTQypUrlZeXp7vvvlstWrTQRx995LCz3g899JBmzJihfv36ac6cOWrSpInWrFmj3bt366677lLz5s01atQom9tEpHNf4LRp00YREREX3aIH87gZf71xEkC5Urt2bQ0dOlRDhw51dShXbfXq1brnnnv0xx9/2H2/nTN07NhRt956q6ZMmeLqUAAAf3HkyBGFhYVZByzF9SElJUVDhw61Pv8cpWMYhurXr69nn31WCQkJrg7nusE93QBwCX/88YdWr16t1atX6/3333d1OAAAyXr2sHHjxjp27Jheeukl1a5dW+3atXN1aECZ9ttvv2nWrFnKyMjQE0884epwrisU3QBwCc2bN9cff/yhN998Uw0aNHB1OAAAnXuaxcsvv6z9+/fLx8dHbdq00eeff+7QwaqAiigwMFA1a9bUhx9+aPOYPZiPy8sBAAAAADAJA6kBAAAAAGASim4AAAAAAExC0Q0AAAAAgEkougEAAAAAMAlFNwAAAAAAJqHoBgAAAADAJBTdAAAAAACYhKIbAAAAAACTUHQDAAAAAGASim4AAAAAAExC0Q0AAAAAgEkougEAAAAAMImHqwMoC4qLi3X06FH5+PjIzc3N1eEAACowwzB08uRJhYaGyt2d774djZwOAHCW0uZ0im5JR48eVVhYmKvDAABcRw4fPqybbrrJ1WFUOOR0AICzXSmnU3RL8vHxkXTuYPn6+ro4GgBARZabm6uwsDBr7oFjkdMBAM5S2pxO0S1ZLz/z9fUlQQMAnIJLn81BTgcAONuVcjo3kwEAAAAAYBKKbgAAAAAATELRDQAAAACASSi6AQAAAAAwiUuL7g8++EBNmjSxDnYSFRWlxYsXW5efOXNG8fHxqlGjhqpVq6bY2FhlZmbabOPQoUPq2rWrqlSposDAQL344osqKipydlcAAAAAALiIS4vum266SRMmTFBaWpq2bNmiDh06qHv37tqxY4ckadiwYZo/f75mz56tNWvW6OjRo+rZs6d1/bNnz6pr164qKCjQ+vXrNWPGDKWkpGjUqFGu6hIAAAAAAFZuhmEYrg7iQgEBAfrHP/6hhx56SDfccINmzpyphx56SJL0888/q2HDhkpNTVXr1q21ePFi3X///Tp69KiCgoIkSdOmTdPw4cP122+/ydPTs8R95OfnKz8/3zp9/vlqOTk5PF4EAGCq3Nxc+fn5kXNMwvEFADhLaXNOmXlO99mzZzV79mydOnVKUVFRSktLU2FhoaKjo61tIiIiVKtWLWvRnZqaqsaNG1sLbkmKiYnRoEGDtGPHDjVv3rzEfSUlJWns2LGm96kiqT1ioUO2c3BCV4dsBwAA4Eoc9fnF0fg8BFxfXD6Q2o8//qhq1arJYrHomWee0dy5c9WoUSNlZGTI09NT/v7+Nu2DgoKUkZEhScrIyLApuM8vP7/sUhITE5WTk2N9HT582LGdAgAAAABAZeBMd4MGDbRt2zbl5OToq6++UlxcnNasWWPqPi0WiywWi6n7AAAAAADA5UW3p6en6tWrJ0lq0aKFNm/erP/7v//TI488ooKCAmVnZ9uc7c7MzFRwcLAkKTg4WJs2bbLZ3vnRzc+3AQAAAADAVVx+eflfFRcXKz8/Xy1atFDlypW1YsUK67L09HQdOnRIUVFRkqSoqCj9+OOPOn78uLXNsmXL5Ovrq0aNGjk9dgAAAAAALuTSM92JiYnq0qWLatWqpZMnT2rmzJlavXq1vv32W/n5+al///5KSEhQQECAfH19NWTIEEVFRal169aSpE6dOqlRo0bq16+fJk6cqIyMDI0cOVLx8fFcPg4AAAAAcDmXFt3Hjx/
XY489pmPHjsnPz09NmjTRt99+q3vvvVeSNGnSJLm7uys2Nlb5+fmKiYnR+++/b12/UqVKWrBggQYNGqSoqChVrVpVcXFxGjdunKu6BAAAAACAlUuL7unTp192uZeXl6ZOnaqpU6desk14eLgWLVrk6NAAAAAAALhmZe6ebgAAAAAAKgqKbgAAAAAATELRDQAAAACASSi6AQAAAAAwCUU3AAAAAAAmoegGAAAAAMAkFN0AAAAAAJiEohsAAAAAAJNQdAMAAAAAYBKKbgAAAAAATELRDQAAAACASSi6AQAAAAAwCUU3AAAAAAAmoegGAAAAAMAkFN0AAAAAAJiEohsAAAAAAJNQdAMAAAAAYBKKbgAAAAAATELRDQAAAACASSi6AQAAAAAwiYcrd56UlKQ5c+bo559/lre3t9q0aaM333xTDRo0sLZp37691qxZY7Pe008/rWnTplmnDx06pEGDBmnVqlWqVq2a4uLilJSUJA8Pl3YPAAAAuEjtEQtdHcJFDk7o6uoQgArLpVXpmjVrFB8fr1atWqmoqEgvv/yyOnXqpJ07d6pq1arWdgMGDNC4ceOs01WqVLH+fPbsWXXt2lXBwcFav369jh07pscee0yVK1fWG2+84dT+AAAAAABwIZcW3UuWLLGZTklJUWBgoNLS0tSuXTvr/CpVqig4OLjEbSxdulQ7d+7U8uXLFRQUpGbNmmn8+PEaPny4xowZI09PT1P7AAAAAADApZSpe7pzcnIkSQEBATbzP//8c9WsWVO33XabEhMTdfr0aeuy1NRUNW7cWEFBQdZ5MTExys3N1Y4dO0rcT35+vnJzc21eAAAAAAA4Wpm56bm4uFhDhw5V27Ztddttt1nn9+nTR+Hh4QoNDdX27ds1fPhwpaena86cOZKkjIwMm4JbknU6IyOjxH0lJSVp7NixJvUEAAAAAIBzysyZ7vj4eP3000+aNWuWzfyBAwcqJiZGjRs3Vt++ffXJJ59o7ty52rdv31XvKzExUTk5OdbX4cOHrzV8AACuax988IGaNGkiX19f+fr6KioqSosXL7YuP3PmjOLj41WjRg1Vq1ZNsbGxyszMtNnGoUOH1LVrV1WpUkWBgYF68cUXVVRU5OyuAADgUGWi6B48eLAWLFigVatW6aabbrps28jISEnS3r17JUnBwcEXJe3z05e6D9xisVg/FJx/AQCAq3fTTTdpwoQJSktL05YtW9ShQwd1797deqvXsGHDNH/+fM2ePVtr1qzR0aNH1bNnT+v65wdGLSgo0Pr16zVjxgylpKRo1KhRruoSAAAO4dKi2zAMDR48WHPnztXKlStVp06dK66zbds2SVJISIgkKSoqSj/++KOOHz9ubbNs2TL5+vqqUaNGpsQNAABsdevWTffdd5/q16+vW265Ra+//rqqVaumDRs2KCcnR9OnT9c777yjDh06qEWLFkpOTtb69eu1YcMGSf8bGPWzzz5Ts2bN1KVLF40fP15Tp05VQUGBi3sHAMDVc2nRHR8fr88++0wzZ86Uj4+PMjIylJGRoT///FOStG/fPo0fP15paWk6ePCgvvnmGz322GNq166dmjRpIknq1KmTGjVqpH79+umHH37Qt99+q5EjRyo+Pl4Wi8WV3QMA4Lp09uxZzZo1S6dOnVJUVJTS0tJUWFio6Ohoa5uIiAjVqlVLqampkq5uYFSJwVEBAGWfS4vuDz74QDk5OWrfvr1CQkKsry+//FKS5OnpqeXLl6tTp06KiIjQ888/r9jYWM2fP9+6jUqVKmnBggWqVKmSoqKi9Oijj+qxxx6zea43AAAw348//qhq1arJYrHomWee0dy5c9WoUSNlZGTI09NT/v7+Nu2DgoKsg55ezcCo0rnBUf38/KyvsLAwx3YKAIBr5NLRyw3DuOzysLAwrVmz5orbCQ8P16JFixwVFuAStUcsdMh2Dk7o6pDtAIC9GjRooG3btiknJ0dfffWV4uLiSpXHr0ViYqISEhKs07m
5uRTeAIAypcw8MgwAAJRvnp6eqlevniSpRYsW2rx5s/7v//5PjzzyiAoKCpSdnW1ztjszM9M66GlwcLA2bdpks70rDYwqnRscldvJAABlWZkYvRwAAFQ8xcXFys/PV4sWLVS5cmWtWLHCuiw9PV2HDh1SVFSUJAZGBQBUXJzpBgAA1ywxMVFdunRRrVq1dPLkSc2cOVOrV6/Wt99+Kz8/P/Xv318JCQkKCAiQr6+vhgwZoqioKLVu3VqS7cCoEydOVEZGBgOjAgAqBIpuAABwzY4fP67HHntMx44dk5+fn5o0aaJvv/1W9957ryRp0qRJcnd3V2xsrPLz8xUTE6P333/fuv75gVEHDRqkqKgoVa1aVXFxcQyMCgAo9+wuuk+dOqWqVauaEQsAAHAiR+b06dOnX3a5l5eXpk6dqqlTp16yDQOjAgAqIrvv6Q4KCtKTTz6p7777zox4AACAk5DTAQAwn91F92effaasrCx16NBBt9xyiyZMmKCjR4+aERsAADAROR0AAPPZXXT36NFD8+bN06+//qpnnnlGM2fOVHh4uO6//37NmTNHRUVFZsQJAAAcjJwOAID5rnogtRtuuEEJCQlKSEjQu+++qxdffFGLFi1SzZo19cwzz2jEiBGqUqWKI2MFrGqPWOiQ7Ryc0NUh2wGA8oycDgCAea666M7MzNSMGTOUkpKiX375RQ899JD69++vI0eO6M0339SGDRu0dOlSR8YKAABMQE4HAMA8dhfdc+bMUXJysr799ls1atRIzz77rB599FH5+/tb27Rp00YNGzZ0ZJwAAMDByOkAAJjP7qL7iSeeUK9evbRu3Tq1atWqxDahoaF65ZVXrjk4AABgHnI6AADms7voPnbs2BXv6/L29tbo0aOvOigAAGA+cjoAAOaze/TyKlWqaN++fRo5cqR69+6t48ePS5IWL16sHTt2ODxAAABgDnI6AADms7voXrNmjRo3bqyNGzdqzpw5ysvLkyT98MMPfBMOAEA5Qk4HAMB8dl9ePmLECL322mtKSEiQj4+PdX6HDh303nvvOTQ4AABgHnI6AFQcjnqkrqPxiN6rONP9448/6sEHH7xofmBgoH7//XeHBAUAAMxHTgcAwHx2F93+/v46duzYRfO///573XjjjQ4JCgAAmI+cDgCA+ey+vLxXr14aPny4Zs+eLTc3NxUXF2vdunV64YUX9Nhjj5kRIwAAMAE5HcB5XJoMmMfuM91vvPGGIiIiFBYWpry8PDVq1Ejt2rVTmzZtNHLkSDNiBAAAJiCnAwBgPruLbk9PT3300Ufav3+/FixYoM8++0w///yzPv30UxUUFNi1raSkJLVq1Uo+Pj4KDAxUjx49lJ6ebtPmzJkzio+PV40aNVStWjXFxsYqMzPTps2hQ4fUtWtXValSRYGBgXrxxRdVVFRkb9cAALiuODKnAwCAktlddD/33HOSpLCwMN133316+OGHVb9+fZ06dUr33XefXdtas2aN4uPjtWHDBi1btkyFhYXq1KmTTp06ZW0zbNgwzZ8/X7Nnz9aaNWt09OhR9ezZ07r87Nmz6tq1qwoKCrR+/XrNmDFDKSkpGjVqlL1dAwDguuLInA4AAEpm9z3dCxcuVPXq1TV27FjrvFOnTqlz585273zJkiU20ykpKQoMDFRaWpratWunnJwcTZ8+XTNnzlSHDh0kScnJyWrYsKE2bNig1q1ba+nSpdq5c6eWL1+uoKAgNWvWTOPHj9fw4cM1ZswYeXp62h0XAADXA0fmdAAAUDK7z3QvXbpUH330kSZPnixJOnnypO699165ubldVETbKycnR5IUEBAgSUpLS1NhYaGio6OtbSIiIlSrVi2lpqZKklJTU9W4cWMFBQVZ28TExCg3N1c7duwocT/5+fnKzc21eQEAcL0xM6cDAIBz7D7TXbduXS1ZskT33HOP3N3d9cUXX8hisWjhwoWqWrXqVQdSXFysoUOHqm3btrrtttskSRkZGfL09JS
/v79N26CgIGVkZFjbXFhwn19+fllJkpKSbL7VBwDgemRWTgcAAP9j95luSWrSpIkWLFigl19+WVWqVNHixYuvOTnHx8frp59+0qxZs65pO6WRmJionJwc6+vw4cOm7xMAgLLIjJwOAAD+p1Rnups3by43N7eL5lssFh09elRt27a1ztu6davdQQwePFgLFizQ2rVrddNNN1nnBwcHq6CgQNnZ2TZnuzMzMxUcHGxts2nTJpvtnR/d/HybkuK2WCx2xwkAQHlndk4HAAC2SlV09+jRw5SdG4ahIUOGaO7cuVq9erXq1Kljs7xFixaqXLmyVqxYodjYWElSenq6Dh06pKioKElSVFSUXn/9dR0/flyBgYGSpGXLlsnX11eNGjUyJW4AAMors3I6AAAoWamK7tGjR5uy8/j4eM2cOVNff/21fHx8rPdg+/n5ydvbW35+furfv78SEhIUEBAgX19fDRkyRFFRUWrdurUkqVOnTmrUqJH69euniRMnKiMjQyNHjlR8fDxnswEA+AuzcjoAACiZ3QOpOdIHH3wgSWrfvr3N/OTkZD3++OOSpEmTJsnd3V2xsbHKz89XTEyM3n//fWvbSpUqacGCBRo0aJCioqJUtWpVxcXFady4cc7qBgAAAAAAJSpV0R0QEKDdu3erZs2aql69eon3gp2XlZVV6p0bhnHFNl5eXpo6daqmTp16yTbh4eFatGhRqfcLAMD1yqycDgBmqD1ioatDKNHBCV1dHQLKkVIV3ZMmTZKPj48kWZ/lCQAAyh9yOgAAzlWqojsuLq7EnwEAQPlCTgcAwLmu6p7u4uJi7d27V8ePH1dxcbHNsnbt2jkkMAAAYD5yOgAA5rK76N6wYYP69OmjX3755aJ7st3c3HT27FmHBQcAAMxDTgcAwHx2F93PPPOMWrZsqYULFyokJOSyA7AAAICyi5wOAID57C669+zZo6+++kr16tUzIx4AAOAk5HQAAMznbu8KkZGR2rt3rxmxAAAAJyKnAwBgvlKd6d6+fbv15yFDhuj5559XRkaGGjdurMqVK9u0bdKkiWMjBAAADkNOBwDAuUpVdDdr1kxubm42g6w8+eST1p/PL2PQFQAAyjZyOgAAzlWqovvAgQNmxwEAAJyAnA4AgHOVqugODw83Ow4AFVTtEQsdsp2DE7o6ZDvA9Y6cDgCAc9k9kFpSUpL+9a9/XTT/X//6l958802HBAUAAMxHTgcAwHx2F93//Oc/FRERcdH8W2+9VdOmTXNIUAAAwHzkdAAAzGd30Z2RkaGQkJCL5t9www06duyYQ4ICAADmc2ROT0pKUqtWreTj46PAwED16NFD6enpNm3OnDmj+Ph41ahRQ9WqVVNsbKwyMzNt2hw6dEhdu3ZVlSpVFBgYqBdffFFFRUX2dw4AgDLC7qI7LCxM69atu2j+unXrFBoa6pCgAACA+RyZ09esWaP4+Hht2LBBy5YtU2FhoTp16qRTp05Z2wwbNkzz58/X7NmztWbNGh09elQ9e/a0Lj979qy6du2qgoICrV+/XjNmzFBKSopGjRp19Z0EAMDFSjWQ2oUGDBigoUOHqrCwUB06dJAkrVixQi+99JKef/55hwcIAADM4cicvmTJEpvplJQUBQYGKi0tTe3atVNOTo6mT5+umTNnWveVnJyshg0basOGDWrdurWWLl2qnTt3avny5QoKClKzZs00fvx4DR8+XGPGjJGnp6djOg4AgBPZXXS/+OKLOnHihJ599lkVFBRIkry8vDR8+HAlJiY6PEAAAGAOM3N6Tk6OJCkgIECSlJaWpsLCQkVHR1vbREREqFatWkpNTVXr1q2Vmpqqxo0bKygoyNomJiZGgwYN0o4dO9S8efOL9pOfn6/8/HzrdG5u7jXFDQCAo9lddLu5uenNN9/Uq6++ql27dsnb21v169eXxWIxIz4AAGASs3J6cXGxhg4dqrZt2+q2226TdO7+cU9PT/n7+9u0DQoKUkZGhrXNhQX3+eX
nl5UkKSlJY8eOvaZ4AcBejnokKq4Pdhfd51WrVk2tWrVyZCwAAMAFHJ3T4+Pj9dNPP+m7775z2DYvJTExUQkJCdbp3NxchYWFmb5fAABKy+6i+9SpU5owYYJWrFih48ePq7i42Gb5/v37HRYcAAAwjxk5ffDgwVqwYIHWrl2rm266yTo/ODhYBQUFys7OtjnbnZmZqeDgYGubTZs22Wzv/Ojm59v8lcVi4Wo7AECZZnfR/dRTT2nNmjXq16+fQkJC5ObmdtU7X7t2rf7xj38oLS1Nx44d09y5c9WjRw/r8scff1wzZsywWScmJsZmsJasrCwNGTJE8+fPl7u7u2JjY/V///d/qlat2lXHBQDA9cCROd0wDA0ZMkRz587V6tWrVadOHZvlLVq0UOXKlbVixQrFxsZKktLT03Xo0CFFRUVJkqKiovT666/r+PHjCgwMlCQtW7ZMvr6+atSo0VXHBgCAK9lddC9evFgLFy5U27Ztr3nnp06dUtOmTfXkk0/aPDLkQp07d1ZycrJ1+q/fZvft21fHjh2zPp7kiSee0MCBAzVz5sxrjg8AgIrMkTk9Pj5eM2fO1Ndffy0fHx/rPdh+fn7y9vaWn5+f+vfvr4SEBAUEBMjX11dDhgxRVFSUWrduLUnq1KmTGjVqpH79+mnixInKyMjQyJEjFR8fz9lsAEC5ZXfRXb16detIpNeqS5cu6tKly2XbWCyWS15StmvXLi1ZskSbN29Wy5YtJUnvvvuu7rvvPr311ls8NxzXJUcN7HFwQleHbAdA2eXInP7BBx9Iktq3b28zPzk5WY8//rgkadKkSdar0vLz8xUTE6P333/f2rZSpUpasGCBBg0apKioKFWtWlVxcXEaN26cQ2IEAMAV3O1dYfz48Ro1apROnz5tRjwXWb16tQIDA9WgQQMNGjRIJ06csC5LTU2Vv7+/teCWpOjoaLm7u2vjxo2X3GZ+fr5yc3NtXgAAXG8cmdMNwyjxdb7gls49jmzq1KnKysrSqVOnNGfOnIu+WA8PD9eiRYt0+vRp/fbbb3rrrbfk4XHV474CAOBydmext99+W/v27VNQUJBq166typUr2yzfunWrw4Lr3LmzevbsqTp16mjfvn16+eWX1aVLF6WmpqpSpUrKyMiw3vN1noeHhwICAi75aBGJx4sAACA5N6cDAHC9srvovnCgM7P16tXL+nPjxo3VpEkT1a1bV6tXr1bHjh2vers8XgQAAOfmdAAArld2F92jR482I45Sufnmm1WzZk3t3btXHTt2VHBwsI4fP27TpqioSFlZWZe8D1zi8SIAAEiuzekAAFwv7L6n25WOHDmiEydOKCQkRNK5R4tkZ2crLS3N2mblypUqLi5WZGSkq8IEAAAAAEBSKc90BwQEaPfu3apZs6aqV69+2ed4ZmVllXrneXl52rt3r3X6wIED2rZtmwICAhQQEKCxY8cqNjZWwcHB2rdvn1566SXVq1dPMTExkqSGDRuqc+fOGjBggKZNm6bCwkINHjxYvXr1YuRyAABKYFZOBwAAJStV0T1p0iT5+PhIkiZPnuywnW/ZskX33HOPdfr8fdZxcXH64IMPtH37ds2YMUPZ2dkKDQ1Vp06dNH78eJtLwz///HMNHjxYHTt2tD6GZMqUKQ6LEQCAisSsnA4AAEpWqqI7Li6uxJ+vVfv27WUYxiWXf/vtt1fcRkBAgGbOnOmwmAAAqMjMyukAAKBk5eqebgAAAAAAyhOKbgAAAAAATELRDQAAAACASUpVdG/fvl3FxcVmxwIAAExGTgcAwLlKVXQ3b95cv//+uyTp5ptv1okTJ0wNCgAAmIOcDgCAc5Wq6Pb399eBAwckSQcPHuQbcgAAyilyOgAAzlWqR4bFxsbq7rvvVkhIiNzc3NSyZUtVqlSpxLb79+93aIAAAMBxyOkAADhXqYruDz/8UD179tTevXv13HPPacCAAfLx8TE7NgAA4GDkdAAAnKtURbckde7cWZKUlpa
mv//97yRo4AK1Ryx0dQgAUGrkdAAAnKfURfd5ycnJ1p+PHDkiSbrpppscFxEAAHAKcjoAAOaz+zndxcXFGjdunPz8/BQeHq7w8HD5+/tr/PjxDMYCAEA5Qk4HAMB8dp/pfuWVVzR9+nRNmDBBbdu2lSR99913GjNmjM6cOaPXX3/d4UECAADHI6cDAGA+u4vuGTNm6OOPP9YDDzxgndekSRPdeOONevbZZ0nQAACUE+R0OBLjmwBAyewuurOyshQREXHR/IiICGVlZTkkKFRcJOTyg/cKqPjI6QAAmM/ue7qbNm2q995776L57733npo2beqQoAAAgPnI6QAAmM/uM90TJ05U165dtXz5ckVFRUmSUlNTdfjwYS1atMjhAQIAAHOQ0wEAMJ/dZ7rvvvtu7d69Ww8++KCys7OVnZ2tnj17Kj09XXfddZcZMQIAABOQ0wEAMJ/dZ7olKTQ0lMFVAACoAMjpAACYy+4z3QAAAAAAoHQougEAAAAAMIlLi+61a9eqW7duCg0NlZubm+bNm2ez3DAMjRo1SiEhIfL29lZ0dLT27Nlj0yYrK0t9+/aVr6+v/P391b9/f+Xl5TmxFwAAAAAAlMyuotswDB06dEhnzpxxyM5PnTqlpk2baurUqSUunzhxoqZMmaJp06Zp48aNqlq1qmJiYmz237dvX+3YsUPLli3TggULtHbtWg0cONAh8QEAUFE5OqcDAICS2V1016tXT4cPH3bIzrt06aLXXntNDz74YIn7mjx5skaOHKnu3burSZMm+uSTT3T06FHrGfFdu3ZpyZIl+vjjjxUZGak777xT7777rmbNmqWjR486JEYAACoiR+d0AABQMruKbnd3d9WvX18nTpwwKx6rAwcOKCMjQ9HR0dZ5fn5+ioyMVGpqqqRzzxL19/dXy5YtrW2io6Pl7u6ujRs3XnLb+fn5ys3NtXkBAHA9cWZOBwDgemb3Pd0TJkzQiy++qJ9++smMeKwyMjIkSUFBQTbzg4KCrMsyMjIUGBhos9zDw0MBAQHWNiVJSkqSn5+f9RUWFubg6AEAKPucldMBALie2f2c7scee0ynT59W06ZN5enpKW9vb5vlWVlZDgvOLImJiUpISLBO5+bmUngDAK47FSGnAwBQ1tlddE+ePNmEMC4WHBwsScrMzFRISIh1fmZmppo1a2Ztc/z4cZv1ioqKlJWVZV2/JBaLRRaLxfFBAwBQjjgrpwMAcD2zu+iOi4szI46L1KlTR8HBwVqxYoW1yM7NzdXGjRs1aNAgSVJUVJSys7OVlpamFi1aSJJWrlyp4uJiRUZGOiVOAADKK2fldAAArmd2F92OlJeXp71791qnDxw4oG3btikgIEC1atXS0KFD9dprr6l+/fqqU6eOXn31VYWGhqpHjx6SpIYNG6pz584aMGCApk2bpsLCQg0ePFi9evVSaGioi3oFAAAAAMA5pS663d3d5ebmdtk2bm5uKioqKvXOt2zZonvuucc6ff4+67i4OKWkpOill17SqVOnNHDgQGVnZ+vOO+/UkiVL5OXlZV3n888/1+DBg9WxY0e5u7srNjZWU6ZMKXUMAABcb8zI6QAAoGSlLrrnzp17yWWpqamaMmWKiouL7dp5+/btZRjGJZe7ublp3LhxGjdu3CXbBAQEaObMmXbtFwCA65kZOR0AAJSs1EV39+7dL5qXnp6uESNGaP78+erbt+9li2MAAFA2kNMBAHAeu5/TLUlHjx7VgAED1LhxYxUVFWnbtm2aMWOGwsPDHR0fAAAwETkdAABz2VV05+TkaPjw4apXr5527NihFStWaP78+brtttvMig8AAJjA0Tl97dq16tatm0JDQ+Xm5qZ58+bZLDcMQ6NGjVJISIi8vb0VHR2tPXv22LTJyspS37595evrK39/f/Xv3195eXlX20UAAMqEUhfdEydO1M0336wFCxboiy++0Pr163XXXXeZGRsAADCBGTn91KlTatq0qaZOnXrJfU6ZMkX
Tpk3Txo0bVbVqVcXExOjMmTPWNn379tWOHTu0bNkyLViwQGvXrtXAgQOvKS4AAFzNzbjcSGYXcHd3t34zXalSpUu2mzNnjsOCc5bc3Fz5+fkpJydHvr6+rg6nTKo9YqGrQzDFwQldHbKdinp8yhJHvVeAq5WFnGN2Tndzc9PcuXOtj/g0DEOhoaF6/vnn9cILL0g6d6Y9KChIKSkp6tWrl3bt2qVGjRpp8+bNatmypSRpyZIluu+++3TkyJFLPgo0Pz9f+fn51unc3FyFhYWR012AXAigJBX5M1xpc3qpB1J77LHHrvh4EQAAUPY5O6cfOHBAGRkZio6Ots7z8/NTZGSkUlNT1atXL6Wmpsrf399acEtSdHS03N3dtXHjRj344IMlbjspKUljx441vQ8AAFytUhfdKSkpJoYBAACcxdk5PSMjQ5IUFBRkMz8oKMi6LCMjQ4GBgTbLPTw8FBAQYG1TksTERCUkJFinz5/pBgCgrCh10Q0AAFDWWCwWWSwWV4cBAMAlXdUjwwAAAEorODhYkpSZmWkzPzMz07osODhYx48ft1leVFSkrKwsaxsAAMojim4AAGCqOnXqKDg4WCtWrLDOy83N1caNGxUVFSVJioqKUnZ2ttLS0qxtVq5cqeLiYkVGRjo9ZgAAHIXLywEAwDXLy8vT3r17rdMHDhzQtm3bFBAQoFq1amno0KF67bXXVL9+fdWpU0evvvqqQkNDrSOcN2zYUJ07d9aAAQM0bdo0FRYWavDgwerVq9clRy4HAKA8oOgGAADXbMuWLbrnnnus0+cHN4uLi1NKSopeeuklnTp1SgMHDlR2drbuvPNOLVmyRF5eXtZ1Pv/8cw0ePFgdO3aUu7u7YmNjNWXKFKf3BQAAR6LoBgAA16x9+/YyDOOSy93c3DRu3DiNGzfukm0CAgI0c+ZMM8IDAMBluKcbAAAAAACTUHQDAAAAAGASim4AAAAAAExC0Q0AAAAAgEkougEAAAAAMAlFNwAAAAAAJuGRYRVc7RELXR0CAAAAAFy3ONMNAAAAAIBJynTRPWbMGLm5udm8IiIirMvPnDmj+Ph41ahRQ9WqVVNsbKwyMzNdGDEAAAAAAP9T5i8vv/XWW7V8+XLrtIfH/0IeNmyYFi5cqNmzZ8vPz0+DBw9Wz549tW7dOleEinKIy+8BAAAAmKnMF90eHh4KDg6+aH5OTo6mT5+umTNnqkOHDpKk5ORkNWzYUBs2bFDr1q0vuc38/Hzl5+dbp3Nzcx0fOAAAAADgulemLy+XpD179ig0NFQ333yz+vbtq0OHDkmS0tLSVFhYqOjoaGvbiIgI1apVS6mpqZfdZlJSkvz8/KyvsLAwU/sAAAAAALg+lemiOzIyUikpKVqyZIk++OADHThwQHfddZdOnjypjIwMeXp6yt/f32adoKAgZWRkXHa7iYmJysnJsb4OHz5sYi8AAAAAANerMn15eZcuXaw/N2nSRJGRkQoPD9e///1veXt7X/V2LRaLLBaLI0IEAAAAAOCSynTR/Vf+/v665ZZbtHfvXt17770qKChQdna2zdnuzMzMEu8BBwCp7A2ed3BCV1eHAAAAABOV6cvL/yovL0/79u1TSEiIWrRoocqVK2vFihXW5enp6Tp06JCioqJcGCUAAAAAAOeU6TPdL7zwgrp166bw8HAdPXpUo0ePVqVKldS7d2/5+fmpf//+SkhIUEBAgHx9fTVkyBBFRUVdduRyAOVTWTtDDQAAAJRGmS66jxw5ot69e+vEiRO64YYbdOedd2rDhg264YYbJEmTJk2Su7u7YmNjlZ+fr5iYGL3//vsujhoAAAAAgHPKdNE9a9asyy738vLS1KlTNXXqVCdFBAAobxx1lQT33wMAgKtRru7pBgAAAACgPKHoBgAAAADAJBTdAAAAAACYhKIbAAAAAACTUHQDAAAAAGASim4AAAAAAExC0Q0AAAAAgEkougEAAAAAMImHqwMAAJQNtUcsdMh2Dk7o6pD
tAAAAVAQU3QAAAOWIo74gAwA4B0W3g3GmCAAAAABwHvd0AwAAAABgEopuAAAAAABMQtENAAAAAIBJKLoBAAAAADAJA6kBQAXAaMYAAABlE0U3ALgQxTIAAEDFRtFdRvFBHAAAAADKP+7pBgAAAADAJJzpBgA4FFfqAAAA/E+FOdM9depU1a5dW15eXoqMjNSmTZtcHRIAALgK5HQAQEVSIYruL7/8UgkJCRo9erS2bt2qpk2bKiYmRsePH3d1aAAAwA7kdABAReNmGIbh6iCuVWRkpFq1aqX33ntPklRcXKywsDANGTJEI0aMuKh9fn6+8vPzrdM5OTmqVauWDh8+LF9f32uK5bbR317T+gCAsumnsTEO2U5ubq7CwsKUnZ0tPz8/h2yzIilLOV0irwPAtXJU/iyLSpvTy/093QUFBUpLS1NiYqJ1nru7u6Kjo5WamlriOklJSRo7duxF88PCwkyLEwBQvvlNduz2Tp48SdH9F+R0AKh4HJ0/y6Ir5fRyX3T//vvvOnv2rIKCgmzmBwUF6eeffy5xncTERCUkJFini4uLlZWVpRo1asjNzc2h8Z3/9sNR37g7S3mNWyq/sZfXuKXyGztxO195jd2RcRuGoZMnTyo0NNRB0VUcZS2nl9ffV1fgWJUex6r0OFalx7EqPVfk9HJfdF8Ni8Uii8ViM8/f39/Uffr6+pbLP4DyGrdUfmMvr3FL5Td24na+8hq7o+LmDLfjOCOnl9ffV1fgWJUex6r0OFalx7EqPWfm9HI/kFrNmjVVqVIlZWZm2szPzMxUcHCwi6ICAAD2IqcDACqicl90e3p6qkWLFlqxYoV1XnFxsVasWKGoqCgXRgYAAOxBTgcAVEQV4vLyhIQExcXFqWXLlrrjjjs0efJknTp1Sk888YSrQ5PFYtHo0aMvuvStrCuvcUvlN/byGrdUfmMnbucrr7GX17jLo7KU03nfS49jVXocq9LjWJUex6r0XHGsKsQjwyTpvffe0z/+8Q9lZGSoWbNmmjJliiIjI10dFgAAsBM5HQBQkVSYohsAAAAAgLKm3N/TDQAAAABAWUXRDQAAAACASSi6AQAAAAAwCUU3AAAAAAAmoeh2gKlTp6p27dry8vJSZGSkNm3adMm2H330ke666y5Vr15d1atXV3R09GXbm8meuOfMmaOWLVvK399fVatWVbNmzfTpp586MVpb9sR+oVmzZsnNzU09evQwN8BLsCfulJQUubm52by8vLycGO3/2Hu8s7OzFR8fr5CQEFksFt1yyy1atGiRk6K1ZU/s7du3v+iYu7m5qWvXrk6M+Bx7j/nkyZPVoEEDeXt7KywsTMOGDdOZM2ecFO3/2BN3YWGhxo0bp7p168rLy0tNmzbVkiVLnBjt/6xdu1bdunVTaGio3NzcNG/evCuus3r1at1+++2yWCyqV6+eUlJSTI8TznO1eeZ6kpSUpFatWsnHx0eBgYHq0aOH0tPTXR1WuTBhwgS5ublp6NChrg6lzPr111/16KOPqkaNGvL29lbjxo21ZcsWV4dV5pw9e1avvvqq6tSpI29vb9WtW1fjx48XY2VfObcbhqFRo0YpJCRE3t7eio6O1p49e8wJxsA1mTVrluHp6Wn861//Mnbs2GEMGDDA8Pf3NzIzM0ts36dPH2Pq1KnG999/b+zatct4/PHHDT8/P+PIkSNlOu5Vq1YZc+bMMXbu3Gns3bvXmDx5slGpUiVjyZIlTo3bMOyP/bwDBw4YN954o3HXXXcZ3bt3d06wF7A37uTkZMPX19c4duyY9ZWRkeHkqO2POz8/32jZsqVx3333Gd99951x4MABY/Xq1ca2bducHLn9sZ84ceL/tXfncVFV///AXwMybLK4sQkCigskgoIiaKlJIvoht0+pHypc+6ZDqeSCmeKOae6S5IqWhppLpeVGqR8NEVFKBTFXKAVzA0Flm/P7wx/348QiywzD8no+HvN4eM89c+77zMicec+99xyV1/vixYt
CV1dXbN68uUbHvW3bNqGvry+2bdsmbty4IQ4dOiSsra3FpEmTanTcU6dOFTY2NuLAgQPi2rVr4osvvhAGBgbi3Llz1Rq3EEL8+OOPYsaMGWLPnj0CgNi7d2+Z9a9fvy6MjIxESEiISEpKEqtXr9baZyKpX2XHmfrGz89PbN68WVy8eFEkJiaKfv36iRYtWojs7Gxth1ajnTlzRjg4OIgOHTqICRMmaDucGunBgwfC3t5ejBgxQsTFxYnr16+LQ4cOiatXr2o7tBpnwYIFokmTJmL//v3ixo0bYteuXaJhw4Zi5cqV2g5N6142ti9atEiYmZmJffv2id9++028+eabwtHRUTx9+lTtsTDprqIuXboIhUIhbRcWFgobGxsRHh5erucXFBQIExMTsWXLFk2FWKKqxi2EEB07dhSffvqpJsIrU2ViLygoED4+PmLDhg0iKChIK0l3RePevHmzMDMzq6boSlfRuNeuXStatmwp8vLyqivEUlX1//ny5cuFiYlJtX+BrGjcCoVCvP766yplISEholu3bhqN858qGre1tbVYs2aNStngwYNFYGCgRuN8mfIk3VOnThWvvPKKStnQoUOFn5+fBiOj6qKOMbI+unv3rgAgjh8/ru1QaqzHjx+L1q1biyNHjogePXow6S7FtGnTRPfu3bUdRq3Qv39/MWrUKJWymjCW1jT/HNuVSqWwsrISS5YskcoePXok9PX1xTfffKP24/Py8irIy8tDQkICfH19pTIdHR34+voiNja2XG08efIE+fn5aNy4sabCLKaqcQshEBMTg5SUFLz22muaDLWYysY+d+5cWFhYYPTo0dURZjGVjTs7Oxv29vaws7PDgAEDcOnSpeoIV1KZuL///nt4e3tDoVDA0tIS7du3x8KFC1FYWFhdYQNQz9/nxo0bMWzYMBgbG2sqzGIqE7ePjw8SEhKky1+vX7+OH3/8Ef369auWmIHKxZ2bm1vslglDQ0OcPHlSo7GqQ2xsrEpfAcDPz6/c/7eo5lLHZ0d9lZmZCQDV+p2mtlEoFOjfv3+xzw9S9f3338PT0xNvvfUWLCws0LFjR6xfv17bYdVIPj4+iImJwZUrVwAAv/32G06ePAl/f38tR1az3bhxA+np6Sp/i2ZmZvDy8tLIZ30DtbdYj9y7dw+FhYWwtLRUKbe0tMTly5fL1ca0adNgY2NTrR++lY07MzMTzZs3R25uLnR1dfHFF1/gjTfe0HS4KioT+8mTJ7Fx40YkJiZWQ4Qlq0zcbdu2xaZNm9ChQwdkZmbi888/h4+PDy5dugRbW9vqCLtScV+/fh0///wzAgMD8eOPP+Lq1asYP3488vPzERYWVh1hA6j63+eZM2dw8eJFbNy4UVMhlqgycf/nP//BvXv30L17dwghUFBQgA8++ACffPJJdYQMoHJx+/n5YdmyZXjttdfQqlUrxMTEYM+ePdX+A01lpKenl9jXrKwsPH36FIaGhlqKjKpKHWN7faRUKjFx4kR069YN7du313Y4NVJ0dDTOnTuH+Ph4bYdS412/fh1r165FSEgIPvnkE8THx+Ojjz6CXC5HUFCQtsOrUUJDQ5GVlYV27dpBV1cXhYWFWLBgAQIDA7UdWo2Wnp4OACV+1hftUycm3Vq0aNEiREdH49ixY1qbIKsiTExMkJiYiOzsbMTExCAkJAQtW7ZEz549tR1aqR4/fox3330X69evR9OmTbUdToV4e3vD29tb2vbx8YGzszO+/PJLzJs3T4uRlU2pVMLCwgLr1q2Drq4uPDw88Ndff2HJkiXVmnRX1caNG+Hq6oouXbpoO5SXOnbsGBYuXIgvvvgCXl5euHr1KiZMmIB58+Zh5syZ2g6vVCtXrsTYsWPRrl07yGQytGrVCiNHjsSmTZu0HRoRVZBCocDFixdrxZUq2pCWloYJEybgyJEjteI7n7YplUp4enpi4cKFAICOHTvi4sWLiIyMZNL9Dzt37sS2bduwfft2vPLKK0hMTMT
EiRNhY2PD16oGYdJdBU2bNoWuri4yMjJUyjMyMmBlZVXmcz///HMsWrQIR48eRYcOHTQZZjGVjVtHRwdOTk4AAHd3dyQnJyM8PLxak+6Kxn7t2jXcvHkTAQEBUplSqQQANGjQACkpKWjVqpVmg0bV/q8U0dPTQ8eOHXH16lVNhFiiysRtbW0NPT096OrqSmXOzs5IT09HXl4e5HK5RmMuUpXXPCcnB9HR0Zg7d64mQyxRZeKeOXMm3n33XYwZMwYA4OrqipycHLz//vuYMWMGdHQ0fydRZeJu1qwZ9u3bh2fPnuH+/fuwsbFBaGgoWrZsqfF4q8rKyqrEvpqamvIsdy2njs/r+iY4OBj79+/HiRMnqu1KrNomISEBd+/eRadOnaSywsJCnDhxAmvWrJGuIqTnrK2t4eLiolLm7OyM3bt3aymimmvKlCkIDQ3FsGHDADz/DnDr1i2Eh4cz6S5D0ed5RkYGrK2tpfKMjAy4u7ur/Xi8p7sK5HI5PDw8EBMTI5UplUrExMSonKH8p8WLF2PevHk4ePAgPD09qyNUFZWN+5+USiVyc3M1EWKpKhp7u3btcOHCBSQmJkqPN998E7169UJiYiLs7OxqZNwlKSwsxIULF1Q+GDStMnF369YNV69elX7cAIArV67A2tq62hJuoGqv+a5du5Cbm4t33nlH02EWU5m4nzx5UiyxLvryJqppyZCqvN4GBgZo3rw5CgoKsHv3bgwYMEDT4VaZt7e3Sl8B4MiRIxX6DKWaSV1jZH0ghEBwcDD27t2Ln3/+GY6OjtoOqcbq3bt3se8jnp6eCAwMRGJiIhPuf+jWrVux5eeuXLkCe3t7LUVUc5X2HeDF72FUnKOjI6ysrFQ+67OyshAXF6eZz3q1T81Wz0RHRwt9fX0RFRUlkpKSxPvvvy/Mzc2lpZ3effddERoaKtVftGiRkMvl4ttvv1VZmujx48c1Ou6FCxeKw4cPi2vXromkpCTx+eefiwYNGoj169dXa9yVif2ftDV7eUXjnjNnjjh06JC4du2aSEhIEMOGDRMGBgbi0qVLNTru1NRUYWJiIoKDg0VKSorYv3+/sLCwEPPnz6/WuCsTe5Hu3buLoUOHVne4korGHRYWJkxMTMQ333wjrl+/Lg4fPixatWol3n777Rod9+nTp8Xu3bvFtWvXxIkTJ8Trr78uHB0dxcOHD6s1biGezyh8/vx5cf78eQFALFu2TJw/f17cunVLCCFEaGioePfdd6X6RUuGTZkyRSQnJ4uIiAguGVaHvOz/Mj03btw4YWZmJo4dO6bynebJkyfaDq1W4OzlpTtz5oxo0KCBWLBggfjjjz/Etm3bhJGRkfj666+1HVqNExQUJJo3by4tGbZnzx7RtGlTMXXqVG2HpnUvG9sXLVokzM3NxXfffSd+//13MWDAAC4ZVpOtXr1atGjRQsjlctGlSxdx+vRpaV+PHj1EUFCQtG1vby8AFHuEhYXV6LhnzJghnJychIGBgWjUqJHw9vYW0dHR1R5zkYrE/k/aSrqFqFjcEydOlOpaWlqKfv36aWX9YiEq/nr/+uuvwsvLS+jr64uWLVuKBQsWiIKCgmqO+rmKxn758mUBQBw+fLiaI1VVkbjz8/PF7NmzRatWrYSBgYGws7MT48eP10ryWpG4jx07JpydnYW+vr5o0qSJePfdd8Vff/1V7TELIcQvv/xS4mdzUbxBQUGiR48exZ7j7u4u5HK5aNmyZbWv506aVdb/ZXqupL8ZAPxbKCcm3WX74YcfRPv27YW+vr5o166dWLdunbZDqpGysrLEhAkTRIsWLYSBgYFo2bKlmDFjhsjNzdV2aFr3srFdqVSKmTNnCktLS6Gvry969+4tUlJSNBKLTIhquvaQiIiIiIiIqJ7hPd1EREREREREGsKkm4iIiIiIiEhDmHQTERERERERaQiTbiIiIiIiIiINYdJNREREREREpCFMuomIiIiIiIg0hEk3ERERERERkYYw6SY
iIiIiIiLSECbdRC9x7NgxyGQyPHr0SNuh4NSpU3B1dYWenh4GDhyoseNERUXB3NxcrW3WpNexyM2bNyGTyZCYmKjWdkeMGFHl98fBwQErVqwos45MJsO+ffuqdBwiIqKqUtd42rNnT0ycOLHU/bNnz4a7u3uZbahjDCZSNybdVGONGDECMpms2OPq1asaO2ZJH/Y+Pj64c+cOzMzMNHbc8goJCYG7uztu3LiBqKgojR1n6NChuHLlilrbrEmvYxE7OzvcuXMH7du313YolXLnzh34+/trOwwiojqp6HvIBx98UGyfQqGATCbDiBEj1HIsTf0IXB+tXLlSo9+RiCqDSTfVaH379sWdO3dUHo6OjsXq5eXlaSwGuVwOKysryGQyjR2jvK5du4bXX38dtra2aj8T/SJDQ0NYWFiUur8yr3dNeh2L6OrqwsrKCg0aNNB2KJViZWUFfX19bYdBRFRn2dnZITo6Gk+fPpXKnj17hu3bt6NFixZajIxKY2ZmptHvSESVwaSbajR9fX1YWVmpPHR1ddGzZ08EBwdj4sSJaNq0Kfz8/AAAy5Ytg6urK4yNjWFnZ4fx48cjOztbpc1Tp06hZ8+eMDIyQqNGjeDn54eHDx9ixIgROH78OFauXCmdVb9582aJl0Xv3r0br7zyCvT19eHg4IClS5eqHMPBwQELFy7EqFGjYGJighYtWmDdunVl9jU3NxcfffQRLCwsYGBggO7duyM+Ph7A/34Bv3//PkaNGgWZTFbqr7gODg6YN28ehg8fDmNjYzRv3hwREREqdV72Ov3z8vKiy7k2bNgAR0dHGBgYAAC+/fZbuLq6wtDQEE2aNIGvry9ycnJKjOufr2PRMQ4dOgRnZ2c0bNhQ+pGlNIWFhRg9ejQcHR1haGiItm3bYuXKlWW+rg8fPkRgYCCaNWsGQ0NDtG7dGps3b1Z5XYvOLBTFGBMTA09PTxgZGcHHxwcpKSkqbc6fPx8WFhYwMTHBmDFjEBoaWublbkqlEuHh4VLcbm5u+Pbbb8uMGwAeP35c5vv44uXlRX3Zs2cPevXqBSMjI7i5uSE2NvalxyEiopJ16tQJdnZ22LNnj1S2Z88etGjRAh07dlSpW9Y4DpQ9HhWdUOjYsSNkMhl69uxZYjxltQEA06ZNQ5s2bWBkZISWLVti5syZyM/Pl/YXjeebNm1CixYt0LBhQ4wfPx6FhYVYvHgxrKysYGFhgQULFqgcVyaTYe3atfD394ehoSFatmz50nHs4sWL8Pf3R8OGDWFpaYl3330X9+7dk/bn5OTgvffeQ8OGDWFtbV3su1RZvvzyS9jZ2cHIyAhvv/02MjMzpX3/vLy8Z8+e+OijjzB16lQ0btwYVlZWmD17drmPRaQOTLqp1tqyZQvkcjlOnTqFyMhIAICOjg5WrVqFS5cuYcuWLfj5558xdepU6TmJiYno3bs3XFxcEBsbi5MnTyIgIACFhYVYuXIlvL29MXbsWOmsup2dXbHjJiQk4O2338awYcNw4cIFzJ49GzNnziyWBC9duhSenp44f/48xo8fj3HjxhVL3l40depU7N69G1u2bMG5c+fg5OQEPz8/PHjwQLoM2tTUFCtWrMCdO3cwdOjQUttasmQJ3NzccP78eYSGhmLChAk4cuSItP9lr1NJrl69it27d2PPnj1ITEzEnTt3MHz4cIwaNQrJyck4duwYBg8eDCFEme286MmTJ/j888/x1Vdf4cSJE0hNTcXkyZNLra9UKmFra4tdu3YhKSkJs2bNwieffIKdO3eW+pyZM2ciKSkJP/30E5KTk7F27Vo0bdq0zLhmzJiBpUuX4uzZs2jQoAFGjRol7du2bRsWLFiAzz77DAkJCWjRogXWrl1bZnvh4eHYunUrIiMjcenSJUyaNAnvvPMOjh8/XubzXvY+lhb75MmTkZiYiDZt2mD48OEoKCgo8zlERFS6UaNGqSS2mzZtwsiRI4vVK2scB8oej86cOQMAOHr0KO7cuaO
S5L/oZWOaiYkJoqKikJSUhJUrV2L9+vVYvny5ShvXrl3DTz/9hIMHD+Kbb77Bxo0b0b9/f/z55584fvw4PvvsM3z66aeIi4srduwhQ4bgt99+Q2BgIIYNG4bk5OQS43z06BFef/11dOzYEWfPnsXBgweRkZGBt99+W6ozZcoUHD9+HN999x0OHz6MY8eO4dy5c6W+D0WuXr2KnTt34ocffsDBgwel71ll2bJlC4yNjREXF4fFixdj7ty5Lx1PidRKENVQQUFBQldXVxgbG0uPf//730IIIXr06CE6duz40jZ27dolmjRpIm0PHz5cdOvWrdT6PXr0EBMmTFAp++WXXwQA8fDhQyGEEP/5z3/EG2+8oVJnypQpwsXFRdq2t7cX77zzjrStVCqFhYWFWLt2bYnHzc7OFnp6emLbtm1SWV5enrCxsRGLFy+WyszMzMTmzZtLjb/o2H379lUpGzp0qPD39y/1Of98nTZv3izMzMyk7bCwMKGnpyfu3r0rlSUkJAgA4ubNm2XGU+Sfr+PmzZsFAHH16lWpTkREhLC0tCxXe0UUCoUYMmRIqfsDAgLEyJEjS9x348YNAUCcP39eJcajR49KdQ4cOCAAiKdPnwohhPDy8hIKhUKlnW7dugk3NzdpOygoSAwYMEAIIcSzZ8+EkZGR+PXXX1WeM3r0aDF8+PBS4y7P+whA7N27V6UvGzZskPZfunRJABDJycmlHoeIiEpW9Fl+9+5doa+vL27evClu3rwpDAwMxN9//y0GDBgggoKChBDlG8crMh6Vpqw2SrJkyRLh4eEhbYeFhQkjIyORlZUllfn5+QkHBwdRWFgolbVt21aEh4dL2wDEBx98oNK2l5eXGDduXInxz5s3T/Tp00elflpamgAgUlJSxOPHj4VcLhc7d+6U9t+/f18YGhoW+x72orCwMKGrqyv+/PNPqeynn34SOjo64s6dO0II1TFYiOff7bp3767STufOncW0adNKPQ6RutXOGxmp3ujVq5fKWURjY2Pp3x4eHsXqHz16FOHh4bh8+TKysrJQUFCAZ8+e4cmTJzAyMkJiYiLeeuutKsWUnJyMAQMGqJR169YNK1asQGFhIXR1dQEAHTp0kPbLZDJYWVnh7t27JbZ57do15Ofno1u3blKZnp4eunTpUuqvyGXx9vYutv3iTNgve51KYm9vj2bNmknbbm5u6N27N1xdXeHn54c+ffrg3//+Nxo1alTuOI2MjNCqVStp29rautTXqEhERAQ2bdqE1NRUPH36FHl5eWVe2j1u3DgMGTIE586dQ58+fTBw4ED4+PiUeYwX3ztra2sAwN27d9GiRQukpKQU+0W9S5cu+Pnnn0ts6+rVq3jy5AneeOMNlfK8vLxilyb+08vex4rE3q5duzKfR0REJWvWrBn69++PqKgoCCHQv3//YldMlWccr8x49E8va2PHjh1YtWoVrl27huzsbBQUFMDU1FSlDQcHB5iYmEjblpaW0NXVhY6OjkrZP8fjksak0iZ+++233/DLL7+gYcOGxfZdu3ZNGr+9vLyk8saNG6Nt27YvfQ1atGiB5s2bq8ShVCqRkpICKyurEp/z4tgIlO/7BpE68fJyqtGMjY3h5OQkPYqSiKJ9L7p58yb+9a9/oUOHDti9ezcSEhKke2CLJv4yNDSsttj19PRUtmUyGZRKZbUdvzTleZ1K8s/XW1dXF0eOHMFPP/0EFxcXrF69Gm3btsWNGzfKHUtJr5Eo4/L06OhoTJ48GaNHj8bhw4eRmJiIkSNHlhm3v78/bt26hUmTJuH27dvo3bt3mZew/zOuoonfKvveFd0rf+DAASQmJkqPpKSkct3XXVHqjJ2IiJ4bNWoUoqKisGXLFpVbjiqiMuNRRdqIjY1FYGAg+vXrh/379+P8+fOYMWNGsTGypLFX3d9ZsrOzERAQoDLuJSYm4o8//sBrr71W6XYrq6Z+J6P6g0k31RkJCQlQKpVYunQpunbtijZt2uD27ds
qdTp06ICYmJhS25DL5SgsLCzzOM7Ozjh16pRK2alTp9CmTRvpLHdFtWrVSro/vUh+fj7i4+Ph4uJS4fZOnz5dbNvZ2RlA+V6n8pLJZOjWrRvmzJmD8+fPQy6XY+/evZVqqzxOnToFHx8fjB8/Hh07doSTkxOuXbv20uc1a9YMQUFB+Prrr7FixYqXTmpXlrZt26pMjAOg2PaLXFxcoK+vj9TUVJUfkJycnEqcM+BFZb2PRERUffr27Yu8vDzk5+dLk7e+qLzjeGnjkVwuB4CXfgcpq41ff/0V9vb2mDFjBjw9PdG6dWvcunWrSv1+UUXGpE6dOuHSpUtwcHAoNvYZGxujVatW0NPTU7lv/OHDh+VarjQ1NVXle8vp06eho6NTrrPkRNrCy8upznByckJ+fj5Wr16NgIAAlQnWikyfPh2urq4YP348PvjgA8jlcvzyyy9466230LRpUzg4OCAuLg43b95Ew4YN0bhx42LH+fjjj9G5c2fMmzcPQ4cORWxsLNasWYMvvvii0rEbGxtj3LhxmDJlCho3bowWLVpg8eLFePLkCUaPHl3h9k6dOoXFixdj4MCBOHLkCHbt2oUDBw4AKN/rVB5xcXGIiYlBnz59YGFhgbi4OPz9998aTQpbt26NrVu34tChQ3B0dMRXX32F+Pj4EpeRKzJr1ix4eHjglVdeQW5uLvbv31+lGD/88EOMHTsWnp6e8PHxwY4dO/D777+jZcuWJdY3MTHB5MmTMWnSJCiVSnTv3h2ZmZk4deoUTE1NERQUVOqxynofiYio+ujq6kqXiZf0A3t5xvGyxiMLCwsYGhri4MGDsLW1hYGBAczMzIodp6w2WrdujdTUVERHR6Nz5844cOCAWn8I37VrFzw9PdG9e3ds27YNZ86cwcaNG0usq1AosH79egwfPlyaNfzq1auIjo7Ghg0b0LBhQ4wePRpTpkxBkyZNYGFhgRkzZqhc4l4aAwMDBAUF4fPPP0dWVhY++ugjvP3226VeWk5UE/BMN9UZbm5uWLZsGT777DO0b98e27ZtQ3h4uEqdNm3a4PDhw/jtt9/QpUsXeHt747vvvpPWaZ48eTJ0dXXh4uKCZs2aITU1tdhxOnXqhJ07dyI6Ohrt27fHrFmzMHfuXIwYMaJK8S9atAhDhgzBu+++i06dOuHq1as4dOhQhe6RLvLxxx/j7Nmz6NixI+bPn49ly5ZJv8yX53UqD1NTU5w4cQL9+vVDmzZt8Omnn2Lp0qXw9/evcFvl9X//938YPHgwhg4dCi8vL9y/f/+lM5bK5XJMnz4dHTp0wGuvvQZdXV1ER0dXOobAwEBMnz4dkydPRqdOnXDjxg2MGDFCWkatJPPmzcPMmTMRHh4OZ2dn9O3bFwcOHCjzxwKg7PeRiIiql6mpabH7o1/0snG8rPGoQYMGWLVqFb788kvY2NgUmzumSFltvPnmm5g0aRKCg4Ph7u6OX3/9FTNnzlRb/+fMmYPo6Gh06NABW7duxTfffFPq1Xg2NjY4deoUCgsL0adPH7i6umLixIkwNzeXEuslS5bg1VdfRUBAAHx9fdG9e/cS5+v5JycnJwwePBj9+vVDnz590KFDhyqd+CCqDjJR1g2URFTrODg4YOLEiZg4caK2Q6k33njjDVhZWeGrr77SdihERERqJ5PJsHfvXpX1r4mo/Hh5ORFRBTx58gSRkZHw8/ODrq4uvvnmGxw9epTrfRIRERFRiZh0ExFVgEwmw48//ogFCxbg2bNnaNu2LXbv3g1fX19th0ZERERENRAvLyciIiIiIiLSEE6kRkRERERERKQhTLqJiIiIiIiINIRJNxEREREREZGGMOkmIiIiIiIi0hAm3UREREREREQawqSbiIiIiIiISEOYdBMRERERERFpCJNuIiIiIiIiIg1h0k1ERERERESkIUy6iYiIiIiIiDSESTcRERERERGRhjDpJiIiIiIiItKQBtoOoCZQKpW4ffs2TExMIJPJtB0OERH
VYUIIPH78GDY2NtDR4W/f6sYxnYiIqkt5x3Qm3QBu374NOzs7bYdBRET1SFpaGmxtbbUdRp3DMZ2IiKrby8Z0Jt0ATExMADx/sUxNTbUcDRER1WVZWVmws7OTxh5SL47pRERUXco7pjPpBqTLz0xNTTlAExFRteClz5rBMZ2IiKrby8Z03kxGREREREREpCFMuomIiIiIiIg0pF4n3REREXBxcUHnzp21HQoRERERERHVQfU66VYoFEhKSkJ8fLy2QyEiIiIiIqI6qF4n3URERERERESaxKSbiIiIiIiISEO4ZJiaOYQeKLH85qL+1RwJERERVVVp4zqRtvG7JVHtwTPdRERERERERBrCpJuIiIiIiIhIQ+p10s0lw4iIiIiIiEiT6nXSzSXDiIiIiIiISJPqddJNREREREREpElMuomIiIiIiIg0hEk3ERERERERkYYw6SYiIiIiIiLSECbdRERERERERBpSp5LuJ0+ewN7eHpMnT9Z2KERERERERER1K+lesGABunbtqu0wiIiIiIiIiADUoaT7jz/+wOXLl+Hv76/tUIiIiIiIiIgA1JCk+8SJEwgICICNjQ1kMhn27dtXrE5ERAQcHBxgYGAALy8vnDlzRmX/5MmTER4eXk0RExEREREREb1cjUi6c3Jy4ObmhoiIiBL379ixAyEhIQgLC8O5c+fg5uYGPz8/3L17FwDw3XffoU2bNmjTpk25jpebm4usrCyVBxEREREREZG6NdB2AADg7+9f5mXhy5Ytw9ixYzFy5EgAQGRkJA4cOIBNmzYhNDQUp0+fRnR0NHbt2oXs7Gzk5+fD1NQUs2bNKrG98PBwzJkzRyN9ISIiIiIiIipSI850lyUvLw8JCQnw9fWVynR0dODr64vY2FgAz5PotLQ03Lx5E59//jnGjh1basINANOnT0dmZqb0SEtL03g/iIiIiIiIqP6p8Un3vXv3UFhYCEtLS5VyS0tLpKenV6pNfX19mJqa4quvvkLXrl3Ru3dvdYRKREREREREpKJGXF6uTiNGjCh3XYVCAYVCgaysLJiZmWkuKCIiIiIiIqqXavyZ7qZNm0JXVxcZGRkq5RkZGbCysqpS2xEREXBxcUHnzp2r1A4RERERERFRSWp80i2Xy+Hh4YGYmBipTKlUIiYmBt7e3lVqW6FQICkpCfHx8VUNk4iIqFYqLCzEzJkz4ejoCENDQ7Rq1Qrz5s2DEEKqI4TArFmzYG1tDUNDQ/j6+uKPP/5QaefBgwcIDAyEqakpzM3NMXr0aGRnZ6vU+f333/Hqq6/CwMAAdnZ2WLx4cbF4du3ahXbt2sHAwACurq748ccfNdNxIiKialIjku7s7GwkJiYiMTERAHDjxg0kJiYiNTUVABASEoL169djy5YtSE5Oxrhx45CTkyPNZk5ERESV89lnn2Ht2rVYs2YNkpOT8dlnn2Hx4sVYvXq1VGfx4sVYtWoVIiMjERcXB2NjY/j5+eHZs2dSncDAQFy6dAlHjhzB/v37ceLECbz//vvS/qysLPTp0wf29vZISEjAkiVLMHv2bKxbt06q8+uvv2L48OEYPXo0zp8/j4EDB2LgwIG4ePFi9bwYREREGiATL/6UrSXHjh1Dr169ipUHBQUhKioKALBmzRosWbIE6enpcHd3x6pVq+Dl5VWl40ZERCAiIgKFhYW4cuUKMjMzYWpqWqU2HUIPlFh+c1H/KrVLRER1Q9E8IuoYc9ThX//6FywtLbFx40apbMiQITA0NMTXX38NIQRsbGzw8ccfY/LkyQCAzMxMWFpaIioqCsOGDUNycjJcXFwQHx8PT09PAMDBgwfRr18//Pnnn7CxscHatWsxY8YMpKenQy6XAwBCQ0Oxb98+XL58GQAwdOhQ5OTkYP/+/VIsXbt2hbu7OyIjI8vVH3W/vqWN60Taxu+WRNpX3jGnRpzp7tmzJ4QQxR5FCTcABAcH49atW8jNzUVcXFyVE26Al5cTERH5+PggJiYGV65
cAQD89ttvOHnyJPz9/QE8v/osPT1dZelOMzMzeHl5SUt3xsbGwtzcXEq4AcDX1xc6OjqIi4uT6rz22mtSwg0Afn5+SElJwcOHD6U6Lx6nqE7RcUqSm5uLrKwslQcREVFNUudmL6+IF890ExER1UehoaHIyspCu3btoKuri8LCQixYsACBgYEAIC3PWdbSnenp6bCwsFDZ36BBAzRu3FiljqOjY7E2ivY1atQI6enpFV4iNDw8HHPmzKlot4mIiKpNjTjTrS08001ERPXdzp07sW3bNmzfvh3nzp3Dli1b8Pnnn2PLli3aDq1cpk+fjszMTOmRlpam7ZCIiIhU1Osz3URERPXdlClTEBoaimHDhgEAXF1dcevWLYSHhyMoKEhanjMjIwPW1tbS8zIyMuDu7g4AsLKywt27d1XaLSgowIMHD6TnW1lZlbj8Z9G+suqUtUSovr4+9PX1K9ptIiKialOvz3QTERHVd0+ePIGOjurXAV1dXSiVSgCAo6MjrKysVJbuzMrKQlxcnLR0p7e3Nx49eoSEhASpzs8//wylUinNweLt7Y0TJ04gPz9fqnPkyBG0bdsWjRo1kuq8eJyiOlVdIpSIiEib6nXSHRERARcXF3Tu3FnboRAREWlFQEAAFixYgAMHDuDmzZvYu3cvli1bhkGDBgEAZDIZJk6ciPnz5+P777/HhQsX8N5778HGxgYDBw4EADg7O6Nv374YO3Yszpw5g1OnTiE4OBjDhg2DjY0NAOA///kP5HI5Ro8ejUuXLmHHjh1YuXIlQkJCpFgmTJiAgwcPYunSpbh8+TJmz56Ns2fPIjg4uNpfFyIiInWp15eXKxQKKBQKaap3IiKi+mb16tWYOXMmxo8fj7t378LGxgb/93//h1mzZkl1pk6dipycHLz//vt49OgRunfvjoMHD8LAwECqs23bNgQHB6N3797Q0dHBkCFDsGrVKmm/mZkZDh8+DIVCAQ8PDzRt2hSzZs1SWcvbx8cH27dvx6effopPPvkErVu3xr59+9C+ffvqeTGIiIg0oEas061t6lzTk+t0ExFRWWraOt11DdfppvqC3y2JtK9WrdOtLby8nIiIiIiIiDSpXifdXDKMiIiIiIiINKleJ91EREREREREmsSkm4iIiIiIiEhDmHQTERERERERaQiTbiIiIiIiIiINqddJN2cvJyIiIiIiIk2q10k3Zy8nIiIiIiIiTarXSTcRERERERGRJjHpJiIiIiIiItIQJt1EREREREREGsKkm4iIiIiIiEhDmHQTERERERERaUi9Trq5ZBgRERERERFpUr1OurlkGBEREREREWlSvU66iYiIiIiIiDSJSTcRERERERGRhjDpJiIiIiIiItIQJt1EREREREREGsKkm4iIiIiIiEhD6kTS/ejRI3h6esLd3R3t27fH+vXrtR0SERERERERERpoOwB1MDExwYkTJ2BkZIScnBy0b98egwcPRpMmTbQdGhEREREREdVjdSLp1tXVhZGREQAgNzcXQggIIbQclfY4hB4oVnZzUX8tREJERERERFS/1YjLy0+cOIGAgADY2NhAJpNh3759xepERETAwcEBBgYG8PLywpkzZ1T2P3r0CG5ubrC1tcWUKVPQtGnTaoq+fBxCDxR7EBERERERUd1WqaR7y5YtOHDgf0nj1KlTYW5uDh8fH9y6davC7eXk5MDNzQ0REREl7t+xYwdCQkIQFhaGc+fOwc3NDX5+frh7965Ux9zcHL/99htu3LiB7du3IyMjo9Tj5ebmIisrS+VBRERUm6h7LCYiIiLNqFTSvXDhQhgaGgIAYmNjERERgcWLF6Np06aYNGlShdvz9/fH/PnzMWjQoBL3L1u2DGPHjsXIkSPh4uKCyMhIGBkZYdOmTcXqWlpaws3NDf/9739LPV54eDjMzMykh52dXYVjJiIi0iZ1j8VERESkGZVKutPS0uDk5AQA2LdvH4YMGYL3338f4eHhZSa7lZGXl4eEhAT
4+vpKZTo6OvD19UVsbCwAICMjA48fPwYAZGZm4sSJE2jbtm2pbU6fPh2ZmZnSIy0tTa0xExERaVp1jsVERERUeZVKuhs2bIj79+8DAA4fPow33ngDAGBgYICnT5+qLzoA9+7dQ2FhISwtLVXKLS0tkZ6eDgC4desWXn31Vbi5ueHVV1/Fhx9+CFdX11Lb1NfXh6mpKb766it07doVvXv3VmvMREREmladYzERERFVXqVmL3/jjTcwZswYdOzYEVeuXEG/fv0AAJcuXYKDg4M64yuXLl26IDExscLPUygUUCgUyMrKgpmZmfoDIyIi0pCaNhYTERFRySp1pjsiIgI+Pj74+++/sXv3bmk97ISEBAwfPlytATZt2hS6urrFJkbLyMiAlZVVldqOiIiAi4sLOnfuXKV2iIiIqlt1jsVERERUeRVOugsKCrBq1SpMmzYN3333Hfr27SvtmzNnDmbMmKHWAOVyOTw8PBATEyOVKZVKxMTEwNvbu0ptKxQKJCUlIT4+vqphEhERVRt1j8V//fUX3nnnHTRp0gSGhoZwdXXF2bNnpf1CCMyaNQvW1tYwNDSEr68v/vjjD5U2Hjx4gMDAQJiamsLc3ByjR49Gdna2Sp3ff/8dr776KgwMDGBnZ4fFixcXi2XXrl1o164dDAwM4Orqih9//LFCfSEiIqppKpx0N2jQAIsXL0ZBQYHagsjOzkZiYqJ0ifiNGzeQmJiI1NRUAEBISAjWr1+PLVu2IDk5GePGjUNOTg5GjhxZpePyTDcREdVG6hyLHz58iG7dukFPTw8//fQTkpKSsHTpUjRq1Eiqs3jxYqxatQqRkZGIi4uDsbEx/Pz88OzZM6lOYGAgLl26hCNHjmD//v04ceIE3n//fWl/VlYW+vTpA3t7eyQkJGDJkiWYPXs21q1bJ9X59ddfMXz4cIwePRrnz5/HwIEDMXDgQFy8eLHK/SQiItIWmRBCVPRJAwYMwODBgxEUFKSWII4dO4ZevXoVKw8KCkJUVBQAYM2aNViyZAnS09Ph7u6OVatWwcvLSy3HL7qnOzMzE6amplVqyyH0wMsrvcTNRf3VHkNV2yQiIvVQ15ijrrE4NDQUp06dKnXGcyEEbGxs8PHHH2Py5MkAnq8UYmlpiaioKAwbNgzJyclwcXFBfHw8PD09AQAHDx5Ev3798Oeff8LGxgZr167FjBkzkJ6eDrlcLh173759uHz5MgBg6NChyMnJwf79+6Xjd+3aFe7u7oiMjCxXf9Q5pgPqGdeJNIHf7Yi0r7xjTqUmUvP390doaCguXLgADw8PGBsbq+x/8803K9Rez5498bLcPzg4GMHBwRWOlYiIqC5S11j8/fffw8/PD2+99RaOHz+O5s2bY/z48Rg7diyA51efpaenqyzdaWZmBi8vL8TGxmLYsGGIjY2Fubm5lHADgK+vL3R0dBAXF4dBgwYhNjYWr732mpRwA4Cfnx8+++wzPHz4EI0aNUJsbCxCQkJU4vPz88O+fftKjT83Nxe5ubnSdlZWVrn6TUREVF0qlXSPHz8eALBs2bJi+2QyGQoLC6sWVTWJiIhARERErYmXiIioiLrG4uvXr2Pt2rUICQnBJ598gvj4eHz00UeQy+UICgqSlucsa+nO9PR0WFhYqOxv0KABGjdurFLH0dGxWBtF+xo1aoT09PQyj1OS8PBwzJkzp1x9JSIi0oZKzV6uVCpLfdSmBJYTqRERUW2lrrFYqVSiU6dOWLhwITp27Ij3338fY8eOLffl3No2ffp0ZGZmSo+0tDRth0RERKSiUkk3ERER1Q3W1tZwcXFRKXN2dpYmMy1anrOspTutrKxw9+5dlf0FBQV48OCBSp2S2njxGKXVKWuJUH19fZiamqo8iIiIapJyX16+atUqvP/++zAwMMCqVavKrPvRRx9VObDqUFMvL+dEaEREVBJNjMXdunVDSkqKStmVK1dgb28PAHB0dISVlRViYmLg7u4O4Pl903FxcRg3bhw
AwNvbG48ePUJCQgI8PDwAAD///DOUSqU06am3tzdmzJiB/Px86OnpAQCOHDmCtm3bSjOle3t7IyYmBhMnTpRiOXLkSJWXCCUiItKmcs9e7ujoiLNnz6JJkybF7slSaVAmw/Xr19UWYHWoabOXl6QiSTeTdiKimqsqY44mxuL4+Hj4+Phgzpw5ePvtt3HmzBmMHTsW69atQ2BgIADgs88+w6JFi7BlyxY4Ojpi5syZ+P3335GUlAQDAwMAzyd2y8jIQGRkJPLz8zFy5Eh4enpi+/btAJ7PeN62bVv06dMH06ZNw8WLFzFq1CgsX75cWlrs119/RY8ePbBo0SL0798f0dHRWLhwIc6dO4f27duXqz+cvZzqC363I9I+tc9efuPGjRL/TURERNVDE2Nx586dsXfvXkyfPh1z586Fo6MjVqxYISXcADB16lTk5OTg/fffx6NHj9C9e3ccPHhQSrgBYNu2bQgODkbv3r2ho6ODIUOGqJyNNzMzw+HDh6FQKODh4YGmTZti1qxZKmt5+/j4YPv27fj000/xySefoHXr1ti3b1+5E24iIqKaqFLrdNc1tfVMd0WOxV9DiYhqBnWfiSVVPNNN9QW/2xFpn0bX6QaAP//8E99//z1SU1ORl5ensq+k5Utqopp6TzcREVF51IWxmIiIqK6rVNIdExODN998Ey1btsTly5fRvn173Lx5E0IIdOrUSd0xaoxCoYBCoZB+oSAiIqot6spYTEREVNdVKumePn06Jk+ejDlz5sDExAS7d++GhYUFAgMD0bdvX3XHSODlbUREpIpjMRERUe1QqXW6k5OT8d577wEAGjRogKdPn6Jhw4aYO3cuPvvsM7UGSERERMVxLCYiIqodKpV0GxsbS/eOWVtb49q1a9K+e/fuqScyIiIiKhXHYiIiotqhUpeXd+3aFSdPnoSzszP69euHjz/+GBcuXMCePXvQtWtXdceoMZxIjYiIaqu6MhYTERHVdZVKupctW4bs7GwAwJw5c5CdnY0dO3agdevWtWq2VE6kRkREtVVdGYuJiIjqukol3S1btpT+bWxsjMjISLUFRERERC/HsZiIiKh2qPQ63QBw9uxZJCcnAwBcXFzg4eGhlqCIiIiofDgWExER1WyVSrr//PNPDB8+HKdOnYK5uTkA4NGjR/Dx8UF0dDRsbW3VGSMRERH9A8diIiKi2qFSs5ePGTMG+fn5SE5OxoMHD/DgwQMkJydDqVRizJgx6o6RiIiI/oFjMRERUe1QqTPdx48fx6+//oq2bdtKZW3btsXq1avx6quvqi04IiIiKhnHYiIiotqhUme67ezskJ+fX6y8sLAQNjY2VQ6KiIiIysaxmIiIqHaoVNK9ZMkSfPjhhzh79qxUdvbsWUyYMAGff/652oLTtIiICLi4uKBz587aDoWIiKhC6spYTEREVNfJhBCiok9q1KgRnjx5goKCAjRo8PwK9aJ/Gxsbq9R98OCBeiLVoKJ1ujMzM2FqalqlthxCD6gpKvW6uai/tkMgIiKob8ypa2OxuqhzTAdq7rhOxO92RNpX3jGnUvd0r1ixorJxERERkRpwLCYiIqodKpV0BwUFqTsO0rCSfqnnL6RERLUXx2IiIqLaoVL3dBMRERERERHRyzHpJiIiIiIiItKQSl1eTnUbL0UnIiIiIiJSj3Kf6f7999+hVCo1GUulpaWloWfPnnBxcUGHDh2wa9cubYdERESkdjV5LCYiIqKSlftMd8eOHXHnzh1YWFigZcuWiI+PR5MmTTQZW7k1aNAAK1asgLu7O9LT0+Hh4YF+/foVWzKFVHEZFCKi2qUmj8VERERUsnKf6TY3N8eNGzcAADdv3qxRv7RbW1vD3d0dAGBlZYWmTZvWqzVJiYiofqjJYzERERGVrNxJ95AhQ9CjRw84OjpCJpPB09MTLVu2LPFRUSdOnEBAQABsbGwgk8mwb9++YnUiIiLg4OAAAwMDeHl54cyZMyW2lZCQgMLCQtjZ2VU
4DiIioppMk2MxERERaUa5Ly9ft24dBg8ejKtXr+Kjjz7C2LFjYWJiopYgcnJy4ObmhlGjRmHw4MHF9u/YsQMhISGIjIyEl5cXVqxYAT8/P6SkpMDCwkKq9+DBA7z33ntYv359mcfLzc1Fbm6utJ2VlaWWfhAREWmSJsdiIiIi0owKzV7et29fAM/PJk+YMEFtA72/vz/8/f1L3b9s2TKMHTsWI0eOBABERkbiwIED2LRpE0JDQwE8T6QHDhyI0NBQ+Pj4lHm88PBwzJkzRy2xExERVSdNjcVERESkGZVaMmzz5s3Sv//8808AgK2trXoi+oe8vDwkJCRg+vTpUpmOjg58fX0RGxsLABBCYMSIEXj99dfx7rvvvrTN6dOnIyQkRNrOysri5eiVUNpEbBVZXozLkxERVU51jsVERERUeeW+p/tFSqUSc+fOhZmZGezt7WFvbw9zc3PMmzdP7ZO63Lt3D4WFhbC0tFQpt7S0RHp6OgDg1KlT2LFjB/bt2wd3d3e4u7vjwoULpbapr68PU1NTfPXVV+jatSt69+6t1piJiIg0rTrHYiIiIqq8Sp3pnjFjBjZu3IhFixahW7duAICTJ09i9uzZePbsGRYsWKDWIF+me/fulfqCoVAooFAokJWVBTMzMw1ERkREpBk1bSwmIiKiklUq6d6yZQs2bNiAN998Uyrr0KEDmjdvjvHjx6t1oG/atCl0dXWRkZGhUp6RkQErK6sqtR0REYGIiAgUFhZWqR1SxUvGiYg0rzrHYiIiIqq8Sl1e/uDBA7Rr165Yebt27dS+PrZcLoeHhwdiYmKkMqVSiZiYGHh7e1epbYVCgaSkJMTHx1c1TCJ6gUPogWIPIlIvTY3FixYtgkwmw8SJE6WyZ8+eQaFQoEmTJmjYsCGGDBlS7Mfw1NRU9O/fH0ZGRrCwsMCUKVNQUFCgUufYsWPo1KkT9PX14eTkhKioqGLHL+8SoURERLVFpZJuNzc3rFmzplj5mjVr4ObmVuH2srOzkZiYiMTERADAjRs3kJiYiNTUVABASEgI1q9fjy1btiA5ORnjxo1DTk6ONJt5ZUVERMDFxQWdO3euUjtERETVTd1jMQDEx8fjyy+/RIcOHVTKJ02ahB9++AG7du3C8ePHcfv2bZUlPgsLC9G/f3/k5eXh119/xZYtWxAVFYVZs2ZJdW7cuIH+/fujV69eSExMxMSJEzFmzBgcOnRIqlO0RGhYWBjOnTsHNzc3+Pn54e7du5XqDxERUU1QqcvLFy9ejP79++Po0aPS2ebY2FikpaXhxx9/rHB7Z8+eRa9evaTtopnFg4KCEBUVhaFDh+Lvv//GrFmzkJ6eDnd3dxw8eLDY5GoVxXu66yZe3k5E9YG6x+Ls7GwEBgZi/fr1mD9/vlSemZmJjRs3Yvv27Xj99dcBPJ853dnZGadPn0bXrl1x+PBhJCUl4ejRo7C0tIS7uzvmzZuHadOmYfbs2ZDL5YiMjISjoyOWLl0KAHB2dsbJkyexfPly+Pn5ASjfEqFERES1TaWS7h49euDKlSuIiIjA5cuXAQCDBw/G+PHjYWNjU+H2evbsCSFEmXWCg4MRHBxcmXBJDarz8uCKJM28bJmI6it1j8UKhQL9+/eHr6+vStKdkJCA/Px8+Pr6SmXt2rVDixYtEBsbi65duyI2Nhaurq4qP4b7+flh3LhxuHTpEjp27IjY2FiVNorqFF3GXp4lQkuSm5uL3NxcaTsrK6vCfSciItKkSiXdAGBjY1PrJ2nhRGrVh8mxZvCsPlH9pq6xODo6GufOnStxjpP09HTI5XKYm5urlL+4dGd6enqJS3sW7SurTlZWFp4+fYqHDx+WukRo0Y8KJQkPD8ecOXPK11EiIiItqNQ93XUFJ1IjIqL6Li0tDRMmTMC2bdtgYGCg7XAqbPr06cjMzJQeaWlp2g6JiIhIRaXPdBNVJ54pJyLSjISEBNy9exedOnWSygoLC3HixAmsWbMGhw4dQl5eHh4
9eqRytvvFpTutrKyKzTJeNLv5i3VKWv7T1NQUhoaG0NXVrdQSofr6+tDX1694x4mIiKpJvT7TzdnLiYiovuvduzcuXLggrSKSmJgIT09PBAYGSv/W09NTWbozJSUFqamp0gRu3t7euHDhgsos40eOHIGpqSlcXFykOi+2UVSnqA1NLhFKRESkTRU+0y2EQFpaGiwsLGrlZWgv4uzl9UdpZ8rr4v3P9amvVLtwDgL1UedYbGJigvbt26uUGRsbo0mTJlL56NGjERISgsaNG8PU1BQffvghvL290bVrVwBAnz594OLignfffReLFy9Geno6Pv30UygUCuks9AcffIA1a9Zg6tSpGDVqFH7++Wfs3LkTBw787/9FSEgIgoKC4OnpiS5dumDFihVqWSKUiIhImyqVdDs5OeHSpUto3bq1JmIiIiKiMlT3WLx8+XLo6OhgyJAhyM3NhZ+fH7744gtpv66uLvbv349x48bB29sbxsbGCAoKwty5c6U6jo6OOHDgACZNmoSVK1fC1tYWGzZskJYLA6CxJUKJiIi0qcJJt46ODlq3bo379+8z6SYiItICTY/Fx44dU9k2MDCQVvwojb29/UvXB+/ZsyfOnz9fZh0uEUpERHVNpe7pXrRoEaZMmYKLFy+qO55qxXu6iYiotqorYzEREVFdV6nZy9977z08efIEbm5ukMvlMDQ0VNn/4MEDtQSnabynm4iIaqu6MhYTERHVdZVKulesWKHmMIiIiKgiOBYTERHVDpVKuoOCgtQdB1GdwdnDSds4S3jV1YbXkGMxERFR7VCv1+kmIiIiIiIi0qQKnenW0dGBTCYrs45MJkNBQUGVgqouRTOxFhYWajsUojqvNpw5JKoN6tpYTEREVNdVKOneu3dvqftiY2OxatUqKJXKKgdVXTiRGjERJKLapq6NxURERHVdhZLuAQMGFCtLSUlBaGgofvjhBwQGBmLu3LlqC46opivt/u3afiwiqrk4FhMREdUulZpIDQBu376NsLAwbNmyBX5+fkhMTET79u3VGRsREdUgvDKk5uFYTEREVPNVOOnOzMzEwoULsXr1ari7uyMmJgavvvqqJmIjqpdq6hltJlxENQfHYiIiotqjQkn34sWL8dlnn8HKygrffPNNiZe4ERERkeZwLCYiIqpdKpR0h4aGwtDQEE5OTtiyZQu2bNlSYr09e/aoJTgiIiJSxbGYiIiodqlQ0v3ee++9dJkSIiIi0hyOxURERLVLhZLuqKgoDYWhHVynm8qrpt5nrW18XaqO98pTRdW1sZiI6i5+T6Caqrq/a1V69vK6gOt0E1F14RcPIiIiovqpXifdRERVxWSaiIiIiMqio+0AiIiIiIiIiOoqnukmIiIVPHtPREREpD5MuonqCU7YRURERERU/Zh0E5HWlHZGtb7/GMAfSIiIiIjqjjqTdA8aNAjHjh1D79698e2332o7HKrFeGktUdXwb4iIiIjof+rMRGoTJkzA1q1btR0GERERERERkaTOJN09e/aEiYmJtsMgIiIiIiIiktSIpPvEiRMICAiAjY0NZDIZ9u3bV6xOREQEHBwcYGBgAC8vL5w5c6b6AyVSM4fQA8UeRERERERUd9SIpDsnJwdubm6IiIgocf+OHTsQEhKCsLAwnDt3Dm5ubvDz88Pdu3crdbzc3FxkZWWpPIiIiIiIiIjUrUYk3f7+/pg/fz4GDRpU4v5ly5Zh7NixGDlyJFxcXBAZGQkjIyNs2rSpUscLDw+HmZmZ9LCzs6tK+EREREREREQlqhFJd1ny8vKQkJAAX19fqUxHRwe+vr6IjY2tVJvTp09HZmam9EhLS1NXuERERERERESSGr9k2L1791BYWAhLS0uVcktLS1y+fFna9vX1xW+//YacnBzY2tpi165d8Pb2LrFNfX196OvrIyIiAhERESgsLNRoH4hqE95XThXB/y9EREREZavxSXd5HT16tMLPUSgUUCgUyMrKgpmZmQaiIiIiIiI
iovqsxifdTZs2ha6uLjIyMlTKMzIyYGVlVaW2eaabqhPPCFZNSa/fzUX9tRAJEREREVH51fh7uuVyOTw8PBATEyOVKZVKxMTElHr5eHkpFAokJSUhPj6+qmESERHVSuHh4ejcuTNMTExgYWGBgQMHIiUlRaXOs2fPoFAo0KRJEzRs2BBDhgwp9mN4amoq+vfvDyMjI1hYWGDKlCkoKChQqXPs2DF06tQJ+vr6cHJyQlRUVLF4uEQoERHVNTXiTHd2djauXr0qbd+4cQOJiYlo3LgxWrRogZCQEAQFBcHT0xNdunTBihUrkJOTg5EjR1bpuDzTTVQz8aoAoupz/PhxKBQKdO7cGQUFBfjkk0/Qp08fJCUlwdjYGAAwadIkHDhwALt27YKZmRmCg4MxePBgnDp1CgBQWFiI/v37w8rKCr/++ivu3LmD9957D3p6eli4cCGA52N7//798cEHH2Dbtm2IiYnBmDFjYG1tDT8/PwD/WyI0MjISXl5eWLFiBfz8/JCSkgILCwvtvEBERERVVCOS7rNnz6JXr17SdkhICAAgKCgIUVFRGDp0KP7++2/MmjUL6enpcHd3x8GDB4tNrlZRvKebiIjqu4MHD6psR0VFwcLCAgkJCXjttdeQmZmJjRs3Yvv27Xj99dcBAJs3b4azszNOnz6Nrl274vDhw0hKSsLRo0dhaWkJd3d3zJs3D9OmTcPs2bMhl8sRGRkJR0dHLF26FADg7OyMkydPYvny5VLS/eISoQAQGRmJAwcOYNOmTQgNDa3GV4WIiEh9asTl5T179oQQotjjxcvOgoODcevWLeTm5iIuLg5eXl7aC5iIiKiOyszMBAA0btwYAJCQkID8/HyVpTvbtWuHFi1aSEt3xsbGwtXVVeXHcD8/P2RlZeHSpUtSnRfbKKpT1EZllwjNzc1FVlaWyoOIiKgmqRFnurWFl5cTUX3BS/apPJRKJSZOnIhu3bqhffv2AID09HTI5XKYm5ur1LW0tER6erpUp6SlPYv2lVUnKysLT58+xcOHD8u1ROg/hYeHY86cORXvLBERUTWpEWe6tYUTqREREf2PQqHAxYsXER0dre1Qym369OnIzMyUHmlpadoOiYiISEW9PtNNREREzwUHB2P//v04ceIEbG1tpXIrKyvk5eXh0aNHKme7X1y608rKqtgs40Wzm79Yp6TlP01NTWFoaAhdXd1KLRGqr68PfX39ineYiIiomtTrpJuXlxNRefHybKqrhBD48MMPsXfvXhw7dgyOjo4q+z08PKCnp4eYmBgMGTIEAJCSkoLU1FRp6U5vb28sWLAAd+/elWYZP3LkCExNTeHi4iLV+fHHH1XaPnLkiNTGi0uEDhw4EMD/lggNDg7WWP+JiIg0rV4n3Zy9nIiI6juFQoHt27fju+++g4mJiXQPtpmZGQwNDWFmZobRo0cjJCQEjRs3hqmpKT788EN4e3uja9euAIA+ffrAxcUF7777LhYvXoz09HR8+umnUCgU0lnoDz74AGvWrMHUqVMxatQo/Pzzz9i5cycOHPjfD1qaWiKUiIhIm+p10k1ERFTfrV27FsDzlURetHnzZowYMQIAsHz5cujo6GDIkCHIzc2Fn58fvvjiC6murq4u9u/fj3HjxsHb2xvGxsYICgrC3LlzpTqOjo44cOAAJk2ahJUrV8LW1hYbNmyQlgsDoLElQomIiLSJSTcREVE9JoR4aR0DAwPplqzS2NvbF7t8/J969uyJ8+fPl1knODiYl5MTEVGdUq+Tbt7TTUS1RWn3lN9c1L+aIyEiIiKiiuCSYVwyjIiIiIiIiDSkXifdRERERERERJrEpJuIiIiIiIhIQ5h0ExEREREREWkIJ1LjRGpUj5U2OVdtV95+cRIyIiIiItK0en2mmxOpERERERERkSbV66SbiIiIiIiISJOYdBMRERERERFpCJNuIiIiIiIiIg1h0k1ERERERESkIUy6iYiIiIiIiDSES4ZxyTCiWquqS56
V9nwuJUZERERE6lKvz3RzyTAiIiIiIiLSpHqddBMRERERERFpEpNuIiIiIiIiIg1h0k1ERERERESkIUy6iYiIiIiIiDSESTcRERERERGRhtSZpHv//v1o27YtWrdujQ0bNmg7HCIiIiIiIqK6sU53QUEBQkJC8Msvv8DMzAweHh4YNGgQmjRpou3QiIiIiIiIqB6rE2e6z5w5g1deeQXNmzdHw4YN4e/vj8OHD2s7LCIiIiIiIqrnakTSfeLECQQEBMDGxgYymQz79u0rViciIgIODg4wMDCAl5cXzpw5I+27ffs2mjdvLm03b94cf/31V3WETkRERERERFSqGpF05+TkwM3NDRERESXu37FjB0JCQhAWFoZz587Bzc0Nfn5+uHv3bjVHSkRERERERFR+NSLp9vf3x/z58zFo0KAS9y9btgxjx47FyJEj4eLigsjISBgZGWHTpk0AABsbG5Uz23/99RdsbGxKPV5ubi6ysrJUHkRERERERETqVuMnUsvLy0NCQgKmT58uleno6MDX1xexsbEAgC5duuDixYv466+/YGZmhp9++gkzZ84stc3w8HDMmTNH47ETEWmaQ+gBbYdQJSXFf3NRf60en4iIiEidasSZ7rLcu3cPhYWFsLS0VCm3tLREeno6AKBBgwZYunQpevXqBXd3d3z88cdlzlw+ffp0ZGZmSo+0tDSN9oGIiIiIiIjqpxp/pru83nzzTbz55pvlqquvrw99fX1EREQgIiIChYWFGo6OiIiIiIiI6qMaf6a7adOm0NXVRUZGhkp5RkYGrKysqtS2QqFAUlIS4uPjq9QOERERERERUUlqfNItl8vh4eGBmJgYqUypVCImJgbe3t5VajsiIgIuLi7o3LlzVcMkIiIiIiIiKqZGJN3Z2dlITExEYmIiAODGjRtITExEamoqACAkJATr16/Hli1bkJycjHHjxiEnJwcjR46s0nF5ppuIiKjmiYiIgIODAwwMDODl5YUzZ85oOyQiIqJKqxH3dJ89exa9evWStkNCQgAAQUFBiIqKwtChQ/H3339j1qxZSE9Ph7u7Ow4ePFhscrWK4j3dRERENcuOHTsQEhKCyMhIeHl5YcWKFfDz80NKSgosLCy0HR4REVGF1Ygz3T179oQQotgjKipKqhMcHIxbt24hNzcXcXFx8PLyqvJxeaabiIioZlm2bBnGjh2LkSNHwsXFBZGRkTAyMsKmTZu0HRoREVGl1Igz3domhAAAZGVlVbktZe6TKrdBRNpV0mcB/7arjzo+i8urIu+ruuIqaqdo7KH/ycvLQ0JCAqZPny6V6ejowNfXF7GxsSU+Jzc3F7m5udJ2ZmYmAPW9X/zbp5qqOj8rK4t/P1RTVfeYzqQbwOPHjwEAdnZ2Wo6EiGoCsxXajqB+q6mvv7rjevz4MczMzNTbaC137949FBYWFrt9zNLSEpcvXy7xOeHh4ZgzZ06xco7pVNfV1M9Kotqgusd0Jt0AbGxskJaWBhMTE8hksnI9JysrC3Z2dkhLS4OpqamGI6w+dbFfdbFPAPtVm9TFPgHsV2UJIfD48WPY2Niove36aPr06dJcMMDzFU4ePHiAJk2alHtMJ82rq58XRNWFf0M1U3nHdCbdeH7pmq2tbaWea2pqWif/49fFftXFPgHsV21SF/sEsF+VwTPcJWvatCl0dXWRkZGhUp6RkQErK6sSn6Ovrw99fX2VMnNzc02FSFVUVz8viKoL/4ZqnvKM6TViIjUiIiIiuVwODw8PxMTESGVKpRIxMTHw9vbWYmRERESVxzPdREREVGOEhIQgKCgInp6e6NKlC1asWIGcnByMHDlS26ERERFVCpPuStLX10dYWFixS9pqu7rYr7rYJ4D9qk3qYp8A9os0Y+jQofj7778xa9YspKenw93dHQcPHiw2uRrVLvy7Iqoa/g3VbjLBNUuIiIiIiIiINIL3dBMRERERERFpCJNuIiIiIiIiIg1h0k1ERERERESkIUy6iYi
IiEhtRowYgYEDB2r8OA4ODlixYoXGj0NUHseOHYNMJsOjR4+0HYpaREVFwdzcXNth1BlMuispIiICDg4OMDAwgJeXF86cOaPtkMrtxIkTCAgIgI2NDWQyGfbt26eyXwiBWbNmwdraGoaGhvD19cUff/yhnWArIDw8HJ07d4aJiQksLCwwcOBApKSkqNR59uwZFAoFmjRpgoYNG2LIkCHIyMjQUsQvt3btWnTo0AGmpqYwNTWFt7c3fvrpJ2l/betPaRYtWgSZTIaJEydKZbWxb7Nnz4ZMJlN5tGvXTtpfG/sEAH/99RfeeecdNGnSBIaGhnB1dcXZs2el/bXxM8PBwaHYeyWTyaBQKADU3veKqCQ9e/ZU+XzV9POqS3x8PN5//31th0EEAPDx8cGdO3dgZmam7VDUYujQobhy5Yq2w6gzmHRXwo4dOxASEoKwsDCcO3cObm5u8PPzw927d7UdWrnk5OTAzc0NERERJe5fvHgxVq1ahcjISMTFxcHY2Bh+fn549uxZNUdaMcePH4dCocDp06dx5MgR5Ofno0+fPsjJyZHqTJo0CT/88AN27dqF48eP4/bt2xg8eLAWoy6bra0tFi1ahISEBJw9exavv/46BgwYgEuXLgGoff0pSXx8PL788kt06NBBpby29u2VV17BnTt3pMfJkyelfbWxTw8fPkS3bt2gp6eHn376CUlJSVi6dCkaNWok1amNnxnx8fEq79ORI0cAAG+99RaA2vleEdU3zZo1g5GRUan78/PzqzEaqu/kcjmsrKwgk8m0HYpaGBoawsLCotT9eXl51RhNHSCowrp06SIUCoW0XVhYKGxsbER4eLgWo6ocAGLv3r3StlKpFFZWVmLJkiVS2aNHj4S+vr745ptvtBBh5d29e1cAEMePHxdCPO+Hnp6e2LVrl1QnOTlZABCxsbHaCrPCGjVqJDZs2FAn+vP48WPRunVrceTIEdGjRw8xYcIEIUTtfa/CwsKEm5tbiftqa5+mTZsmunfvXur+uvKZMWHCBNGqVSuhVCpr7XtFVJKgoCABQOVx48YNIYQQx44dE507dxZyuVxYWVmJadOmifz8/DKfV1BQIEaNGiUcHByEgYGBaNOmjVixYkWxYw4YMKDUmDZv3izMzMzE3r17hZOTk9DX1xd9+vQRqampUp2rV6+KN998U1hYWAhjY2Ph6ekpjhw5otKOvb29WL58ubQNQHzxxRciICBAGBkZibCwMPHgwQPxn//8RzRt2lQYGBgIJycnsWnTpqq9qFTn/PP/khBCuLm5ibCwMGkbgFi/fr0YOHCgMDQ0FE5OTuK7776T9v/yyy8CgHj48KFUtnnzZmFnZycMDQ3FwIEDxeeffy7MzMyk/SX9rUyYMEH06NFD2i4sLBQLFy6U/uY6dOigMj6V1p+5c+eKYcOGCSMjI2FjYyPWrFmjUmfp0qWiffv2wsjISNja2opx48aJx48fq8T+YqxF33HWr18vHBwchEwmE0IIsWvXLtG+fXthYGAgGjduLHr37i2ys7PLjK8+4pnuCsrLy0NCQgJ8fX2lMh0dHfj6+iI2NlaLkanHjRs3kJ6ertI/MzMzeHl51br+ZWZmAgAaN24MAEhISEB+fr5K39q1a4cWLVrUir4VFhYiOjoaOTk58Pb2rvX9AQCFQoH+/fur9AGo3e/VH3/8ARsbG7Rs2RKBgYFITU0FUHv79P3338PT0xNvvfUWLCws0LFjR6xfv17aXxc+M/Ly8vD1119j1KhRkMlktfa9IirJypUr4e3tjbFjx0pXdtjZ2eGvv/5Cv3790LlzZ/z2229Yu3YtNm7ciPnz55f5PKVSCVtbW+zatQtJSUmYNWsWPvnkE+zcubNCcT158gQLFizA1q1bcerUKTx69AjDhg2T9mdnZ6Nfv36IiYnB+fPn0bdvXwQEBEifqaWZPXs2Bg0ahAsXLmDUqFGYOXMmkpKS8NNPPyE5ORlr165F06ZNK/5CEgGYM2cO3n7
7bfz+++/o168fAgMD8eDBgxLrxsXFYfTo0QgODkZiYiJ69eol/X1VRHh4OLZu3YrIyEhcunQJkyZNwjvvvIPjx4+X+bwlS5bAzc0N58+fR2hoKCZMmCBd1QU8z19WrVqFS5cuYcuWLfj5558xderUMtu8evUqdu/ejT179iAxMRF37tzB8OHDMWrUKCQnJ+PYsWMYPHgwhBAV7mdd10DbAdQ29+7dQ2FhISwtLVXKLS0tcfnyZS1FpT7p6ekAUGL/ivbVBkqlEhMnTkS3bt3Qvn17AM/7JpfLi00KUdP7duHCBXh7e+PZs2do2LAh9u7dCxcXFyQmJtbK/hSJjo7GuXPnEB8fX2xfbX2vvLy8EBUVhbZt2+LOnTuYM2cOXn31VVy8eLHW9un69etYu3YtQkJC8MknnyA+Ph4fffQR5HI5goKC6sRnxr59+/Do0SOMGDECQO39/0dUEjMzM8jlchgZGcHKykoq/+KLL2BnZ4c1a9ZI80/cvn0b06ZNw6xZs0p9nq6uLubMmSNtOzo6IjY2Fjt37sTbb79d7rjy8/OxZs0aeHl5AQC2bNkCZ2dnnDlzBl26dIGbmxvc3Nyk+vPmzcPevXvx/fffIzg4uNR2//Of/2DkyJHSdmpqKjp27AhPT08Az+dzIKqsESNGYPjw4QCAhQsXYtWqVThz5gz69u1brO7KlSvRt29fKZFt06YNfv31Vxw8eLDcx8vNzcXChQtx9OhReHt7AwBatmyJkydP4ssvv0SPHj1KfW63bt0QGhoqHfvUqVNYvnw53njjDQBQma/BwcEB8+fPxwcffIAvvvii1Dbz8vKwdetWNGvWDABw7tw5FBQUYPDgwbC3twcAuLq6lrt/9QmTbqqTFAoFLl68qHI/bW3Vtm1bJCYmIjMzE99++y2CgoJe+utmTZeWlib94mpgYKDtcNTG399f+neHDh3g5eUFe3t77Ny5E4aGhlqMrPKUSiU8PT2xcOFCAEDHjh1x8eJFREZGIigoSMvRqcfGjRvh7+8PGxsbbYdCVG2Sk5Ph7e2tcv9pt27dkJ2djT///BMtWrQo9bkRERHYtGkTUlNT8fTpU+Tl5cHd3b1Cx2/QoAE6d+4sbbdr1w7m5uZITk5Gly5dkJ2djdmzZ+PAgQO4c+cOCgoK8PTp05ee6S5KrouMGzcOQ4YMwblz59CnTx8MHDgQPj4+FYqVqMiL888YGxvD1NS01DmdkpOTMWjQIJUyb2/vCiXdV69exZMnT6REuUheXh46duxY5nOLkvQXt1+c7f/o0aMIDw/H5cuXkZWVhYKCAjx79gxPnjwpda4Ee3t7KeEGADc3N/Tu3Ruurq7w8/NDnz598O9//1tl3hd6jpeXV1DTpk2hq6tbbBbbjIwMlV+Ca6uiPtTm/gUHB2P//v345ZdfYGtrK5VbWVkhLy+v2FIONb1vcrkcTk5O8PDwQHh4ONzc3LBy5cpa2x/g+aXWd+/eRadOndCgQQM0aNAAx48fx6pVq9CgQQNYWlrW2r69yNzcHG3atMHVq1dr7ftlbW0NFxcXlTJnZ2fpi29t/8y4desWjh49ijFjxkhltfW9IqoO0dHRmDx5MkaPHo3Dhw8jMTERI0eOVPukSpMnT8bevXuxcOFC/Pe//0ViYiJcXV1fehxjY2OVbX9/f9y6dQuTJk3C7du30bt3b0yePFmtsVLtp6OjU+yS6JIm4tPT01PZlslkUCqVGjtudnY2AODAgQNITEyUHklJSfj2228rfdybN2/iX//6Fzp06IDdu3cjISFBmmC5rL+xf/596erq4siRI/jpp5/g4uKC1atXo23btrhx40alY6urmHRXkFwuh4eHB2JiYqQypVKJmJiYYr8o1UaOjo6wsrJS6V9WVhbi4uJqfP+EEAgODsbevXvx888/w9HRUWW/h4cH9PT0VPqWkpKC1NTUGt+3FymVSuTm5tbq/vTu3RsXLlxQGUA8PT0RGBgo/bu29u1F2dnZuHbtGqy
trWvt+9WtW7diS+9duXJFuoysNn9mAMDmzZthYWGB/v37S2W19b0iKo1cLkdhYaFKmbOzM2JjY1W+8J86dQomJibSD9YlPe/UqVPw8fHB+PHj0bFjRzg5OeHatWsVjqmgoEBl6cGUlBQ8evQIzs7O0nFGjBiBQYMGwdXVFVZWVrh582aFjwM8n+U8KCgIX3/9NVasWIF169ZVqh2qu5o1a4Y7d+5I21lZWVVOHJ2dnREXF6dSdvr06TKPCwCJiYnSv11cXKCvr4/U1FQ4OTmpPOzs7Mo8/j+Pdfr0aenvKyEhAUqlEkuXLkXXrl3Rpk0b3L59u6JdBPD8h4du3bphzpw5OH/+PORyOfbu3VuptuoyXl5eCSEhIQgKCoKnpye6dOmCFStWICcnR+UeoposOzsbV69elbZv3LiBxMRENG7cGC1atMDEiRMxf/58tG7dGo6Ojpg5cyZsbGwwcOBA7QVdDgqFAtu3b8d3330HExMT6d5LMzMzGBoawszMDKNHj0ZISAgaN24MU1NTfPjhh/D29kbXrl21HH3Jpk+fDn9/f7Ro0QKPHz/G9u3bcezYMRw6dKhW9qeIiYmJdK99EWNjYzRp0kQqr419mzx5MgICAmBvb4/bt28jLCwMurq6GD58eK19vyZNmgQfHx8sXLgQb7/9Ns6cOYN169ZJX1qL1levjZ8ZSqUSmzdvRlBQEBo0+N9wWFvfK6LSODg4IC4uDjdv3kTDhg3RuHFjjB8/HitWrMCHH36I4OBgpKSkICwsDCEhIdDR0Sn1ea1bt8bWrVtx6NAhODo64quvvkJ8fHyxH7pfRk9PDx9++KF0hVNwcDC6du2KLl26AABat26NPXv2ICAgADKZDDNnzqzUGcVZs2bBw8MDr7zyCnJzc7F//34p8SAq8vrrryMqKgoBAQEwNzfHrFmzoKurW6U2P/roI3Tr1g2ff/45BgwYgEOHDhW7tPz111/HkiVLsHXrVnh7e+Prr7/GxYsXpUvHTUxMMHnyZEyaNAlKpRLdu3dHZmYmTp06BVNT0zJv8zp16hQWL16MgQMH4siRI9i1axcOHDgAAHByckJ+fj5Wr16NgIAAnDp1CpGRkRXuY1xcHGJiYtCnTx9YWFggLi4Of//9N//GSqLVudNrsdWrV4sWLVoIuVwuunTpIk6fPq3tkMqtaEmDfz6CgoKEEM+XAJo5c6awtLQU+vr6onfv3iIlJUW7QZdDSX0CIDZv3izVefr0qRg/frxo1KiRMDIyEoMGDRJ37tzRXtAvMWrUKGFvby/kcrlo1qyZ6N27tzh8+LC0v7b1pywvLhkmRO3s29ChQ4W1tbWQy+WiefPmYujQoeLq1avS/trYJyGE+OGHH0T79u2Fvr6+aNeunVi3bp3K/tr6mXHo0CEBoMRYa+t7RVSSlJQU0bVrV2FoaFjuJcNKe96zZ8/EiBEjhJmZmTA3Nxfjxo0ToaGhKssllnfJsN27d4uWLVsKfX194evrK27duiXVuXHjhujVq5cwNDQUdnZ2Ys2aNcXGiZKWDHtxGVQhhJg3b55wdnYWhoaGonHjxmLAgAHi+vXrlXkZqQ7LzMwUQ4cOFaampsLOzk5ERUWVuGTYP/9/mZmZSd8zS1oybOPGjcLW1lYYGhqKgICAYkuGCSHErFmzhKWlpTAzMxOTJk0SwcHBKkuGKZVKsWLFCtG2bVuhp6cnmjVrJvz8/KQlcUtib28v5syZI9566y1hZGQkrKysxMqVK1XqLFu2TFhbWwtDQ0Ph5+cntm7dqhJ/aUuGvSgpKUn4+fmJZs2aCX19fdGmTRuxevXqUuOqz2RCcE53IiIiIqoeUVFRmDhxYrF5E4jquur6v+/g4ICJEyeqzFBO2sV7uomIiIiIiIg0hEk3ERERERERkYbw8nIiIiIiIiIiDeGZbiIiIiIiIiINYdJNREREREREpCFMuomIiIiIiIg0hEk3ERERERERkYYw6SYiIiIiIiLSECbdRKQRx44
dg0wmw6NHjwAAUVFRMDc312pMREREVDkc14kqj0k3URWlpaVh1KhRsLGxgVwuh729PSZMmID79+9rO7QaZejQobhy5Yq2wyjVzZs3IZPJkJiYqO1QiIhIiziulw/HdaLyY9JNVAXXr1+Hp6cn/vjjD3zzzTe4evUqIiMjERMTA29vbzx48ECjx8/Pz9do++pkaGgICwuLaj9uXl5etR+TiIhqJ47r5cdxnaj8mHQTVYFCoYBcLsfhw4fRo0cPtGjRAv7+/jh69Cj++usvzJgxAwDwySefwMvLq9jz3dzcMHfuXGl7w4YNcHZ2hoGBAdq1a4cvvvhC2lf0i+2OHTvQo0cPGBgYYNu2bbh16xYCAgLQqFEjGBsb45VXXsGPP/4IACgsLMTo0aPh6OgIQ0NDtG3bFitXrlSJYcSIERg4cCAWLlwIS0tLmJubY+7cuSgoKMCUKVPQuHFj2NraYvPmzcViiY6Oho+PDwwMDNC+fXscP3681Nfqn5ehzZ49G+7u7vjqq6/g4OAAMzMzDBs2DI8fP5bqPH78GIGBgTA2Noa1tTWWL1+Onj17YuLEiaUep6jdDRs2wNHREQYGBgCAgwcPonv37jA3N0eTJk3wr3/9C9euXZOe5+joCADo2LEjZDIZevbsWa73hYiI6g6O6xzXiTRCEFGl3L9/X8hkMrFw4cIS948dO1Y0atRIKJVKcfHiRQFAXL16VdpfVPbHH38IIYT4+uuvhbW1tdi9e7e4fv262L17t2jcuLGIiooSQghx48YNAUA4ODhIdW7fvi369+8v3njjDfH777+La9euiR9++EEcP35cCCFEXl6emDVrloiPjxfXr18XX3/9tTAyMhI7duyQ4ggKChImJiZCoVCIy5cvi40bNwoAws/PTyxYsEBcuXJFzJs3T+jp6Ym0tDSVWGxtbcW3334rkpKSxJgxY4SJiYm4d++eEEKIX375RQAQDx8+FEIIsXnzZmFmZiYdNywsTDRs2FAMHjxYXLhwQZw4cUJYWVmJTz75RKozZswYYW9vL44ePSouXLggBg0aJExMTMSECRNKfV/CwsKEsbGx6Nu3rzh37pz47bffhBBCfPvtt2L37t3ijz/+EOfPnxcBAQHC1dVVFBYWCiGEOHPmjAAgjh49Ku7cuSPu379frveFiIjqBo7rHNeJNIVJN1ElnT59WgAQe/fuLXH/smXLBACRkZEhhBDCzc1NzJ07V9o/ffp04eXlJW23atVKbN++XaWNefPmCW9vbyHE/wbEFStWqNRxdXUVs2fPLnfcCoVCDBkyRNoOCgoS9vb20iAlhBBt27YVr776qrRdUFAgjI2NxTfffKMSy6JFi6Q6+fn5wtbWVnz22WdCiPINzkZGRiIrK0sqmzJlivSaZGVlCT09PbFr1y5p/6NHj4SRkdFLB2c9PT1x9+7dMl+Hv//+WwAQFy5cUOnT+fPnVeq97H0hIqK6geM6x3UiTeHl5URVJIQoV73AwEBs375des4333yDwMBAAEBOTg6uXbuG0aNHo2HDhtJj/vz5KpdKAYCnp6fK9kcffYT58+ejW7duCAsLw++//66yPyIiAh4eHmjWrBkaNmyIdevWITU1VaXOK6+8Ah2d/30cWFpawtXVVdrW1dVFkyZNcPfuXZXneXt7S/9u0KABPD09kZycXK7XAwAcHBxgYmIibVtbW0vHuH79OvLz89GlSxdpv5mZGdq2bfvSdu3t7dGsWTOVsj/++APDhw9Hy5YtYWpqCgcHBwAo9lq8qCLvCxER1Q0c15/juE6kPg20HQBRbeXk5ASZTIbk5GQMGjSo2P7k5GQ0atRIGiSGDx+OadOm4dy5c3j69CnS0tIwdOhQAEB2djYAYP369cXuEdPV1VXZNjY2VtkeM2YM/Pz8cODAARw+fBjh4eFYunQpPvzwQ0RHR2Py5MlYunQpvL29YWJigiVLliAuLk6lDT09PZVtmUxWYplSqSzvy1MumjrGP18jAAgICIC
9vT3Wr18PGxsbKJVKtG/fvswJWSryvhARUe3Gcb3qOK4TlYxnuokqqUmTJnjjjTfwxRdf4OnTpyr70tPTsW3bNgwdOhQymQwAYGtrix49emDbtm3Ytm0b3njjDWnWT0tLS9jY2OD69etwcnJSeRRNBFIWOzs7fPDBB9izZw8+/vhjrF+/HgBw6tQp+Pj4YPz48ejYsSOcnJzU+kvu6dOnpX8XFBQgISEBzs7Oamm7ZcuW0NPTQ3x8vFSWmZlZqeVJ7t+/j5SUFHz66afo3bs3nJ2d8fDhQ5U6crkcwPNJaopU9X0hIqLag+M6x3UiTeGZbqIqWLNmDXx8fODn54f58+fD0dERly5dwpQpU9C8eXMsWLBApX5gYCDCwsKQl5eH5cuXq+ybM2cOPvroI5iZmaFv377Izc3F2bNn8fDhQ4SEhJQaw8SJE+Hv7482bdrg4cOH+OWXX6QBsnXr1ti6dSsOHToER0dHfPXVV4iPj1fbwBIREYHWrVvD2dkZy5cvx8OHDzFq1Ci1tG1iYoKgoCBpplULCwuEhYVBR0dH+sJTXo0aNUKTJk2wbt06WFtbIzU1FaGhoSp1LCwsYGhoiIMHD8LW1hYGBgYwMzOr9PtCRES1D8d1jutEmsAz3URV0Lp1a5w9exYtW7bE22+/jVatWuH9999Hr169EBsbi8aNG6vU//e//4379+/jyZMnGDhwoMq+MWPGYMOGDdi8eTNcXV3Ro0cPREVFvXQgLSwshEKhgLOzM/r27Ys2bdpIS1/83//9HwYPHoyhQ4fCy8sL9+/fx/jx49XW/0WLFmHRokVwc3PDyZMn8f3336Np06Zqa3/ZsmXw9vbGv/71L/j6+qJbt27SEh8VoaOjg+joaCQkJKB9+/aYNGkSlixZolKnQYMGWLVqFb788kvY2NhgwIABACr/vhARUe3DcZ3jOpEmyER5Z4sgIvr/bt68CUdHR5w/fx7u7u7VdtycnBw0b94cS5cuxejRo6vtuERERHUZx3UizeLl5URUY50/fx6XL19Gly5dkJmZiblz5wKA9Gs1ERER1R4c16m+YtJNRDXa559/jpSUFMjlcnh4eOC///2vWi91IyIiourDcZ3qI15eTkRERERERKQhnEiNiIiIiIiISEOYdBMRERERERFpCJNuIiIiIiIiIg1h0k1ERERERESkIUy6iYiIiIiIiDSESTcRERERERGRhjDpJiIiIiIiItIQJt1EREREREREGvL/AE0t5b1dO9j7AAAAAElFTkSuQmCC", "text/plain": [ "
" ] @@ -293,7 +336,7 @@ } ], "source": [ - "plot_diagnostic_plots(inchikey_pair_generator, \"\")" + "plot_diagnostic_plots(spectrum_pair_generator, \"\")" ] }, { @@ -339,7 +382,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 13, "id": "42a7cf88-27c7-4f54-a3b2-78cd84fbd357", "metadata": {}, "outputs": [ @@ -354,16 +397,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "Calculating fingerprints: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1831/1831 [00:00<00:00, 8846.58it/s]\n", - "Balanced sampling of inchikey pairs (per bin): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1525/1525 [00:00<00:00, 7941.05it/s]\n", - "Balanced sampling of inchikey pairs (per bin): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1525/1525 [00:00<00:00, 10656.40it/s]\n", - "Balanced sampling of inchikey pairs (per bin): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1525/1525 [00:00<00:00, 13589.11it/s]\n", - "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 12.70it/s]\n" + "Calculating fingerprints: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1831/1831 [00:00<00:00, 11680.24it/s]\n", + "Balanced sampling of inchikey pairs (per bin): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1525/1525 [00:00<00:00, 20989.14it/s]\n", + "Balanced sampling of inchikey pairs (per bin): 
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1525/1525 [00:00<00:00, 23527.10it/s]\n", + "Balanced sampling of inchikey pairs (per bin): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1525/1525 [00:00<00:00, 18984.89it/s]\n", + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 18.57it/s]\n" ] }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -373,12 +416,28 @@ } ], "source": [ - "selected_compound_pairs = select_compound_pairs_wrapper(spectra, SettingsMS2Deepscore(\n", + "spectrum_pair_generator_new = create_spectrum_pair_generator(spectra, SettingsMS2Deepscore(\n", " average_inchikey_sampling_count=5, max_inchikey_sampling=100, \n", " same_prob_bins = np.array([(0.7, 1.0), (0.4, 0.7), (-0.001, 0.4)])))\n", - "inchikey_pair_generator = InchikeyPairGenerator(selected_compound_pairs)\n", - "plot_diagnostic_plots(inchikey_pair_generator, \"\")" + "plot_diagnostic_plots(spectrum_pair_generator_new, \"\")" ] + }, + { + "cell_type": "markdown", + "id": "6b57af8d-81d0-4530-890a-a9b50e323e78", + "metadata": {}, + "source": [ + "# Cross ionmode pair selection\n", + "Since version 2.7.0 there is a cross ionmode pair selection. Which is automatically used when you train on both ionmodes. This creates separate inchikey pairs per ionmodes. To make sure both modes are sampled equally. This was not used for the model in the paper. Some initial tests were performed, which can be found in \"model_benchmarking/Compare balanced cross ion mode sampling.ipynb\". The results were not yet very convincing, neg-neg becomes better, but pos-pos became worse. It is probably best to further optimize this first before using for model generation. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc461fb7-c8ed-4828-b1b4-77e5843af25d", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From cfc1921a1a60e24a64424a6cf33aa44c672e3494 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Tue, 27 Jan 2026 11:56:17 +0100 Subject: [PATCH 48/48] Add Compare balanced cross ion mode sampling.ipynb --- ...are balanced cross ion mode sampling.ipynb | 1703 +++++++++++++++++ 1 file changed, 1703 insertions(+) create mode 100644 notebooks/model_benchmarking/Compare balanced cross ion mode sampling.ipynb diff --git a/notebooks/model_benchmarking/Compare balanced cross ion mode sampling.ipynb b/notebooks/model_benchmarking/Compare balanced cross ion mode sampling.ipynb new file mode 100644 index 00000000..9013f987 --- /dev/null +++ b/notebooks/model_benchmarking/Compare balanced cross ion mode sampling.ipynb @@ -0,0 +1,1703 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "# Balanced sampling across ionmodes\n", + "In the manuscript we had a sampling algorithm that sampled from the training data without considering ionisation mode. In MS2DeepScore 2.7.0 it became possible to do pair sampling per ionmode. Equal pairs are selected in pos-pos, pos-neg or neg-neg. The hope was to have better neg neg prediction quality and better pos-neg prediction quality. Even though there is some improvement for neg neg prediction. The pos-pos prediction accuracy also substantially decreases. Which we don't think outweights the benefits. It might be interesting to further tweak these settings, but I did not have the time to fully explore this.\n", + "\n", + "The models used here were trained on the spectra here: https://zenodo.org/records/16882111\n", + "The model itself has not been uploaded to zenodo. So if you want to reproduce, you will need to retrain the model unfortunately." 
+ ], + "id": "293195a426211dfe" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "from matchms.importing import load_from_mgf\n", + "from tqdm import tqdm\n", + "\n", + "normal_model_file_name = \"/lustre/BIF/nobackup/jonge094/ms2deepscore/data/library_22_07_2025/trained_models/both_mode_ionmode_precursor_mz_10000_layers_500_embedding_2025_08_18_14_48_59/ms2deepscore_model.pt\"\n", + "balanced_model_file_name = \"/lustre/BIF/nobackup/jonge094/ms2deepscore/data/library_22_07_2025/trained_models/both_mode_ionmode_precursor_mz_10000_layers_500_embedding_2025_08_24_00_04_17/ms2deepscore_model.pt\"\n", + "\n", + "test_spectra_file = \"/lustre/BIF/nobackup/jonge094/ms2deepscore/data/library_22_07_2025/trained_models/test_merged_and_cleaned_libraries_1.mgf\"" + ], + "id": "5f4bd4fe1813b454" + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ba8bfec2-87f3-491f-860a-b4896cf65b6b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "49422it [00:16, 2975.96it/s]\n" + ] + } + ], + "source": [ + "test_spectra = list(tqdm(load_from_mgf(test_spectra_file)))" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "6cb33659-a419-421a-84f8-9b80f8aa876a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'charge': 1, 'description': 'ENAMMOL', 'formula': 'C9H13NO2', 'inchi': 'InChI=1S/C9H13NO2/c1-4-10-6(2)5-8(7(10)3)9(11)12/h5H,4H2,1-3H3,(H,11,12)', 'smiles': 'CCn1c(C)cc(C(=O)O)c1C', 'feature_id': '20240405_pluskal_enammol_5003_B20_id_MSn_positive.mzML msn trees:5', 'adduct': '[M-H2O+H]+', 'feature_ms1_height': '1.028E7', 'spectype': 'SINGLE_BEST_SCAN', 'collision_energy': '20.0', 'fragmentation_method': 'HCD', 'isolation_window': '1.200000047684', 'acquisition': 'Commercial', 'ims_type': 'none', 'ion_source': 'ESI', 'ionmode': 'positive', 'dataset_id': 'MSV000094528', 'usi': 
'[mzspec:MSV000094528:20240405_pluskal_enammol_5003_B20_id_MSn_positive:390]', 'scans': '390', 'precursor_purity': '1.0', 'quality_chimeric': 'PASSED', 'quality_explained_intensity': '0.77921', 'quality_explained_signals': '0.42857143', 'num_peaks': '28', 'compound_name': '1-ethyl-2,5-dimethyl-1H-pyrrole-3-carboxylic acid', 'parent_mass': '167.09463', 'inchi_aux': 'IVFAZMHRJRGODH-UHFFFAOYSA-N', 'ms_level': '2', 'retention_time': 45.26, 'principal_investigator': 'Tomas Pluskal', 'data_collector': 'Corinna Brungs', 'precursor_mz': 150.09134, 'inchikey': 'IVFAZMHRJRGODH-UHFFFAOYSA-N', 'precursor_formula': 'C9H12NO', 'ms_mass_analyzer': 'Orbitrap'}\n" + ] + } + ], + "source": [ + "print(test_spectra[0].metadata)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6715584d-222a-4525-b0d5-e978f8080285", + "metadata": {}, + "outputs": [], + "source": [ + "def split_by_ionmode(spectra):\n", + " \"\"\"Splits spectra into list of positive ionmode and list of negative ionmode spectra.\n", + "\n", + " Removes spectra without correct ionmode metadata entry.\n", + " \"\"\"\n", + " pos_spectra = []\n", + " neg_spectra = []\n", + " spectra_removed = 0\n", + " for spectrum in tqdm(spectra,\n", + " desc=\"Splitting pos and neg mode spectra\"):\n", + " if spectrum is not None:\n", + " ionmode = spectrum.get(\"ionmode\")\n", + " if ionmode == \"positive\":\n", + " pos_spectra.append(spectrum)\n", + " elif ionmode == \"negative\":\n", + " neg_spectra.append(spectrum)\n", + " else:\n", + " spectra_removed += 1\n", + " print(f\"The spectra, are split in {len(pos_spectra)} positive spectra \"\n", + " f\"and {len(neg_spectra)} negative mode spectra. 
{spectra_removed} were removed\")\n", + " return pos_spectra, neg_spectra" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "cad7c4e5-bb6d-4ed7-a47e-92398d85cdb8", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Splitting pos and neg mode spectra: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 49422/49422 [00:00<00:00, 408628.88it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The spectra, are split in 34227 positive spectra and 15195 negative mode spectra. 0 were removed\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "pos_test, neg_test = split_by_ionmode(test_spectra)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82c4d220-7872-4890-ab54-4b1b9c068b98", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/lustre/BIF/nobackup/jonge094/ms2deepscore/ms2deepscore/ms2deepscore/models/load_model.py:34: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. 
Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n", + " model_settings = torch.load(filename, map_location=device)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "34227it [00:37, 900.92it/s]\n", + "15195it [00:15, 966.28it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3092/3092 [00:18<00:00, 168.93it/s]\n", + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1659/1659 [00:05<00:00, 323.15it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n", + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17466it [00:18, 966.30it/s]" + ] + } + ], + "source": [ + 
"from ms2deepscore.benchmarking.CalculateScoresBetweenAllIonmodes import CalculateScoresBetweenAllIonmodes\n", + "scores_normal_model_1 = CalculateScoresBetweenAllIonmodes(normal_model_file_name, pos_test, neg_test, fingerprint_type=\"daylight\", n_bits_fingerprint=4096)\n", + "scores_balanced_model_1 = CalculateScoresBetweenAllIonmodes(balanced_model_file_name, pos_test, neg_test, fingerprint_type=\"daylight\", n_bits_fingerprint=4096)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72731826-c613-4ce0-8846-a3b98ac400a3", + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.stats import gaussian_kde\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from ms2deepscore.utils import create_evenly_spaced_bins\n", + "from matplotlib import pyplot as plt\n", + "\n", + "from ms2deepscore.benchmarking.CalculateScoresBetweenAllIonmodes import CalculateScoresBetweenAllIonmodes\n", + "from ms2deepscore.utils import create_evenly_spaced_bins\n", + "import pandas as pd\n", + "def get_predictions_per_bin(predictions_and_tanimoto_scores,\n", + " average_per_inchikey_pair: pd.DataFrame,\n", + " tanimoto_bins: np.ndarray):\n", + " \"\"\"Compute average loss per Tanimoto score bin\n", + "\n", + " Parameters\n", + " ----------\n", + " average_per_inchikey_pair\n", + " Precalculated average (prediction or loss) per inchikey pair\n", + " ref_score_bins\n", + " Bins for the reference score to evaluate the performance of scores. 
in the form [(0.0, 0.1), (0.1, 0.2) ...]\n", + " \"\"\"\n", + " average_predictions = average_per_inchikey_pair.to_numpy()\n", + "\n", + " sorted_bins = sorted(tanimoto_bins, key=lambda b: b[0])\n", + "\n", + " bins = [bin_pair[0] for bin_pair in sorted_bins]\n", + " bins.append(sorted_bins[-1][1])\n", + "\n", + " digitized = np.digitize(predictions_and_tanimoto_scores.tanimoto_df, bins, right=True)\n", + " predictions_per_bin = []\n", + " for i, bin_edges in tqdm(enumerate(sorted_bins), desc=\"Selecting available inchikey pairs per bin\"):\n", + " row_idxs, col_idxs = np.where(digitized == i+ 1)\n", + " predictions_in_this_bin = average_predictions[row_idxs, col_idxs]\n", + " predictions_in_this_bin_not_nan = predictions_in_this_bin[~np.isnan(predictions_in_this_bin)]\n", + " predictions_per_bin.append(predictions_in_this_bin_not_nan)\n", + " return predictions_per_bin\n", + "\n", + "def plot_comparison_violinplot_three_panels(\n", + " list_a,\n", + " list_b,\n", + " bins\n", + "):\n", + " bin_labels = [f\"{a:.1f}–<{b:.1f}\" for (a, b) in bins]\n", + " nr_of_bins = len(bin_labels)\n", + " n_panels = 3\n", + " assert len(list_a) == n_panels and len(list_b) == n_panels, f\"Expected {n_panels} sets of scores in each input\"\n", + "\n", + " fig, axes = plt.subplots(\n", + " 2, n_panels, figsize=(5 * n_panels, 8),\n", + " sharex='col',\n", + " gridspec_kw={'height_ratios': [1, 4]},\n", + " constrained_layout=True\n", + " )\n", + "\n", + " def get_bin_data(scores):\n", + " average_predictions = scores.get_average_prediction_per_inchikey_pair()\n", + " return get_predictions_per_bin(scores, average_predictions, bins)\n", + "\n", + " def draw_stat_lines(ax, data, pos, side='left'):\n", + " if len(data) == 0:\n", + " return\n", + " median = np.median(data)\n", + " p1, p99 = np.percentile(data, [1, 99])\n", + "\n", + " if side == 'left':\n", + " x_range = [pos - 0.3, pos]\n", + " else:\n", + " x_range = [pos, pos + 0.3]\n", + "\n", + " # Median line\n", + " ax.plot(x_range, 
[median, median], color='black', lw=1.5)\n", + "\n", + " # 1st and 99th percentile lines\n", + " # ax.plot(x_range, [p1, p1], color='black', lw=1, linestyle='dotted')\n", + " # ax.plot(x_range, [p99, p99], color='black', lw=1, linestyle='dotted')\n", + "\n", + " x = np.arange(nr_of_bins)\n", + "\n", + " for i in range(n_panels):\n", + " scores_a = list_a[i]\n", + " scores_b = list_b[i]\n", + "\n", + " predictions_per_bin_a = get_bin_data(scores_a)\n", + " predictions_per_bin_b = get_bin_data(scores_b)\n", + "\n", + " counts_a = [len(p) for p in predictions_per_bin_a]\n", + " counts_b = [len(p) for p in predictions_per_bin_b]\n", + "\n", + " # === TOP BAR PLOTS ===\n", + " bar_width = 0.4\n", + " axes[0, i].bar(x - bar_width/2, counts_a, width=bar_width, label=scores_a.label, alpha=0.6)\n", + " axes[0, i].bar(x + bar_width/2, counts_b, width=bar_width, label=scores_b.label, alpha=0.6)\n", + " axes[0, i].set_yscale('log')\n", + " axes[0, i].set_ylabel('Nr of pairs')\n", + " axes[0, i].set_ylim(100, 2_000_000)\n", + " axes[0, i].tick_params(axis='x', labelbottom=False)\n", + " axes[0, i].legend()\n", + "\n", + "\n", + "\n", + " # === BOTTOM SPLIT VIOLIN PLOTS ===\n", + " ax = axes[1, i]\n", + " for j in range(nr_of_bins):\n", + " data_left = predictions_per_bin_a[j]\n", + " data_right = predictions_per_bin_b[j]\n", + " pos = x[j]\n", + "\n", + " if len(data_left) > 1:\n", + " kde_left = gaussian_kde(data_left)\n", + " y = np.linspace(0, 1, 200)\n", + " v = kde_left(y)\n", + " v = 0.3 * v / v.max()\n", + " ax.fill_betweenx(y, pos - v, pos, facecolor='#1f77b4', alpha=0.7)\n", + "\n", + " if len(data_right) > 1:\n", + " kde_right = gaussian_kde(data_right)\n", + " y = np.linspace(0, 1, 200)\n", + " v = kde_right(y)\n", + " v = 0.3 * v / v.max()\n", + " ax.fill_betweenx(y, pos, pos + v, facecolor='#ff7f0e', alpha=0.7)\n", + "\n", + " draw_stat_lines(ax, data_left, pos, side='left')\n", + " draw_stat_lines(ax, data_right, pos, side='right')\n", + "\n", + " 
ax.set_ylim(-0.05, 1.05)\n", + " ax.set_ylabel(\"Predicted score\")\n", + " ax.set_xlabel(\"True chemical similarity\")\n", + " ax.set_xticks(x)\n", + " ax.set_xticklabels(bin_labels, fontsize=9, rotation='vertical')\n", + " axes[0, 0].set_title(\"Positive vs positive\")\n", + " axes[0, 1].set_title(\"Positive vs negative\")\n", + " axes[0, 2].set_title(\"Negative vs negative\")\n", + "\n", + " return fig\n" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "c811caea-e921-4a7a-96af-8fd7cc61a241", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "d22e6c67-29ee-4d0f-abd6-280174e37dd0", + "metadata": {}, + "outputs": [], + "source": [ + "scores_normal_model_1.neg_vs_neg_scores.label = \"Normal model\"\n", + "scores_normal_model_1.pos_vs_pos_scores.label = \"Normal model\"\n", + "scores_normal_model_1.pos_vs_neg_scores.label = \"Normal model\"\n", + "\n", + "scores_balanced_model_1.neg_vs_neg_scores.label=\"Balanced across ionmodes\"\n", + "scores_balanced_model_1.pos_vs_pos_scores.label=\"Balanced across ionmodes\"\n", + "scores_balanced_model_1.pos_vs_neg_scores.label=\"Balanced across ionmodes\"" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "# Here we also use a 0.9999-1.0 bin\n", + "This is different from the paper and gave some new unexpected results. The accuracy is a lot lower for identical matches cross-ionmode than for almost identical matches, which is surprising, and we don't really know what is going on. However, it doesn't really seem to be an issue with the cross ion mode pair sampling." 
+ ], + "id": "50b5474615f8a923" + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "68c7f80f-a3da-4279-9f66-ffc81b9895f2", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Selecting available inchikey pairs per bin: 11it [00:00, 22.22it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 21.83it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 40.01it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 41.27it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 80.47it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 81.34it/s]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "bins = [(-1e-08, 0.1),\n", + " (0.1, 0.2),\n", + " (0.2, 0.3),\n", + " (0.3, 0.4),\n", + " (0.4, 0.5),\n", + " (0.5, 0.6),\n", + " (0.6, 0.7),\n", + " (0.7, 0.8),\n", + " (0.8, 0.9),\n", + " (0.9, 0.9999),\n", + " (0.9999, 1.0)]\n", + "fig = plot_comparison_violinplot_three_panels([scores_normal_model_1.pos_vs_pos_scores, scores_normal_model_1.pos_vs_neg_scores, scores_normal_model_1.neg_vs_neg_scores],\n", + " [scores_balanced_model_1.pos_vs_pos_scores, scores_balanced_model_1.pos_vs_neg_scores, scores_balanced_model_1.neg_vs_neg_scores],\n", + " bins)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "0e319fb9-0b9b-4935-bc9a-f1469eb8009b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Selecting available inchikey pairs per bin: 13it [00:00, 23.76it/s]\n", + "Selecting available inchikey pairs per bin: 13it [00:00, 23.35it/s]\n", + "Selecting available inchikey pairs per bin: 13it [00:00, 44.71it/s]\n", + "Selecting available inchikey pairs per bin: 13it [00:00, 44.07it/s]\n", + "Selecting available inchikey pairs per bin: 13it [00:00, 84.89it/s]\n", + "Selecting available inchikey pairs per bin: 13it [00:00, 85.25it/s]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "bins = [(-1e-08, 0.001),\n", + " (0.001, 0.01),\n", + " (0.01, 0.1),\n", + " (0.1, 0.2),\n", + " (0.2, 0.3),\n", + " (0.3, 0.4),\n", + " (0.4, 0.5),\n", + " (0.5, 0.6),\n", + " (0.6, 0.7),\n", + " (0.7, 0.8),\n", + " (0.8, 0.9),\n", + " (0.9, 0.9999),\n", + " (0.9999, 1.0)]\n", + "fig = plot_comparison_violinplot_three_panels([scores_normal_model_1.pos_vs_pos_scores, scores_normal_model_1.pos_vs_neg_scores, scores_normal_model_1.neg_vs_neg_scores],\n", + " [scores_balanced_model_1.pos_vs_pos_scores, scores_balanced_model_1.pos_vs_neg_scores, scores_balanced_model_1.neg_vs_neg_scores],\n", + " bins)" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "230eadc1-4a74-433f-a843-655e47ab028a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Selecting available inchikey pairs per bin: 12it [00:00, 17.28it/s]\n", + "Selecting available inchikey pairs per bin: 12it [00:00, 22.15it/s]\n", + "Selecting available inchikey pairs per bin: 12it [00:00, 40.40it/s]\n", + "Selecting available inchikey pairs per bin: 12it [00:00, 44.55it/s]\n", + "Selecting available inchikey pairs per bin: 12it [00:00, 84.84it/s]\n", + "Selecting available inchikey pairs per bin: 12it [00:00, 84.13it/s]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "bins = [(-1e-08, 0.001),\n", + " (0.001, 0.01),\n", + " (0.01, 0.1),\n", + " (0.1, 0.2),\n", + " (0.2, 0.3),\n", + " (0.3, 0.4),\n", + " (0.4, 0.5),\n", + " (0.5, 0.6),\n", + " (0.6, 0.7),\n", + " (0.7, 0.8),\n", + " (0.8, 0.9),\n", + " (0.9, 0.9999),\n", + " (0.9999, 1.0)]\n", + "fig = plot_comparison_violinplot_three_panels([scores_normal_model_1.pos_vs_pos_scores, scores_normal_model_1.pos_vs_neg_scores, scores_normal_model_1.neg_vs_neg_scores],\n", + " [scores_balanced_model_1.pos_vs_pos_scores, scores_balanced_model_1.pos_vs_neg_scores, scores_balanced_model_1.neg_vs_neg_scores],\n", + " bins)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "61d20252-813a-4485-8beb-b58ef14d1898", + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List\n", + "\n", + "import numpy as np\n", + "from matplotlib import pyplot as plt\n", + "\n", + "from ms2deepscore.validation_loss_calculation.PredictionsAndTanimotoScores import PredictionsAndTanimotoScores\n", + "from ms2deepscore.utils import create_evenly_spaced_bins\n", + "\n", + "def plot_loss_per_bin_multiple_benchmarks(list_of_predictions_and_tanimoto_scores: List[PredictionsAndTanimotoScores],\n", + " nr_of_bins=10,\n", + " loss_type=\"MSE\",\n", + " title=\"\"):\n", + " \"\"\"Combines the plot of multiple comparisons into one plot\n", + " \"\"\"\n", + " ref_score_bins = create_evenly_spaced_bins(nr_of_bins)\n", + " fig = plt.figure(figsize=(5,3))\n", + " labels = []\n", + " for predictions_and_tanimoto_scores in list_of_predictions_and_tanimoto_scores:\n", + " bin_content, rmses = predictions_and_tanimoto_scores.get_average_loss_per_bin_per_inchikey_pair(\n", + " loss_type, ref_score_bins)\n", + " plt.plot(np.arange(len(rmses)), rmses, \"o:\")\n", + " labels.append(predictions_and_tanimoto_scores.label)\n", + " plt.title(title)\n", + " plt.legend(labels)\n", + " plt.ylabel(loss_type)\n", + " 
plt.grid(True)\n", + " plt.xlabel(\"tanimoto score bin\")\n", + " plt.xticks(np.arange(len(ref_score_bins)),\n", + " [f\"{a:.1f} to < {b:.1f}\" for (a, b) in ref_score_bins], fontsize=9, rotation='vertical')\n", + " plt.grid(True)\n", + " plt.tight_layout()\n", + " return fig" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "64f87538-e49a-44e7-bf28-799bdcbdb2ae", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Selecting available inchikey pairs per bin: 10it [00:00, 76.31it/s]\n", + "Selecting available inchikey pairs per bin: 10it [00:00, 76.79it/s]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "figure = plot_loss_per_bin_multiple_benchmarks([scores_normal_model_1.neg_vs_neg_scores, scores_balanced_model_1.neg_vs_neg_scores], 10, \"RMSE\", \"negative vs negative\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "31aa6288-2882-4448-81fa-0e7f0288ed30", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Selecting available inchikey pairs per bin: 10it [00:00, 20.27it/s]\n", + "Selecting available inchikey pairs per bin: 10it [00:00, 20.32it/s]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "figure = plot_loss_per_bin_multiple_benchmarks([scores_normal_model_1.pos_vs_pos_scores, scores_balanced_model_1.pos_vs_pos_scores], 10, \"RMSE\", \"positive vs positive\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "6abcc1d2-e00a-4ffe-9bfa-42676130437f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Selecting available inchikey pairs per bin: 10it [00:00, 39.85it/s]\n", + "Selecting available inchikey pairs per bin: 10it [00:00, 39.69it/s]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "figure = plot_loss_per_bin_multiple_benchmarks([scores_normal_model_1.pos_vs_neg_scores, scores_balanced_model_1.pos_vs_neg_scores], 10, \"RMSE\", \"positive vs negative\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "f5425072-e869-43f4-9437-e5f9fdfaa501", + "metadata": {}, + "source": [ + "# Redo for old test set\n", + "There will of course be data leakage, this is just a sanity check, to make sure it is the test set that became more difficult. It seems to be the case that it is something test set specific, but we don't fully understand what is going on." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "ba778bfd-a12f-4ab2-8658-0590ad451f95", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "7142it [00:02, 2540.73it/s]\n", + "24911it [00:09, 2566.56it/s]\n" + ] + } + ], + "source": [ + "neg_test = list(tqdm(load_from_mgf(\"/lustre/BIF/nobackup/jonge094/ms2deepscore/data/pytorch/new_corinna_included/training_and_validation_split/negative_testing_spectra.mgf\")))\n", + "pos_test = list(tqdm(load_from_mgf(\"/lustre/BIF/nobackup/jonge094/ms2deepscore/data/pytorch/new_corinna_included/training_and_validation_split/positive_testing_spectra.mgf\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "3ddba656-39c9-4753-8d22-6aee7ae5a247", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/lustre/BIF/nobackup/jonge094/ms2deepscore/ms2deepscore/ms2deepscore/models/load_model.py:34: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). 
In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n", + " model_settings = torch.load(filename, map_location=device)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "24911it [00:25, 973.65it/s]\n", + "7142it [00:07, 973.28it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1831/1831 [00:05<00:00, 307.19it/s]\n", + "Calculating fingerprints: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 924/924 [00:02<00:00, 309.66it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n", + 
"Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "24911it [00:25, 968.49it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1831/1831 [00:05<00:00, 316.83it/s]\n", + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1831/1831 [00:05<00:00, 315.82it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n", + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "7142it [00:07, 971.02it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 
924/924 [00:03<00:00, 307.72it/s]\n", + "Calculating fingerprints: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 924/924 [00:03<00:00, 305.47it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/lustre/BIF/nobackup/jonge094/ms2deepscore/ms2deepscore/ms2deepscore/models/load_model.py:34: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. 
Please open an issue on GitHub for any issues related to this experimental feature.\n", + " model_settings = torch.load(filename, map_location=device)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "24911it [00:25, 972.55it/s]\n", + "7142it [00:07, 973.24it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1831/1831 [00:05<00:00, 312.88it/s]\n", + "Calculating fingerprints: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 924/924 [00:03<00:00, 307.42it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n", + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "24911it [00:25, 966.07it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1831/1831 [00:05<00:00, 314.51it/s]\n", + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1831/1831 [00:05<00:00, 313.86it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n", + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "7142it [00:07, 962.44it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 924/924 [00:03<00:00, 307.24it/s]\n", + "Calculating fingerprints: 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 924/924 [00:03<00:00, 307.41it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n" + ] + } + ], + "source": [ + "from ms2deepscore.benchmarking.CalculateScoresBetweenAllIonmodes import CalculateScoresBetweenAllIonmodes\n", + "scores_normal_model = CalculateScoresBetweenAllIonmodes(normal_model_file_name, pos_test, neg_test, fingerprint_type=\"daylight\", n_bits_fingerprint=4096)\n", + "scores_balanced_model = CalculateScoresBetweenAllIonmodes(balanced_model_file_name, pos_test, neg_test, fingerprint_type=\"daylight\", n_bits_fingerprint=4096)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "6e889a1e-660c-4325-bc42-d87c0c3ef49b", + "metadata": {}, + "outputs": [], + "source": [ + "scores_normal_model.neg_vs_neg_scores.label = \"Normal model\"\n", + "scores_normal_model.pos_vs_pos_scores.label = \"Normal model\"\n", + "scores_normal_model.pos_vs_neg_scores.label = \"Normal model\"\n", + "\n", + "scores_balanced_model.neg_vs_neg_scores.label=\"Balanced across ionmodes\"\n", + "scores_balanced_model.pos_vs_pos_scores.label=\"Balanced across ionmodes\"\n", + "scores_balanced_model.pos_vs_neg_scores.label=\"Balanced across ionmodes\"" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "8823d6a0-b348-406d-b1a7-132ce84779f9", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Selecting available inchikey pairs per bin: 10it [00:00, 60.05it/s]\n", + "Selecting available inchikey pairs per bin: 10it [00:00, 59.94it/s]\n", + "Selecting available inchikey pairs per bin: 10it 
[00:00, 94.22it/s]\n", + "Selecting available inchikey pairs per bin: 10it [00:00, 120.04it/s]\n", + "Selecting available inchikey pairs per bin: 10it [00:00, 257.35it/s]\n", + "Selecting available inchikey pairs per bin: 10it [00:00, 261.44it/s]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = plot_comparison_violinplot_three_panels([scores_normal_model.pos_vs_pos_scores, scores_normal_model.pos_vs_neg_scores, scores_normal_model.neg_vs_neg_scores],\n", + " [scores_balanced_model.pos_vs_pos_scores, scores_balanced_model.pos_vs_neg_scores, scores_balanced_model.neg_vs_neg_scores],\n", + " 10)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "947f38c1-94db-478a-a7e0-1e6db2fe0737", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Selecting available inchikey pairs per bin: 11it [00:00, 63.32it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 63.71it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 123.30it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 125.02it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 272.29it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 270.06it/s]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "bins = [(-1e-08, 0.1),\n", + " (0.1, 0.2),\n", + " (0.2, 0.3),\n", + " (0.3, 0.4),\n", + " (0.4, 0.5),\n", + " (0.5, 0.6),\n", + " (0.6, 0.7),\n", + " (0.7, 0.8),\n", + " (0.8, 0.9),\n", + " (0.9, 0.99),\n", + " (0.99, 1.0)]\n", + "fig = plot_comparison_violinplot_three_panels([scores_normal_model.pos_vs_pos_scores, scores_normal_model.pos_vs_neg_scores, scores_normal_model.neg_vs_neg_scores],\n", + " [scores_balanced_model.pos_vs_pos_scores, scores_balanced_model.pos_vs_neg_scores, scores_balanced_model.neg_vs_neg_scores],\n", + " bins)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "8506cb4d-4093-473c-9ee4-5f4a6f52a221", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Selecting available inchikey pairs per bin: 10it [00:00, 249.38it/s]\n", + "Selecting available inchikey pairs per bin: 10it [00:00, 252.85it/s]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "figure = plot_loss_per_bin_multiple_benchmarks([scores_normal_model.neg_vs_neg_scores, scores_balanced_model.neg_vs_neg_scores], 10, \"MSE\", \"negative vs negative\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "27254cf3-62dc-4ad4-94f1-584449c3f1de", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Selecting available inchikey pairs per bin: 10it [00:00, 59.77it/s]\n", + "Selecting available inchikey pairs per bin: 10it [00:00, 59.36it/s]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "figure = plot_loss_per_bin_multiple_benchmarks([scores_normal_model.pos_vs_pos_scores, scores_balanced_model.pos_vs_pos_scores], 10, \"MSE\", \"positive vs positive\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "329fe156-cf2e-4e44-b7d3-2359bbd166c0", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Selecting available inchikey pairs per bin: 10it [00:00, 121.71it/s]\n", + "Selecting available inchikey pairs per bin: 10it [00:00, 121.31it/s]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "figure = plot_loss_per_bin_multiple_benchmarks([scores_normal_model.pos_vs_neg_scores, scores_balanced_model.pos_vs_neg_scores], 10, \"MSE\", \"positive vs negative\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "ed772eda-9e95-47c6-a244-e8c417750dd7", + "metadata": {}, + "source": [ + "# Run on only GNPS test spectra" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71e0be6a-acde-43fe-9163-74fccfcbe1df", + "metadata": {}, + "outputs": [], + "source": [ + "gnps_spectra = []\n", + "for spectrum in test_spectra:\n", + " if spectrum.get(\"spectype\") is None:\n", + " gnps_spectra.append(spectrum)\n", + "print(len(gnps_spectra))" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "bbd020b3-69aa-44f6-b0a1-623aeaf3f270", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Splitting pos and neg mode spectra: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33709/33709 [00:00<00:00, 643431.89it/s]\n", + "/lustre/BIF/nobackup/jonge094/ms2deepscore/ms2deepscore/ms2deepscore/models/load_model.py:34: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. 
Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n", + " model_settings = torch.load(filename, map_location=device)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The spectra, are split in 24174 positive spectra and 9535 negative mode spectra. 0 were removed\n", + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "24174it [00:24, 969.83it/s]\n", + "9535it [00:09, 973.79it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2218/2218 [00:06<00:00, 316.93it/s]\n", + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1240/1240 [00:03<00:00, 315.59it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n", + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "24174it [00:24, 971.66it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2218/2218 [00:06<00:00, 317.56it/s]\n", + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2218/2218 [00:06<00:00, 319.79it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n", + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "9535it [00:09, 974.63it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1240/1240 [00:03<00:00, 315.72it/s]\n", + "Calculating 
fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1240/1240 [00:03<00:00, 317.11it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/lustre/BIF/nobackup/jonge094/ms2deepscore/ms2deepscore/ms2deepscore/models/load_model.py:34: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. 
Please open an issue on GitHub for any issues related to this experimental feature.\n", + " model_settings = torch.load(filename, map_location=device)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "24174it [00:24, 971.13it/s]\n", + "9535it [00:09, 974.63it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2218/2218 [00:07<00:00, 305.83it/s]\n", + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1240/1240 [00:04<00:00, 301.10it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n", + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "24174it [00:24, 969.49it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2218/2218 [00:06<00:00, 320.23it/s]\n", + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2218/2218 [00:07<00:00, 315.87it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n", + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "9535it [00:09, 965.39it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1240/1240 [00:03<00:00, 314.46it/s]\n", + "Calculating fingerprints: 
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1240/1240 [00:03<00:00, 314.89it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n" + ] + } + ], + "source": [ + "pos_test, neg_test = split_by_ionmode(gnps_spectra)\n", + "from ms2deepscore.benchmarking.CalculateScoresBetweenAllIonmodes import CalculateScoresBetweenAllIonmodes\n", + "scores_normal_model_gnps = CalculateScoresBetweenAllIonmodes(normal_model_file_name, pos_test, neg_test, fingerprint_type=\"daylight\", n_bits_fingerprint=4096)\n", + "scores_balanced_model_gnps = CalculateScoresBetweenAllIonmodes(balanced_model_file_name, pos_test, neg_test, fingerprint_type=\"daylight\", n_bits_fingerprint=4096)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "1a56d4ce-97ca-4767-baef-9504f83b192f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Selecting available inchikey pairs per bin: 11it [00:00, 42.21it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 42.64it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 76.06it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 78.91it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 152.71it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 149.56it/s]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = plot_comparison_violinplot_three_panels([scores_normal_model_gnps.pos_vs_pos_scores, scores_normal_model_gnps.pos_vs_neg_scores, scores_normal_model_gnps.neg_vs_neg_scores],\n", + " [scores_balanced_model_gnps.pos_vs_pos_scores, scores_balanced_model_gnps.pos_vs_neg_scores, scores_balanced_model_gnps.neg_vs_neg_scores],\n", + " bins)" + ] + }, + { + "cell_type": "markdown", + "id": "7d28c01f-0b60-49d0-a6eb-f7755e74a070", + "metadata": {}, + "source": [ + "# Run only on spectra which ionize in both modes" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "333ae0e2-e0f5-4900-8034-6eaf4bb7c64d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3092\n", + "1659\n", + "1369\n" + ] + } + ], + "source": [ + "pos_inchikeys = []\n", + "neg_inchikeys = []\n", + "for spectrum in test_spectra:\n", + " if spectrum.get(\"ionmode\") == \"positive\":\n", + " pos_inchikeys.append(spectrum.get(\"inchikey\")[:14])\n", + " else:\n", + " neg_inchikeys.append(spectrum.get(\"inchikey\")[:14])\n", + " \n", + "print(len(set(pos_inchikeys)))\n", + "print(len(set(neg_inchikeys)))\n", + "print(len(set(neg_inchikeys)&set(pos_inchikeys)))\n", + "inchikeys_ionizing_in_both_modes = set(neg_inchikeys)&set(pos_inchikeys)\n", + "\n", + "both_modes_ionizing = []\n", + "for spectrum in test_spectra:\n", + " if spectrum.get(\"inchikey\")[:14] in inchikeys_ionizing_in_both_modes:\n", + " both_modes_ionizing.append(spectrum)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "ff385a03-2199-4c4b-86c7-debb4bdeaf93", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Splitting pos and neg mode spectra: 
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31796/31796 [00:00<00:00, 444444.00it/s]\n", + "/lustre/BIF/nobackup/jonge094/ms2deepscore/ms2deepscore/ms2deepscore/models/load_model.py:34: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n", + " model_settings = torch.load(filename, map_location=device)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The spectra, are split in 18327 positive spectra and 13469 negative mode spectra. 
0 were removed\n", + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "18327it [00:19, 964.03it/s]\n", + "13469it [00:13, 967.01it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1369/1369 [00:04<00:00, 295.75it/s]\n", + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1369/1369 [00:04<00:00, 317.57it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n", + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "18327it [00:18, 965.99it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1369/1369 [00:04<00:00, 314.73it/s]\n", + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1369/1369 [00:04<00:00, 311.74it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n", + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13469it [00:13, 963.65it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1369/1369 [00:04<00:00, 314.58it/s]\n", + "Calculating fingerprints: 
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1369/1369 [00:04<00:00, 311.56it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/lustre/BIF/nobackup/jonge094/ms2deepscore/ms2deepscore/ms2deepscore/models/load_model.py:34: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. 
Please open an issue on GitHub for any issues related to this experimental feature.\n", + " model_settings = torch.load(filename, map_location=device)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "18327it [00:18, 967.43it/s]\n", + "13469it [00:13, 969.48it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1369/1369 [00:04<00:00, 309.71it/s]\n", + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1369/1369 [00:04<00:00, 302.92it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n", + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "18327it [01:39, 183.82it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1369/1369 [00:04<00:00, 315.63it/s]\n", + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1369/1369 [00:04<00:00, 316.72it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n", + "Calculating embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13469it [00:13, 971.04it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating similarity between embeddings\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Calculating fingerprints: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1369/1369 [00:04<00:00, 314.71it/s]\n", + "Calculating fingerprints: 
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1369/1369 [00:04<00:00, 309.37it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating tanimoto scores\n" + ] + } + ], + "source": [ + "pos_test, neg_test = split_by_ionmode(both_modes_ionizing)\n", + "from ms2deepscore.benchmarking.CalculateScoresBetweenAllIonmodes import CalculateScoresBetweenAllIonmodes\n", + "scores_normal_model_both_modes_ionizing = CalculateScoresBetweenAllIonmodes(normal_model_file_name, pos_test, neg_test, fingerprint_type=\"daylight\", n_bits_fingerprint=4096)\n", + "scores_balanced_model_both_modes_ionizing = CalculateScoresBetweenAllIonmodes(balanced_model_file_name, pos_test, neg_test, fingerprint_type=\"daylight\", n_bits_fingerprint=4096)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "2def2c25-1795-4c2b-be7f-c93779df3f34", + "metadata": {}, + "outputs": [], + "source": [ + "scores_normal_model_both_modes_ionizing.neg_vs_neg_scores.label = \"Normal model\"\n", + "scores_normal_model_both_modes_ionizing.pos_vs_pos_scores.label = \"Normal model\"\n", + "scores_normal_model_both_modes_ionizing.pos_vs_neg_scores.label = \"Normal model\"\n", + "\n", + "scores_balanced_model_both_modes_ionizing.neg_vs_neg_scores.label=\"Balanced across ionmodes\"\n", + "scores_balanced_model_both_modes_ionizing.pos_vs_pos_scores.label=\"Balanced across ionmodes\"\n", + "scores_balanced_model_both_modes_ionizing.pos_vs_neg_scores.label=\"Balanced across ionmodes\"" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "ed11f62d-25c3-4c6b-a03a-2e0c1f35b255", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "Selecting available inchikey pairs per bin: 11it [00:00, 120.74it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 120.28it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 119.77it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 120.53it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 118.35it/s]\n", + "Selecting available inchikey pairs per bin: 11it [00:00, 120.38it/s]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = plot_comparison_violinplot_three_panels([scores_normal_model_both_modes_ionizing.pos_vs_pos_scores, scores_normal_model_both_modes_ionizing.pos_vs_neg_scores, scores_normal_model_both_modes_ionizing.neg_vs_neg_scores],\n", + " [scores_balanced_model_both_modes_ionizing.pos_vs_pos_scores, scores_balanced_model_both_modes_ionizing.pos_vs_neg_scores, scores_balanced_model_both_modes_ionizing.neg_vs_neg_scores],\n", + " bins)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}