From 235f318e3c3ff0d25537fa48126a9d99d892bf70 Mon Sep 17 00:00:00 2001 From: niekdejonge Date: Tue, 21 Nov 2023 14:48:13 +0100 Subject: [PATCH] Raise a ValueError if there are too few training spectra. --- ms2query/create_new_library/split_data_for_training.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ms2query/create_new_library/split_data_for_training.py b/ms2query/create_new_library/split_data_for_training.py index d9f311d0..8b8f7985 100644 --- a/ms2query/create_new_library/split_data_for_training.py +++ b/ms2query/create_new_library/split_data_for_training.py @@ -71,7 +71,11 @@ def split_spectra_on_inchikeys(spectra, validation_fraction): random.shuffle(unique_inchikeys) nr_of_inchikeys = len(unique_inchikeys)//validation_fraction validation_inchikeys = unique_inchikeys[-nr_of_inchikeys:] - + if len(validation_inchikeys) == 0: + raise ValueError(f"Too little spectra are used for training. " + f"{len(spectra)} spectra are provided, with {len(unique_inchikeys)} unique inchikeys. " + f"The minimum number of unique_inchikeys needed is {validation_fraction}, " + f"since there won't be any validation spectra selected otherwise") # Select spectra belonging to the selected inchikeys validation_spectra, training_spectra = select_spectra_belonging_to_inchikey(spectra, validation_inchikeys) return training_spectra, validation_spectra