diff --git a/aisp/csa/_ai_recognition_sys.py b/aisp/csa/_ai_recognition_sys.py index 605cdea..b5e82ca 100644 --- a/aisp/csa/_ai_recognition_sys.py +++ b/aisp/csa/_ai_recognition_sys.py @@ -212,42 +212,16 @@ def fit( ) x_class = X[sample_index[_class_]] - # Calculating the similarity threshold between antigens + self._cells_affinity_threshold(x_class) - sufficiently_similar = ( - self.affinity_threshold * self.affinity_threshold_scalar - ) - # Initialize memory cells for a class. + sufficiently_similar = self.affinity_threshold * self.affinity_threshold_scalar + pool_c: list[BCell] = self._init_memory_c(x_class) for ai in x_class: - # Calculating the stimulation of memory cells with aᵢ and selecting the largest - # stimulation from the memory set. - c_match = pool_c[0] - match_stimulation = -1.0 - for cell in pool_c: - stimulation = self._affinity(cell.vector, ai) - if stimulation > match_stimulation: - match_stimulation = stimulation - c_match = cell - - arb_list: list[_ARB] = [ - _ARB(vector=c_match.vector, stimulation=match_stimulation) - ] - - set_clones: npt.NDArray = c_match.hyper_clonal_mutate( - int(self.rate_hypermutation * self.rate_clonal * match_stimulation), - self._feature_type, - ) - - for clone in set_clones: - arb_list.append( - _ARB( - vector=clone, - stimulation=self._affinity(clone, ai), - ) - ) + c_match, match_stimulation = self._select_best_matching_cell(ai, pool_c) + arb_list = self._generate_arb_list(ai, c_match, match_stimulation) c_candidate = self._refinement_arb(ai, match_stimulation, arb_list) if c_candidate.stimulation > match_stimulation: @@ -315,6 +289,77 @@ def predict(self, X: Union[npt.NDArray, list]) -> npt.NDArray: X, self.k, self._all_class_cell_vectors, self._affinity ) + def _select_best_matching_cell( + self, + ai: npt.NDArray, + pool_c: list[BCell] + ) -> tuple[BCell, float]: + """Select the BCell with the highest affinity with antigen. + + Parameters + ---------- + ai : npt.NDArray + The current antigen. 
+ pool_c : list[BCell] + Pool of memory B-Cells belonging to same class. + + Returns + ------- + tuple[BCell, float] + A tuple containing the best B cell and their affinity. + """ + c_match = pool_c[0] + match_stimulation = -1.0 + for cell in pool_c: + stimulation = self._affinity(cell.vector, ai) + if stimulation > match_stimulation: + match_stimulation = stimulation + c_match = cell + + return c_match, match_stimulation + + def _generate_arb_list( + self, + ai: npt.NDArray, + c_match: BCell, + match_stimulation: float + ) -> list[_ARB]: + """Generate a pool from the best affinity B cell. + + Parameters + ---------- + ai : npt.NDArray + The current antigen. + c_match : BCell + The best B-Cell + match_stimulation : float + The corresponding stimulation (affinity) value + + Returns + ------- + list[_ARB] + ARB set. + """ + n_clones = int(self.rate_hypermutation * self.rate_clonal * match_stimulation) + arb_list: list[_ARB] = [ + _ARB(vector=c_match.vector, stimulation=match_stimulation) + ] + + if n_clones <= 0: + return arb_list + + set_clones: npt.NDArray = c_match.hyper_clonal_mutate( + n_clones, + self._feature_type, + ) + + arb_list.extend( + _ARB(vector=clone, stimulation=self._affinity(clone, ai)) + for clone in set_clones + ) + + return arb_list + def _refinement_arb( self, ai: npt.NDArray, @@ -372,7 +417,6 @@ def _refinement_arb( if iters == self.max_iters or avg_stimulation > self.affinity_threshold: break - # pick a random cell for mutations. random_index = random.randint(0, len(arb_list) - 1) clone_arb = arb_list[random_index].hyper_clonal_mutate( int(self.rate_clonal * c_match_stimulation), self._feature_type diff --git a/aisp/csa/_clonalg.py b/aisp/csa/_clonalg.py index 76af9df..f54284d 100644 --- a/aisp/csa/_clonalg.py +++ b/aisp/csa/_clonalg.py @@ -42,7 +42,8 @@ class Clonalg(BaseOptimizer): Maximum number of possible clones of a cell. This value is multiplied by cell_affinity to determine the number of clones. 
rate_hypermutation : float, default=1.0 - Rate of mutated clones, used as a scalar factor. + Hypermutation rate controls the intensity of mutations during clonal expansion. Higher + values decrease mutation intensity, while lower values increase it. n_diversity_injection : int, default=5 Number of new random memory cells injected to maintain diversity. selection_size : int, default=5 diff --git a/aisp/nsa/_base.py b/aisp/nsa/_base.py index 84314b9..e8d0b43 100644 --- a/aisp/nsa/_base.py +++ b/aisp/nsa/_base.py @@ -41,7 +41,6 @@ def check_detector_bnsa_validity( return False for i in range(x_class.shape[0]): - # Calculate the normalized Hamming Distance if hamming(x_class[i], vector_x) <= aff_thresh: return False return True @@ -77,9 +76,7 @@ def bnsa_class_prediction( total_distance = 0.0 class_found = True - # Calculates the Hamming distance between the row and all detectors. for detector_index in range(n_detectors): - # Calculates the normalized Hamming distance between the sample and the detector distance = hamming(features, class_detectors[class_index][detector_index]) # If the distance is less than or equal to the threshold, the detector recognizes @@ -89,7 +86,6 @@ def bnsa_class_prediction( break total_distance += distance - # if the sample is self for the class if class_found: avg_distance = total_distance / n_detectors # Choose the class with the largest average distance. diff --git a/aisp/nsa/_binary_negative_selection.py b/aisp/nsa/_binary_negative_selection.py index f2511ca..65b28bd 100644 --- a/aisp/nsa/_binary_negative_selection.py +++ b/aisp/nsa/_binary_negative_selection.py @@ -168,16 +168,12 @@ def fit( check_shape_match(X, y) check_binary_array(X) - # Converts the entire array X to boolean X = X.astype(np.bool_) self._n_features = X.shape[1] - # Identifying the possible classes within the output array `y`. self.classes = np.unique(y) - # Dictionary that will store detectors with classes as keys. 
+ list_detectors_by_class: dict = {} - # Separates the classes for training. sample_index: dict = self._slice_index_list_by_class(y) - # Progress bar for generating all detectors. progress = tqdm( total=int(self.N * (len(self.classes))), @@ -187,18 +183,17 @@ def fit( ) for _class_ in self.classes: - # Initializes the empty set that will contain the valid detectors. valid_detectors_set: list = [] discard_count: int = 0 - # Updating the progress bar with the current class the algorithm is processing. progress.set_description_str( f"Generating the detectors for the {_class_} class:" ) x_class = X[sample_index[_class_]] while len(valid_detectors_set) < self.N: - # Generates a candidate detector vector randomly with values 0 and 1. - vector_x = np.random.randint(0, 2, size=(self._n_features,)).astype(np.bool_) - # If the detector is valid, add it to the list of valid detectors. + vector_x = np.random.randint(0, 2, size=(self._n_features,)).astype( + np.bool_ + ) + if check_detector_bnsa_validity(x_class, vector_x, self.aff_thresh): discard_count = 0 valid_detectors_set.append(vector_x) @@ -208,16 +203,13 @@ def fit( if discard_count == self.max_discards: raise MaxDiscardsReachedError(_class_) - # Add detectors to the dictionary with classes as keys. list_detectors_by_class[_class_] = np.array(valid_detectors_set) - # Notify the completion of detector generation for the classes. progress.set_description( f"\033[92m✔ Non-self detectors for classes ({', '.join(map(str, self.classes))}) " f"successfully generated\033[0m" ) progress.close() - # Saves the found detectors in the attribute for the class detectors. self._detectors = list_detectors_by_class self._detectors_stack = np.array( [np.stack(self._detectors[class_name]) for class_name in self.classes] @@ -261,16 +253,14 @@ def predict(self, X: Union[npt.NDArray, list]) -> npt.NDArray: check_feature_dimension(X, self._n_features) check_binary_array(X) - # Converts the entire array X to boolean. 
if X.dtype != bool: X = X.astype(bool) - # Initializes an empty array that will store the predictions. c = [] - # For each sample row in X. + for line in X: class_found: bool = True - # Class prediction based on detectors + class_index = bnsa_class_prediction( line, self._detectors_stack, self.aff_thresh ) diff --git a/aisp/nsa/_negative_selection.py b/aisp/nsa/_negative_selection.py index e88b011..babb4bd 100644 --- a/aisp/nsa/_negative_selection.py +++ b/aisp/nsa/_negative_selection.py @@ -23,6 +23,7 @@ check_array_type, check_shape_match, check_feature_dimension, + check_value_range, ) @@ -116,7 +117,7 @@ class RNSA(BaseClassifier): >>> rnsa = rnsa.fit(x_train, y_train, verbose=False) >>> x_test = [ ... [0.15, 0.45], # Expected: Class 'a' - ... [0.85, 0.65], # Esperado: Classe 'b' + ... [0.85, 0.65], # Expected: Class 'b' ... ] >>> y_pred = rnsa.predict(x_test) >>> print(y_pred) @@ -159,12 +160,10 @@ def __init__( ) self.max_discards: int = sanitize_param(max_discards, 1000, lambda x: x > 0) - # Retrieves the variables from kwargs. self.p: np.float64 = np.float64(kwargs.get("p", 2)) self.cell_bounds: bool = bool(kwargs.get("cell_bounds", False)) self.non_self_label: str = str(kwargs.get("non_self_label", "non-self")) - # Initializes the other class variables as None. self._detectors: Optional[Dict[str | int, list[Detector]]] = None self.classes: Optional[npt.NDArray] = None @@ -199,24 +198,24 @@ def fit( MaxDiscardsReachedError The maximum number of detector discards was reached during maturation. Check the defined radius value and consider reducing it. + ValueError + If the values of X fall outside the interval [0, 1]. Returns ------- self : RNSA - Returns the instance itself. + Returns the instance itself. """ X = check_array_type(X) y = check_array_type(y, "y") check_shape_match(X, y) + check_value_range(X) self._n_features = X.shape[1] - # Identifying the possible classes within the output array `y`. 
self.classes = np.unique(y) - # Dictionary that will store detectors with classes as keys. list_detectors_by_class = {} - # Separates the classes for training. sample_index = self._slice_index_list_by_class(y) - # Progress bar for generating all detectors. + progress = tqdm( total=int(self.N * (len(self.classes))), bar_format="{desc} ┇{bar}┇ {n}/{total} detectors", @@ -224,21 +223,19 @@ def fit( disable=not verbose, ) for _class_ in self.classes: - # Initializes the empty set that will contain the valid detectors. valid_detectors_set: List[Detector] = [] discard_count = 0 x_class = X[sample_index[_class_]] - # Indicating which class the algorithm is currently processing for the progress bar. + progress.set_description_str( f"Generating the detectors for the {_class_} class:" ) + while len(valid_detectors_set) < self.N: - # Generates a candidate detector vector randomly with values between 0 and 1. vector_x = np.random.random_sample(size=(self._n_features,)) - # Checks the validity of the detector for non-self with respect to the class samples + valid_detector = self._checks_valid_detector(x_class, vector_x) - # If the detector is valid, add it to the list of valid detectors. if valid_detector is not False: discard_count = 0 radius: Optional[float] = None @@ -253,15 +250,14 @@ def fit( if discard_count == self.max_discards: raise MaxDiscardsReachedError(_class_) - # Add detectors, with classes as keys in the dictionary. list_detectors_by_class[_class_] = valid_detectors_set - # Notify completion of detector generation for the classes. 
+ progress.set_description( f"\033[92m✔ Non-self detectors for classes ({', '.join(map(str, self.classes))}) " f"successfully generated\033[0m" ) progress.close() - # Saves the found detectors in the attribute for the non-self detectors of the trained model + self._detectors = list_detectors_by_class return self @@ -283,6 +279,8 @@ def predict(self, X: Union[npt.NDArray, list]) -> npt.NDArray: ModelNotFittedError If the mode has not yet been adjusted and does not have defined detectors or classes, it is not able to predictions + ValueError + If the values of X fall outside the interval [0, 1]. Returns ------- @@ -294,10 +292,9 @@ def predict(self, X: Union[npt.NDArray, list]) -> npt.NDArray: raise ModelNotFittedError("RNSA") X = check_array_type(X) check_feature_dimension(X, self._n_features) + check_value_range(X) - # Initializes an empty array that will store the predictions. c = [] - # For each sample row in X. for line in X: class_found: bool _class_ = self._compare_sample_to_detectors(line) @@ -343,7 +340,6 @@ def _checks_valid_detector( is_valid : Union[bool, tuple[bool, float]] Returns whether the detector is valid or not. """ - # If any of the input arrays have zero size, Returns false. if np.size(x_class) == 0 or np.size(vector_x) == 0: return False # If self.k > 1, uses the k nearest neighbors (kNN); otherwise, checks the detector if self.k > 1: knn_list: list = [] for x in x_class: - # Calculates the distance between the two vectors and adds it to the kNN list if - # the distance is smaller than the largest distance in the list. self._compare_knearest_neighbors_list( knn_list, self._distance(x, vector_x) ) - # If the average of the distances in the kNN list is less than the radius, Returns true. 
distance_mean = np.mean(knn_list) if self.algorithm == "V-detector": return self._detector_is_valid_to_vdetector( @@ -371,16 +364,13 @@ def _checks_valid_detector( ) return self._detector_is_valid_to_vdetector(distance, vector_x) - # Calculates the distance between the vectors; if not it is less than or equal to - # the radius plus the sample's radius, sets the validity of the detector to - # true. threshold: float = self.r + self.r_s if check_detector_rnsa_validity( x_class, vector_x, threshold, get_metric_code(self.metric), self.p ): - return True # Detector is valid! + return True - return False # Detector is not valid! + return False def _compare_knearest_neighbors_list(self, knn: list, distance: float) -> None: """ @@ -395,12 +385,9 @@ def _compare_knearest_neighbors_list(self, knn: list, distance: float) -> None: distance : float Distance to check. """ - # If the number of distances in kNN is less than k, adds the distance. if len(knn) < self.k: knn.append(distance) knn.sort() - # Otherwise, add the distance if the new distance is smaller than the largest - # distance in the list. elif knn[self.k - 1] > distance: knn[self.k - 1] = distance knn.sort() @@ -423,10 +410,8 @@ def _compare_sample_to_detectors(self, line: npt.NDArray) -> Optional[str]: if self._detectors is None or self.classes is None: return None - # List to store the classes and the average distance between the detectors and the sample. possible_classes = [] for _class_ in self.classes: - # Variable to indicate if the class was found with the detectors. class_found: bool = True sum_distance = 0.0 for detector in self._detectors[_class_]: diff --git a/aisp/utils/validation.py b/aisp/utils/validation.py index 0b111d0..bd044c4 100644 --- a/aisp/utils/validation.py +++ b/aisp/utils/validation.py @@ -130,3 +130,33 @@ def check_binary_array(x: npt.NDArray): raise ValueError( "The array x contains values that are not composed only of 0 and 1." 
) + + +def check_value_range( + x: npt.NDArray, + name: str = 'X', + min_value: float = 0.0, + max_value: float = 1.0 +) -> None: + """Ensure all values in the x array fall within a range. + + Parameters + ---------- + x : npt.NDArray + Array, containing the samples. + name : str, default='X' + Name used in the error message. + min_value : float, default=0.0 + Minimum allowed value. + max_value : float, default=1.0 + Maximum allowed value. + + Raises + ------ + ValueError + If any value in the array falls outside the interval [min_value, max_value]. + """ + if x.min() < min_value or x.max() > max_value: + raise ValueError( + f"{name} must contain only values within [{min_value}, {max_value}]." + )