diff --git a/optimhc/cli.py b/optimhc/cli.py index f39222f..2f7c589 100644 --- a/optimhc/cli.py +++ b/optimhc/cli.py @@ -26,11 +26,6 @@ def cli(): pass -def parse_cli_config(**kwargs): - # Remove None values and build a config dict - return {k: v for k, v in kwargs.items() if v is not None and v != ()} - - @cli.command() @click.option( "--config", @@ -129,7 +124,7 @@ def pipeline( if visualization is not None: pipeline_config["visualization"] = visualization if numprocesses: - pipeline_config["numProcess"] = numprocesses + pipeline_config["numProcesses"] = numprocesses if allele: pipeline_config["allele"] = list(allele) if loglevel: diff --git a/optimhc/core/config.py b/optimhc/core/config.py index e3dfa4b..a88fabf 100644 --- a/optimhc/core/config.py +++ b/optimhc/core/config.py @@ -16,7 +16,7 @@ "saveModels": True, "toFlashLFQ": True, "allele": [], - "numProcess": 4, + "numProcesses": 4, "removePreNxtAA": False, "showProgress": True, "logLevel": "INFO", @@ -213,7 +213,8 @@ def validate(self): input_files = self._config["inputFile"] if not isinstance(input_files, (list, tuple)): logger.debug(f"inputFile is not a list or tuple: {input_files}. Converting to list.") - self._config["inputFile"] = list(input_files) + self._config["inputFile"] = [input_files] + input_files = self._config["inputFile"] if not input_files: logger.error("inputFile list cannot be empty") raise ValueError("inputFile list cannot be empty") diff --git a/optimhc/core/pipeline.py b/optimhc/core/pipeline.py index 6574ec4..32b5653 100644 --- a/optimhc/core/pipeline.py +++ b/optimhc/core/pipeline.py @@ -281,6 +281,8 @@ def _run_single_experiment(self, psms, exp_config, exp_name, exp_dir): bool True if experiment succeeded, False otherwise. """ + results = None + models = None try: os.makedirs(exp_dir, exist_ok=True) @@ -333,12 +335,8 @@ def _run_single_experiment(self, psms, exp_config, exp_name, exp_dir): return False finally: - # Explicit resource release to free up memory after each experiment - try: - del results - del models - except Exception: - pass + del results + del models gc.collect() def run(self): @@ -382,8 +380,6 @@ def run_experiments(self): psms = self.read_input() psms = self._generate_features(psms) - - # Save the generated pin file for reference pin_path = os.path.join(self.output_dir, f"optimhc.{self.experiment}.pin") psms.write_pin(pin_path) fig_summary_dir = os.path.join(self.output_dir, "figures") diff --git a/optimhc/feature_generator/PWM.py b/optimhc/feature_generator/PWM.py index bf6793c..84b541d 100644 --- a/optimhc/feature_generator/PWM.py +++ b/optimhc/feature_generator/PWM.py @@ -284,7 +284,7 @@ def _default_allele_pwm_files(self) -> Dict[str, Dict[int, str]]: logger.debug(f"Default PWM file paths set for alleles: {self.alleles}") return pwm_files - def _most_conserved_postions(self, pwm: pd.DataFrame, n: int = 2) -> List[int]: + def _most_conserved_positions(self, pwm: pd.DataFrame, n: int = 2) -> List[int]: """ Find the n most conserved positions in the PWM. @@ -635,7 +635,7 @@ def generate_features(self) -> pd.DataFrame: min_mer = min(self.pwms[allele].keys()) max_mer = max(self.pwms[allele].keys()) for mer_len in range(min_mer, max_mer + 1): - anchor_dict[mer_len] = self._most_conserved_postions( + anchor_dict[mer_len] = self._most_conserved_positions( self.pwms[allele][mer_len], self.anchors ) logger.info(f"Most conserved positions for allele {allele}: {anchor_dict}") diff --git a/optimhc/feature_generator/mhcflurry.py b/optimhc/feature_generator/mhcflurry.py index d5fbe07..1cc3b30 100644 --- a/optimhc/feature_generator/mhcflurry.py +++ b/optimhc/feature_generator/mhcflurry.py @@ -297,15 +297,7 @@ def generate_features(self) -> pd.DataFrame: ] if features_df.isna().sum().sum() > 0: logger.warning("NaN values found in the generated features.") - return features_df[ - [ - "Peptide", - "mhcflurry_affinity", - "mhcflurry_processing_score", - "mhcflurry_presentation_score", - "mhcflurry_presentation_percentile", - ] - ] + return features_df def get_best_allele(self) -> pd.DataFrame: """ diff --git a/optimhc/utils.py b/optimhc/utils.py index 5ee1be1..3a4e51f 100644 --- a/optimhc/utils.py +++ b/optimhc/utils.py @@ -1,5 +1,6 @@ # utils.py +import re from logging import getLogger from pathlib import Path from typing import List @@ -80,8 +81,6 @@ def strip_flanking_and_charge(peptide: str) -> str: This function removes any amino acids before the first '.' and after the last '.' in the peptide sequence. """ - import re - peptide = re.sub(r"^[^.]*\.|\.[^.]*$", "", peptide) # Some PIN may have charge state at the end of the peptide, e.g., R.RRVEHHDHAVVSGR4.L @@ -120,8 +119,6 @@ def remove_modifications(peptide: str, keep_modification=None) -> str: If keep_modification is provided, only those specific modifications will be preserved in the output sequence. """ - import re - if keep_modification is None: return re.sub(r"\[.*?\]", "", peptide) else: diff --git a/optimhc/visualization/plot_features.py b/optimhc/visualization/plot_features.py index 6a61412..bddb375 100644 --- a/optimhc/visualization/plot_features.py +++ b/optimhc/visualization/plot_features.py @@ -120,7 +120,7 @@ def plot_feature_importance( source_colors = dict(zip(rescoring_features.keys(), colors)) for source, features in rescoring_features.items(): - color = source_colors[source] # 修改:使用预分配的颜色 + color = source_colors[source] indices = [ i for i, name in enumerate(sum(rescoring_features.values(), [])) if name in features ] diff --git a/optimhc/visualization/save_or_show_plot.py b/optimhc/visualization/save_or_show_plot.py index 4264177..26d89db 100644 --- a/optimhc/visualization/save_or_show_plot.py +++ b/optimhc/visualization/save_or_show_plot.py @@ -12,3 +12,4 @@ def save_or_show_plot(save_path, logger, tight_layout=True): logger.info(f"Plot saved to {save_path}") else: plt.show() + plt.close("all") diff --git a/tests/test_config.py b/tests/test_config.py index 3ca8f50..54be89d 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -130,6 +130,6 @@ def test_default_config_values(self): assert DEFAULT_CONFIG["inputType"] == "pepxml" assert DEFAULT_CONFIG["outputDir"] == "./results" assert DEFAULT_CONFIG["decoyPrefix"] == "DECOY_" - assert DEFAULT_CONFIG["numProcess"] == 4 + assert DEFAULT_CONFIG["numProcesses"] == 4 assert DEFAULT_CONFIG["rescore"]["testFDR"] == 0.01 assert DEFAULT_CONFIG["rescore"]["model"] == "Percolator"