Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions optimhc/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,6 @@ def cli():
pass


def parse_cli_config(**kwargs):
    """
    Build a config dict from keyword arguments, dropping unset values.

    An entry is dropped when its value is ``None`` or compares equal to the
    empty tuple ``()``. Every other value is kept as-is, including falsy
    ones such as ``0``, ``False``, ``""`` and ``[]``.

    Parameters
    ----------
    **kwargs
        Arbitrary option names mapped to their values.

    Returns
    -------
    dict
        A new dict holding only the entries that were kept.
    """
    # NOTE(review): the ``()`` check presumably targets repeatable CLI options
    # that arrive as an empty tuple when not supplied -- confirm with callers.
    return {k: v for k, v in kwargs.items() if v is not None and v != ()}


@cli.command()
@click.option(
"--config",
Expand Down Expand Up @@ -129,7 +124,7 @@ def pipeline(
if visualization is not None:
pipeline_config["visualization"] = visualization
if numprocesses:
pipeline_config["numProcess"] = numprocesses
pipeline_config["numProcesses"] = numprocesses
if allele:
pipeline_config["allele"] = list(allele)
if loglevel:
Expand Down
5 changes: 3 additions & 2 deletions optimhc/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"saveModels": True,
"toFlashLFQ": True,
"allele": [],
"numProcess": 4,
"numProcesses": 4,
"removePreNxtAA": False,
"showProgress": True,
"logLevel": "INFO",
Expand Down Expand Up @@ -213,7 +213,8 @@ def validate(self):
input_files = self._config["inputFile"]
if not isinstance(input_files, (list, tuple)):
logger.debug(f"inputFile is not a list or tuple: {input_files}. Converting to list.")
self._config["inputFile"] = list(input_files)
self._config["inputFile"] = [input_files]
input_files = self._config["inputFile"]
if not input_files:
logger.error("inputFile list cannot be empty")
raise ValueError("inputFile list cannot be empty")
Expand Down
12 changes: 4 additions & 8 deletions optimhc/core/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ def _run_single_experiment(self, psms, exp_config, exp_name, exp_dir):
bool
True if experiment succeeded, False otherwise.
"""
results = None
models = None
try:
os.makedirs(exp_dir, exist_ok=True)

Expand Down Expand Up @@ -333,12 +335,8 @@ def _run_single_experiment(self, psms, exp_config, exp_name, exp_dir):
return False

finally:
# Explicit resource release to free up memory after each experiment
try:
del results
del models
except Exception:
pass
del results
del models
gc.collect()

def run(self):
Expand Down Expand Up @@ -382,8 +380,6 @@ def run_experiments(self):

psms = self.read_input()
psms = self._generate_features(psms)

# Save the generated pin file for reference
pin_path = os.path.join(self.output_dir, f"optimhc.{self.experiment}.pin")
psms.write_pin(pin_path)
fig_summary_dir = os.path.join(self.output_dir, "figures")
Expand Down
4 changes: 2 additions & 2 deletions optimhc/feature_generator/PWM.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def _default_allele_pwm_files(self) -> Dict[str, Dict[int, str]]:
logger.debug(f"Default PWM file paths set for alleles: {self.alleles}")
return pwm_files

def _most_conserved_postions(self, pwm: pd.DataFrame, n: int = 2) -> List[int]:
def _most_conserved_positions(self, pwm: pd.DataFrame, n: int = 2) -> List[int]:
"""
Find the n most conserved positions in the PWM.

Expand Down Expand Up @@ -635,7 +635,7 @@ def generate_features(self) -> pd.DataFrame:
min_mer = min(self.pwms[allele].keys())
max_mer = max(self.pwms[allele].keys())
for mer_len in range(min_mer, max_mer + 1):
anchor_dict[mer_len] = self._most_conserved_postions(
anchor_dict[mer_len] = self._most_conserved_positions(
self.pwms[allele][mer_len], self.anchors
)
logger.info(f"Most conserved positions for allele {allele}: {anchor_dict}")
Expand Down
10 changes: 1 addition & 9 deletions optimhc/feature_generator/mhcflurry.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,15 +297,7 @@ def generate_features(self) -> pd.DataFrame:
]
if features_df.isna().sum().sum() > 0:
logger.warning("NaN values found in the generated features.")
return features_df[
[
"Peptide",
"mhcflurry_affinity",
"mhcflurry_processing_score",
"mhcflurry_presentation_score",
"mhcflurry_presentation_percentile",
]
]
return features_df

def get_best_allele(self) -> pd.DataFrame:
"""
Expand Down
5 changes: 1 addition & 4 deletions optimhc/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# utils.py

import re
from logging import getLogger
from pathlib import Path
from typing import List
Expand Down Expand Up @@ -80,8 +81,6 @@ def strip_flanking_and_charge(peptide: str) -> str:
This function removes any amino acids before the first '.' and after the last '.'
in the peptide sequence.
"""
import re

peptide = re.sub(r"^[^.]*\.|\.[^.]*$", "", peptide)

# Some PIN may have charge state at the end of the peptide, e.g., R.RRVEHHDHAVVSGR4.L
Expand Down Expand Up @@ -120,8 +119,6 @@ def remove_modifications(peptide: str, keep_modification=None) -> str:
If keep_modification is provided, only those specific modifications will be
preserved in the output sequence.
"""
import re

if keep_modification is None:
return re.sub(r"\[.*?\]", "", peptide)
else:
Expand Down
2 changes: 1 addition & 1 deletion optimhc/visualization/plot_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def plot_feature_importance(
source_colors = dict(zip(rescoring_features.keys(), colors))

for source, features in rescoring_features.items():
color = source_colors[source] # 修改:使用预分配的颜色
color = source_colors[source]
indices = [
i for i, name in enumerate(sum(rescoring_features.values(), [])) if name in features
]
Expand Down
1 change: 1 addition & 0 deletions optimhc/visualization/save_or_show_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ def save_or_show_plot(save_path, logger, tight_layout=True):
logger.info(f"Plot saved to {save_path}")
else:
plt.show()
plt.close("all")
2 changes: 1 addition & 1 deletion tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,6 @@ def test_default_config_values(self):
assert DEFAULT_CONFIG["inputType"] == "pepxml"
assert DEFAULT_CONFIG["outputDir"] == "./results"
assert DEFAULT_CONFIG["decoyPrefix"] == "DECOY_"
assert DEFAULT_CONFIG["numProcess"] == 4
assert DEFAULT_CONFIG["numProcesses"] == 4
assert DEFAULT_CONFIG["rescore"]["testFDR"] == 0.01
assert DEFAULT_CONFIG["rescore"]["model"] == "Percolator"